{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 20239, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 3.7912962436676025, "learning_rate": 4.9990118088838384e-05, "loss": 10.3461, "step": 4 }, { "epoch": 0.0, "grad_norm": 3.3178839683532715, "learning_rate": 4.9980236177676767e-05, "loss": 9.7253, "step": 8 }, { "epoch": 0.0, "grad_norm": 3.043405055999756, "learning_rate": 4.997035426651515e-05, "loss": 9.476, "step": 12 }, { "epoch": 0.0, "grad_norm": 3.0094900131225586, "learning_rate": 4.9960472355353524e-05, "loss": 9.2119, "step": 16 }, { "epoch": 0.0, "grad_norm": 2.7685112953186035, "learning_rate": 4.9950590444191906e-05, "loss": 9.0744, "step": 20 }, { "epoch": 0.0, "grad_norm": 2.816589832305908, "learning_rate": 4.994070853303029e-05, "loss": 8.8707, "step": 24 }, { "epoch": 0.0, "grad_norm": 3.2649807929992676, "learning_rate": 4.993082662186867e-05, "loss": 8.677, "step": 28 }, { "epoch": 0.0, "grad_norm": 2.543574571609497, "learning_rate": 4.992094471070705e-05, "loss": 8.5568, "step": 32 }, { "epoch": 0.0, "grad_norm": 2.669053316116333, "learning_rate": 4.9911062799545434e-05, "loss": 8.2874, "step": 36 }, { "epoch": 0.0, "grad_norm": 2.4249765872955322, "learning_rate": 4.9901180888383816e-05, "loss": 8.2541, "step": 40 }, { "epoch": 0.0, "grad_norm": 2.1649515628814697, "learning_rate": 4.98912989772222e-05, "loss": 8.0488, "step": 44 }, { "epoch": 0.0, "grad_norm": 2.1681978702545166, "learning_rate": 4.9881417066060574e-05, "loss": 7.9646, "step": 48 }, { "epoch": 0.0, "grad_norm": 2.2630350589752197, "learning_rate": 4.9871535154898956e-05, "loss": 7.7305, "step": 52 }, { "epoch": 0.0, "grad_norm": 2.286691904067993, "learning_rate": 4.986165324373734e-05, "loss": 7.5245, "step": 56 }, { "epoch": 0.0, "grad_norm": 1.9876737594604492, "learning_rate": 4.9851771332575727e-05, "loss": 7.4304, "step": 
60 }, { "epoch": 0.0, "grad_norm": 1.7167022228240967, "learning_rate": 4.984188942141411e-05, "loss": 7.4031, "step": 64 }, { "epoch": 0.0, "grad_norm": 1.8193055391311646, "learning_rate": 4.983200751025249e-05, "loss": 7.3022, "step": 68 }, { "epoch": 0.0, "grad_norm": 1.543209195137024, "learning_rate": 4.9822125599090866e-05, "loss": 7.2365, "step": 72 }, { "epoch": 0.0, "grad_norm": 1.75989830493927, "learning_rate": 4.981224368792925e-05, "loss": 7.0086, "step": 76 }, { "epoch": 0.0, "grad_norm": 1.8119884729385376, "learning_rate": 4.980236177676763e-05, "loss": 7.1398, "step": 80 }, { "epoch": 0.0, "grad_norm": 1.4095029830932617, "learning_rate": 4.979247986560601e-05, "loss": 6.9534, "step": 84 }, { "epoch": 0.0, "grad_norm": 1.6824373006820679, "learning_rate": 4.9782597954444394e-05, "loss": 7.0708, "step": 88 }, { "epoch": 0.0, "grad_norm": 1.6116963624954224, "learning_rate": 4.9772716043282776e-05, "loss": 6.957, "step": 92 }, { "epoch": 0.0, "grad_norm": 1.2887672185897827, "learning_rate": 4.976283413212116e-05, "loss": 6.8803, "step": 96 }, { "epoch": 0.0, "grad_norm": 1.2640583515167236, "learning_rate": 4.9752952220959534e-05, "loss": 6.6778, "step": 100 }, { "epoch": 0.01, "grad_norm": 1.3888378143310547, "learning_rate": 4.9743070309797916e-05, "loss": 6.7205, "step": 104 }, { "epoch": 0.01, "grad_norm": 1.4042987823486328, "learning_rate": 4.97331883986363e-05, "loss": 6.7003, "step": 108 }, { "epoch": 0.01, "grad_norm": 1.531121015548706, "learning_rate": 4.972330648747468e-05, "loss": 6.7113, "step": 112 }, { "epoch": 0.01, "grad_norm": 1.5138936042785645, "learning_rate": 4.971342457631306e-05, "loss": 6.6541, "step": 116 }, { "epoch": 0.01, "grad_norm": 1.5610084533691406, "learning_rate": 4.9703542665151444e-05, "loss": 6.6966, "step": 120 }, { "epoch": 0.01, "grad_norm": 1.3988789319992065, "learning_rate": 4.9693660753989826e-05, "loss": 6.5739, "step": 124 }, { "epoch": 0.01, "grad_norm": 1.511693000793457, "learning_rate": 
4.968377884282821e-05, "loss": 6.6209, "step": 128 }, { "epoch": 0.01, "grad_norm": 1.4789738655090332, "learning_rate": 4.967389693166658e-05, "loss": 6.497, "step": 132 }, { "epoch": 0.01, "grad_norm": 1.312217354774475, "learning_rate": 4.9664015020504965e-05, "loss": 6.4966, "step": 136 }, { "epoch": 0.01, "grad_norm": 1.2716199159622192, "learning_rate": 4.965413310934335e-05, "loss": 6.6663, "step": 140 }, { "epoch": 0.01, "grad_norm": 1.5896294116973877, "learning_rate": 4.964425119818173e-05, "loss": 6.7358, "step": 144 }, { "epoch": 0.01, "grad_norm": 3.169236660003662, "learning_rate": 4.963436928702011e-05, "loss": 6.7237, "step": 148 }, { "epoch": 0.01, "grad_norm": 2.6182782649993896, "learning_rate": 4.9624487375858494e-05, "loss": 6.4832, "step": 152 }, { "epoch": 0.01, "grad_norm": 1.5328868627548218, "learning_rate": 4.9614605464696876e-05, "loss": 6.5984, "step": 156 }, { "epoch": 0.01, "grad_norm": 1.2593821287155151, "learning_rate": 4.960472355353525e-05, "loss": 6.5301, "step": 160 }, { "epoch": 0.01, "grad_norm": 1.7900025844573975, "learning_rate": 4.959484164237363e-05, "loss": 6.555, "step": 164 }, { "epoch": 0.01, "grad_norm": 2.1635518074035645, "learning_rate": 4.9584959731212015e-05, "loss": 6.5686, "step": 168 }, { "epoch": 0.01, "grad_norm": 1.4018573760986328, "learning_rate": 4.95750778200504e-05, "loss": 6.4757, "step": 172 }, { "epoch": 0.01, "grad_norm": 1.6255518198013306, "learning_rate": 4.9565195908888786e-05, "loss": 6.4482, "step": 176 }, { "epoch": 0.01, "grad_norm": 1.5604472160339355, "learning_rate": 4.955531399772717e-05, "loss": 6.448, "step": 180 }, { "epoch": 0.01, "grad_norm": 1.4451005458831787, "learning_rate": 4.954543208656554e-05, "loss": 6.5368, "step": 184 }, { "epoch": 0.01, "grad_norm": 1.330167531967163, "learning_rate": 4.9535550175403925e-05, "loss": 6.473, "step": 188 }, { "epoch": 0.01, "grad_norm": 1.9696011543273926, "learning_rate": 4.952566826424231e-05, "loss": 6.4951, "step": 192 }, { "epoch": 
0.01, "grad_norm": 1.7933244705200195, "learning_rate": 4.951578635308069e-05, "loss": 6.471, "step": 196 }, { "epoch": 0.01, "grad_norm": 1.6044100522994995, "learning_rate": 4.950590444191907e-05, "loss": 6.4612, "step": 200 }, { "epoch": 0.01, "grad_norm": 1.3888564109802246, "learning_rate": 4.9496022530757454e-05, "loss": 6.4879, "step": 204 }, { "epoch": 0.01, "grad_norm": 1.2492824792861938, "learning_rate": 4.9486140619595836e-05, "loss": 6.3364, "step": 208 }, { "epoch": 0.01, "grad_norm": 1.8178552389144897, "learning_rate": 4.947625870843422e-05, "loss": 6.5152, "step": 212 }, { "epoch": 0.01, "grad_norm": 1.363503098487854, "learning_rate": 4.946637679727259e-05, "loss": 6.2865, "step": 216 }, { "epoch": 0.01, "grad_norm": 1.319766640663147, "learning_rate": 4.9456494886110975e-05, "loss": 6.4775, "step": 220 }, { "epoch": 0.01, "grad_norm": 1.3917903900146484, "learning_rate": 4.944661297494936e-05, "loss": 6.4147, "step": 224 }, { "epoch": 0.01, "grad_norm": 2.465641736984253, "learning_rate": 4.943673106378774e-05, "loss": 6.2991, "step": 228 }, { "epoch": 0.01, "grad_norm": 1.6081475019454956, "learning_rate": 4.942684915262612e-05, "loss": 6.442, "step": 232 }, { "epoch": 0.01, "grad_norm": 1.3391867876052856, "learning_rate": 4.94169672414645e-05, "loss": 6.3932, "step": 236 }, { "epoch": 0.01, "grad_norm": 1.4221361875534058, "learning_rate": 4.9407085330302885e-05, "loss": 6.5403, "step": 240 }, { "epoch": 0.01, "grad_norm": 1.60570228099823, "learning_rate": 4.939720341914126e-05, "loss": 6.397, "step": 244 }, { "epoch": 0.01, "grad_norm": 1.3305834531784058, "learning_rate": 4.938732150797964e-05, "loss": 6.4258, "step": 248 }, { "epoch": 0.01, "grad_norm": 1.617050290107727, "learning_rate": 4.9377439596818025e-05, "loss": 6.3044, "step": 252 }, { "epoch": 0.01, "grad_norm": 1.6071454286575317, "learning_rate": 4.936755768565641e-05, "loss": 6.3836, "step": 256 }, { "epoch": 0.01, "grad_norm": 2.0243899822235107, "learning_rate": 
4.935767577449479e-05, "loss": 6.4785, "step": 260 }, { "epoch": 0.01, "grad_norm": 1.4023394584655762, "learning_rate": 4.934779386333317e-05, "loss": 6.4129, "step": 264 }, { "epoch": 0.01, "grad_norm": 1.6561346054077148, "learning_rate": 4.933791195217155e-05, "loss": 6.227, "step": 268 }, { "epoch": 0.01, "grad_norm": 1.4024381637573242, "learning_rate": 4.932803004100993e-05, "loss": 6.3957, "step": 272 }, { "epoch": 0.01, "grad_norm": 1.576090693473816, "learning_rate": 4.931814812984831e-05, "loss": 6.359, "step": 276 }, { "epoch": 0.01, "grad_norm": 1.2421482801437378, "learning_rate": 4.930826621868669e-05, "loss": 6.3735, "step": 280 }, { "epoch": 0.01, "grad_norm": 1.453856110572815, "learning_rate": 4.9298384307525074e-05, "loss": 6.4156, "step": 284 }, { "epoch": 0.01, "grad_norm": 1.426669955253601, "learning_rate": 4.928850239636346e-05, "loss": 6.3278, "step": 288 }, { "epoch": 0.01, "grad_norm": 1.5175261497497559, "learning_rate": 4.9278620485201845e-05, "loss": 6.3645, "step": 292 }, { "epoch": 0.01, "grad_norm": 1.3980427980422974, "learning_rate": 4.926873857404023e-05, "loss": 6.3977, "step": 296 }, { "epoch": 0.01, "grad_norm": 1.6867117881774902, "learning_rate": 4.92588566628786e-05, "loss": 6.3506, "step": 300 }, { "epoch": 0.02, "grad_norm": 1.5795122385025024, "learning_rate": 4.9248974751716985e-05, "loss": 6.3062, "step": 304 }, { "epoch": 0.02, "grad_norm": 1.5149136781692505, "learning_rate": 4.923909284055537e-05, "loss": 6.302, "step": 308 }, { "epoch": 0.02, "grad_norm": 1.2091716527938843, "learning_rate": 4.922921092939375e-05, "loss": 6.2064, "step": 312 }, { "epoch": 0.02, "grad_norm": 1.553070306777954, "learning_rate": 4.921932901823213e-05, "loss": 6.3236, "step": 316 }, { "epoch": 0.02, "grad_norm": 1.7350945472717285, "learning_rate": 4.920944710707051e-05, "loss": 6.3381, "step": 320 }, { "epoch": 0.02, "grad_norm": 1.2416383028030396, "learning_rate": 4.9199565195908895e-05, "loss": 6.4411, "step": 324 }, { "epoch": 
0.02, "grad_norm": 1.4091325998306274, "learning_rate": 4.918968328474727e-05, "loss": 6.4273, "step": 328 }, { "epoch": 0.02, "grad_norm": 1.261334776878357, "learning_rate": 4.917980137358565e-05, "loss": 6.2724, "step": 332 }, { "epoch": 0.02, "grad_norm": 1.6297985315322876, "learning_rate": 4.9169919462424034e-05, "loss": 6.2387, "step": 336 }, { "epoch": 0.02, "grad_norm": 1.458993673324585, "learning_rate": 4.9160037551262417e-05, "loss": 6.3042, "step": 340 }, { "epoch": 0.02, "grad_norm": 1.355808138847351, "learning_rate": 4.91501556401008e-05, "loss": 6.2788, "step": 344 }, { "epoch": 0.02, "grad_norm": 1.2890819311141968, "learning_rate": 4.914027372893918e-05, "loss": 6.3692, "step": 348 }, { "epoch": 0.02, "grad_norm": 1.5858672857284546, "learning_rate": 4.913039181777756e-05, "loss": 6.2324, "step": 352 }, { "epoch": 0.02, "grad_norm": 1.5306330919265747, "learning_rate": 4.912050990661594e-05, "loss": 6.4247, "step": 356 }, { "epoch": 0.02, "grad_norm": 1.386789083480835, "learning_rate": 4.911062799545432e-05, "loss": 6.4219, "step": 360 }, { "epoch": 0.02, "grad_norm": 1.288030743598938, "learning_rate": 4.91007460842927e-05, "loss": 6.2128, "step": 364 }, { "epoch": 0.02, "grad_norm": 1.5249669551849365, "learning_rate": 4.9090864173131084e-05, "loss": 6.2724, "step": 368 }, { "epoch": 0.02, "grad_norm": 1.787862777709961, "learning_rate": 4.9080982261969466e-05, "loss": 6.3139, "step": 372 }, { "epoch": 0.02, "grad_norm": 1.7556533813476562, "learning_rate": 4.907110035080785e-05, "loss": 6.3006, "step": 376 }, { "epoch": 0.02, "grad_norm": 1.5250591039657593, "learning_rate": 4.906121843964623e-05, "loss": 6.3109, "step": 380 }, { "epoch": 0.02, "grad_norm": 1.591663122177124, "learning_rate": 4.905133652848461e-05, "loss": 6.2026, "step": 384 }, { "epoch": 0.02, "grad_norm": 1.3261005878448486, "learning_rate": 4.904145461732299e-05, "loss": 6.2905, "step": 388 }, { "epoch": 0.02, "grad_norm": 1.692723035812378, "learning_rate": 
4.903157270616137e-05, "loss": 6.3296, "step": 392 }, { "epoch": 0.02, "grad_norm": 1.2827457189559937, "learning_rate": 4.902169079499975e-05, "loss": 6.229, "step": 396 }, { "epoch": 0.02, "grad_norm": 1.3109639883041382, "learning_rate": 4.9011808883838134e-05, "loss": 6.2625, "step": 400 }, { "epoch": 0.02, "grad_norm": 1.7134902477264404, "learning_rate": 4.900192697267652e-05, "loss": 6.1219, "step": 404 }, { "epoch": 0.02, "grad_norm": 1.3591091632843018, "learning_rate": 4.8992045061514905e-05, "loss": 6.2059, "step": 408 }, { "epoch": 0.02, "grad_norm": 1.4868909120559692, "learning_rate": 4.898216315035328e-05, "loss": 6.2244, "step": 412 }, { "epoch": 0.02, "grad_norm": 1.4216508865356445, "learning_rate": 4.897228123919166e-05, "loss": 6.2348, "step": 416 }, { "epoch": 0.02, "grad_norm": 1.6655827760696411, "learning_rate": 4.8962399328030044e-05, "loss": 6.2708, "step": 420 }, { "epoch": 0.02, "grad_norm": 1.2668567895889282, "learning_rate": 4.8952517416868426e-05, "loss": 6.1847, "step": 424 }, { "epoch": 0.02, "grad_norm": 1.4796233177185059, "learning_rate": 4.894263550570681e-05, "loss": 6.2613, "step": 428 }, { "epoch": 0.02, "grad_norm": 1.6667245626449585, "learning_rate": 4.893275359454519e-05, "loss": 6.3676, "step": 432 }, { "epoch": 0.02, "grad_norm": 1.780787467956543, "learning_rate": 4.892287168338357e-05, "loss": 6.341, "step": 436 }, { "epoch": 0.02, "grad_norm": 1.415461540222168, "learning_rate": 4.891298977222195e-05, "loss": 6.2441, "step": 440 }, { "epoch": 0.02, "grad_norm": 1.4248474836349487, "learning_rate": 4.890310786106033e-05, "loss": 6.282, "step": 444 }, { "epoch": 0.02, "grad_norm": 2.0168416500091553, "learning_rate": 4.889322594989871e-05, "loss": 6.2457, "step": 448 }, { "epoch": 0.02, "grad_norm": 1.983055830001831, "learning_rate": 4.8883344038737094e-05, "loss": 6.1607, "step": 452 }, { "epoch": 0.02, "grad_norm": 1.9059456586837769, "learning_rate": 4.8873462127575476e-05, "loss": 6.1443, "step": 456 }, { 
"epoch": 0.02, "grad_norm": 1.5710817575454712, "learning_rate": 4.886358021641386e-05, "loss": 6.2631, "step": 460 }, { "epoch": 0.02, "grad_norm": 2.3912734985351562, "learning_rate": 4.885369830525224e-05, "loss": 6.2484, "step": 464 }, { "epoch": 0.02, "grad_norm": 1.5551170110702515, "learning_rate": 4.884381639409062e-05, "loss": 6.3059, "step": 468 }, { "epoch": 0.02, "grad_norm": 1.5288927555084229, "learning_rate": 4.8833934482929e-05, "loss": 6.0518, "step": 472 }, { "epoch": 0.02, "grad_norm": 1.6663211584091187, "learning_rate": 4.882405257176738e-05, "loss": 6.2273, "step": 476 }, { "epoch": 0.02, "grad_norm": 1.5548433065414429, "learning_rate": 4.881417066060576e-05, "loss": 6.1407, "step": 480 }, { "epoch": 0.02, "grad_norm": 1.4325834512710571, "learning_rate": 4.8804288749444144e-05, "loss": 6.1346, "step": 484 }, { "epoch": 0.02, "grad_norm": 1.622158408164978, "learning_rate": 4.8794406838282526e-05, "loss": 6.1832, "step": 488 }, { "epoch": 0.02, "grad_norm": 1.4777097702026367, "learning_rate": 4.878452492712091e-05, "loss": 6.2451, "step": 492 }, { "epoch": 0.02, "grad_norm": 1.1623598337173462, "learning_rate": 4.877464301595929e-05, "loss": 6.1631, "step": 496 }, { "epoch": 0.02, "grad_norm": 1.598676323890686, "learning_rate": 4.8764761104797665e-05, "loss": 6.2348, "step": 500 }, { "epoch": 0.02, "grad_norm": 1.610736608505249, "learning_rate": 4.875487919363605e-05, "loss": 6.3664, "step": 504 }, { "epoch": 0.03, "grad_norm": 1.4379361867904663, "learning_rate": 4.874499728247443e-05, "loss": 6.202, "step": 508 }, { "epoch": 0.03, "grad_norm": 1.3208808898925781, "learning_rate": 4.873511537131281e-05, "loss": 6.1286, "step": 512 }, { "epoch": 0.03, "grad_norm": 1.3087486028671265, "learning_rate": 4.872523346015119e-05, "loss": 6.2657, "step": 516 }, { "epoch": 0.03, "grad_norm": 1.3428252935409546, "learning_rate": 4.871535154898958e-05, "loss": 6.0752, "step": 520 }, { "epoch": 0.03, "grad_norm": 1.5278196334838867, "learning_rate": 
4.870546963782796e-05, "loss": 6.267, "step": 524 }, { "epoch": 0.03, "grad_norm": 1.5938080549240112, "learning_rate": 4.869558772666634e-05, "loss": 6.177, "step": 528 }, { "epoch": 0.03, "grad_norm": 1.2772130966186523, "learning_rate": 4.868570581550472e-05, "loss": 6.2219, "step": 532 }, { "epoch": 0.03, "grad_norm": 1.4687914848327637, "learning_rate": 4.8675823904343104e-05, "loss": 6.2316, "step": 536 }, { "epoch": 0.03, "grad_norm": 1.4582056999206543, "learning_rate": 4.8665941993181486e-05, "loss": 6.0915, "step": 540 }, { "epoch": 0.03, "grad_norm": 1.3734614849090576, "learning_rate": 4.865606008201987e-05, "loss": 6.2694, "step": 544 }, { "epoch": 0.03, "grad_norm": 1.426127314567566, "learning_rate": 4.864617817085825e-05, "loss": 6.1274, "step": 548 }, { "epoch": 0.03, "grad_norm": 1.3772287368774414, "learning_rate": 4.863629625969663e-05, "loss": 6.2704, "step": 552 }, { "epoch": 0.03, "grad_norm": 1.2581090927124023, "learning_rate": 4.862641434853501e-05, "loss": 6.1335, "step": 556 }, { "epoch": 0.03, "grad_norm": 1.1419745683670044, "learning_rate": 4.861653243737339e-05, "loss": 6.1259, "step": 560 }, { "epoch": 0.03, "grad_norm": 1.2435179948806763, "learning_rate": 4.860665052621177e-05, "loss": 6.2289, "step": 564 }, { "epoch": 0.03, "grad_norm": 1.5236430168151855, "learning_rate": 4.859676861505015e-05, "loss": 6.0294, "step": 568 }, { "epoch": 0.03, "grad_norm": 2.289353370666504, "learning_rate": 4.8586886703888535e-05, "loss": 6.1533, "step": 572 }, { "epoch": 0.03, "grad_norm": 1.5420417785644531, "learning_rate": 4.857700479272692e-05, "loss": 6.155, "step": 576 }, { "epoch": 0.03, "grad_norm": 1.8614426851272583, "learning_rate": 4.85671228815653e-05, "loss": 6.2381, "step": 580 }, { "epoch": 0.03, "grad_norm": 1.405530571937561, "learning_rate": 4.8557240970403675e-05, "loss": 6.2005, "step": 584 }, { "epoch": 0.03, "grad_norm": 1.4058623313903809, "learning_rate": 4.854735905924206e-05, "loss": 6.1903, "step": 588 }, { "epoch": 
0.03, "grad_norm": 1.7855048179626465, "learning_rate": 4.853747714808044e-05, "loss": 6.2939, "step": 592 }, { "epoch": 0.03, "grad_norm": 1.3504676818847656, "learning_rate": 4.852759523691882e-05, "loss": 6.0347, "step": 596 }, { "epoch": 0.03, "grad_norm": 1.3448097705841064, "learning_rate": 4.85177133257572e-05, "loss": 6.1058, "step": 600 }, { "epoch": 0.03, "grad_norm": 1.2908382415771484, "learning_rate": 4.8507831414595585e-05, "loss": 6.1323, "step": 604 }, { "epoch": 0.03, "grad_norm": 1.6946722269058228, "learning_rate": 4.849794950343397e-05, "loss": 6.0755, "step": 608 }, { "epoch": 0.03, "grad_norm": 1.0505646467208862, "learning_rate": 4.848806759227235e-05, "loss": 6.0742, "step": 612 }, { "epoch": 0.03, "grad_norm": 1.269295334815979, "learning_rate": 4.8478185681110724e-05, "loss": 6.2864, "step": 616 }, { "epoch": 0.03, "grad_norm": 1.3444817066192627, "learning_rate": 4.8468303769949107e-05, "loss": 6.1599, "step": 620 }, { "epoch": 0.03, "grad_norm": 1.5646265745162964, "learning_rate": 4.845842185878749e-05, "loss": 6.1745, "step": 624 }, { "epoch": 0.03, "grad_norm": 1.2452738285064697, "learning_rate": 4.844853994762587e-05, "loss": 6.1796, "step": 628 }, { "epoch": 0.03, "grad_norm": 1.9167345762252808, "learning_rate": 4.843865803646426e-05, "loss": 6.1854, "step": 632 }, { "epoch": 0.03, "grad_norm": 1.3464816808700562, "learning_rate": 4.842877612530264e-05, "loss": 6.1394, "step": 636 }, { "epoch": 0.03, "grad_norm": 1.5352030992507935, "learning_rate": 4.841889421414102e-05, "loss": 6.1579, "step": 640 }, { "epoch": 0.03, "grad_norm": 1.2066044807434082, "learning_rate": 4.84090123029794e-05, "loss": 6.1284, "step": 644 }, { "epoch": 0.03, "grad_norm": 1.3687278032302856, "learning_rate": 4.839913039181778e-05, "loss": 6.1768, "step": 648 }, { "epoch": 0.03, "grad_norm": 1.4037103652954102, "learning_rate": 4.838924848065616e-05, "loss": 6.0552, "step": 652 }, { "epoch": 0.03, "grad_norm": 1.8637155294418335, "learning_rate": 
4.8379366569494545e-05, "loss": 6.126, "step": 656 }, { "epoch": 0.03, "grad_norm": 1.8843746185302734, "learning_rate": 4.836948465833293e-05, "loss": 6.1839, "step": 660 }, { "epoch": 0.03, "grad_norm": 1.2985315322875977, "learning_rate": 4.835960274717131e-05, "loss": 6.0767, "step": 664 }, { "epoch": 0.03, "grad_norm": 1.5230488777160645, "learning_rate": 4.8349720836009684e-05, "loss": 6.1762, "step": 668 }, { "epoch": 0.03, "grad_norm": 1.8898823261260986, "learning_rate": 4.8339838924848067e-05, "loss": 6.0165, "step": 672 }, { "epoch": 0.03, "grad_norm": 1.7203717231750488, "learning_rate": 4.832995701368645e-05, "loss": 6.1677, "step": 676 }, { "epoch": 0.03, "grad_norm": 1.478527307510376, "learning_rate": 4.832007510252483e-05, "loss": 6.2011, "step": 680 }, { "epoch": 0.03, "grad_norm": 1.2331160306930542, "learning_rate": 4.831019319136321e-05, "loss": 6.1311, "step": 684 }, { "epoch": 0.03, "grad_norm": 1.4134057760238647, "learning_rate": 4.8300311280201595e-05, "loss": 6.157, "step": 688 }, { "epoch": 0.03, "grad_norm": 1.2819682359695435, "learning_rate": 4.829042936903998e-05, "loss": 6.0408, "step": 692 }, { "epoch": 0.03, "grad_norm": 1.646480917930603, "learning_rate": 4.828054745787836e-05, "loss": 6.0527, "step": 696 }, { "epoch": 0.03, "grad_norm": 1.3179553747177124, "learning_rate": 4.8270665546716734e-05, "loss": 6.1771, "step": 700 }, { "epoch": 0.03, "grad_norm": 1.3149442672729492, "learning_rate": 4.8260783635555116e-05, "loss": 6.1106, "step": 704 }, { "epoch": 0.03, "grad_norm": 1.345340371131897, "learning_rate": 4.82509017243935e-05, "loss": 6.0963, "step": 708 }, { "epoch": 0.04, "grad_norm": 2.3398826122283936, "learning_rate": 4.824101981323188e-05, "loss": 6.075, "step": 712 }, { "epoch": 0.04, "grad_norm": 1.1765165328979492, "learning_rate": 4.823113790207026e-05, "loss": 6.1648, "step": 716 }, { "epoch": 0.04, "grad_norm": 1.5758169889450073, "learning_rate": 4.8221255990908645e-05, "loss": 6.206, "step": 720 }, { "epoch": 
0.04, "grad_norm": 1.3040266036987305, "learning_rate": 4.8211374079747027e-05, "loss": 6.0651, "step": 724 }, { "epoch": 0.04, "grad_norm": 1.7494781017303467, "learning_rate": 4.82014921685854e-05, "loss": 6.0144, "step": 728 }, { "epoch": 0.04, "grad_norm": 1.5416646003723145, "learning_rate": 4.8191610257423784e-05, "loss": 6.2011, "step": 732 }, { "epoch": 0.04, "grad_norm": 1.3276399374008179, "learning_rate": 4.8181728346262166e-05, "loss": 6.2073, "step": 736 }, { "epoch": 0.04, "grad_norm": 1.3996212482452393, "learning_rate": 4.817184643510055e-05, "loss": 6.0768, "step": 740 }, { "epoch": 0.04, "grad_norm": 1.3483880758285522, "learning_rate": 4.816196452393893e-05, "loss": 5.9309, "step": 744 }, { "epoch": 0.04, "grad_norm": 1.1830189228057861, "learning_rate": 4.815208261277732e-05, "loss": 6.0258, "step": 748 }, { "epoch": 0.04, "grad_norm": 1.3837305307388306, "learning_rate": 4.8142200701615694e-05, "loss": 6.0004, "step": 752 }, { "epoch": 0.04, "grad_norm": 1.1996792554855347, "learning_rate": 4.8132318790454076e-05, "loss": 6.067, "step": 756 }, { "epoch": 0.04, "grad_norm": 1.6578378677368164, "learning_rate": 4.812243687929246e-05, "loss": 6.1112, "step": 760 }, { "epoch": 0.04, "grad_norm": 1.732629418373108, "learning_rate": 4.811255496813084e-05, "loss": 6.0361, "step": 764 }, { "epoch": 0.04, "grad_norm": 1.3690398931503296, "learning_rate": 4.810267305696922e-05, "loss": 6.0105, "step": 768 }, { "epoch": 0.04, "grad_norm": 1.551343560218811, "learning_rate": 4.8092791145807605e-05, "loss": 5.9922, "step": 772 }, { "epoch": 0.04, "grad_norm": 1.222483515739441, "learning_rate": 4.8082909234645987e-05, "loss": 6.1032, "step": 776 }, { "epoch": 0.04, "grad_norm": 1.4626158475875854, "learning_rate": 4.807302732348437e-05, "loss": 6.2455, "step": 780 }, { "epoch": 0.04, "grad_norm": 1.4593418836593628, "learning_rate": 4.8063145412322744e-05, "loss": 6.104, "step": 784 }, { "epoch": 0.04, "grad_norm": 1.4751644134521484, "learning_rate": 
4.8053263501161126e-05, "loss": 6.0467, "step": 788 }, { "epoch": 0.04, "grad_norm": 1.25166916847229, "learning_rate": 4.804338158999951e-05, "loss": 6.0525, "step": 792 }, { "epoch": 0.04, "grad_norm": 1.3588769435882568, "learning_rate": 4.803349967883789e-05, "loss": 6.1355, "step": 796 }, { "epoch": 0.04, "grad_norm": 1.2559925317764282, "learning_rate": 4.802361776767627e-05, "loss": 6.3282, "step": 800 }, { "epoch": 0.04, "grad_norm": 1.3895193338394165, "learning_rate": 4.8013735856514654e-05, "loss": 6.0936, "step": 804 }, { "epoch": 0.04, "grad_norm": 1.691657543182373, "learning_rate": 4.8003853945353036e-05, "loss": 6.1079, "step": 808 }, { "epoch": 0.04, "grad_norm": 1.3020676374435425, "learning_rate": 4.799397203419141e-05, "loss": 6.0076, "step": 812 }, { "epoch": 0.04, "grad_norm": 1.306980848312378, "learning_rate": 4.7984090123029794e-05, "loss": 6.1308, "step": 816 }, { "epoch": 0.04, "grad_norm": 1.3331334590911865, "learning_rate": 4.7974208211868176e-05, "loss": 6.0901, "step": 820 }, { "epoch": 0.04, "grad_norm": 1.2504887580871582, "learning_rate": 4.796432630070656e-05, "loss": 6.1079, "step": 824 }, { "epoch": 0.04, "grad_norm": 1.462392807006836, "learning_rate": 4.795444438954494e-05, "loss": 6.0692, "step": 828 }, { "epoch": 0.04, "grad_norm": 1.281559944152832, "learning_rate": 4.794456247838332e-05, "loss": 6.0399, "step": 832 }, { "epoch": 0.04, "grad_norm": 1.2926335334777832, "learning_rate": 4.7934680567221704e-05, "loss": 6.0005, "step": 836 }, { "epoch": 0.04, "grad_norm": 1.5699244737625122, "learning_rate": 4.792479865606008e-05, "loss": 5.9845, "step": 840 }, { "epoch": 0.04, "grad_norm": 1.491740345954895, "learning_rate": 4.791491674489846e-05, "loss": 6.015, "step": 844 }, { "epoch": 0.04, "grad_norm": 1.3208236694335938, "learning_rate": 4.790503483373684e-05, "loss": 6.0501, "step": 848 }, { "epoch": 0.04, "grad_norm": 1.5497496128082275, "learning_rate": 4.7895152922575225e-05, "loss": 6.105, "step": 852 }, { "epoch": 
0.04, "grad_norm": 1.3851691484451294, "learning_rate": 4.788527101141361e-05, "loss": 6.1256, "step": 856 }, { "epoch": 0.04, "grad_norm": 1.506717324256897, "learning_rate": 4.787538910025199e-05, "loss": 5.9827, "step": 860 }, { "epoch": 0.04, "grad_norm": 1.2789814472198486, "learning_rate": 4.786550718909038e-05, "loss": 6.0632, "step": 864 }, { "epoch": 0.04, "grad_norm": 1.2237069606781006, "learning_rate": 4.7855625277928754e-05, "loss": 6.0908, "step": 868 }, { "epoch": 0.04, "grad_norm": 1.2778388261795044, "learning_rate": 4.7845743366767136e-05, "loss": 6.0442, "step": 872 }, { "epoch": 0.04, "grad_norm": 1.534972071647644, "learning_rate": 4.783586145560552e-05, "loss": 6.1035, "step": 876 }, { "epoch": 0.04, "grad_norm": 1.4843637943267822, "learning_rate": 4.78259795444439e-05, "loss": 6.0233, "step": 880 }, { "epoch": 0.04, "grad_norm": 1.4751485586166382, "learning_rate": 4.781609763328228e-05, "loss": 6.109, "step": 884 }, { "epoch": 0.04, "grad_norm": 1.399499535560608, "learning_rate": 4.7806215722120664e-05, "loss": 6.0075, "step": 888 }, { "epoch": 0.04, "grad_norm": 1.5269832611083984, "learning_rate": 4.7796333810959046e-05, "loss": 6.1387, "step": 892 }, { "epoch": 0.04, "grad_norm": 1.342552661895752, "learning_rate": 4.778645189979742e-05, "loss": 6.0763, "step": 896 }, { "epoch": 0.04, "grad_norm": 1.2727460861206055, "learning_rate": 4.77765699886358e-05, "loss": 5.9279, "step": 900 }, { "epoch": 0.04, "grad_norm": 1.3465628623962402, "learning_rate": 4.7766688077474185e-05, "loss": 6.0226, "step": 904 }, { "epoch": 0.04, "grad_norm": 1.288557767868042, "learning_rate": 4.775680616631257e-05, "loss": 6.0799, "step": 908 }, { "epoch": 0.05, "grad_norm": 1.375345230102539, "learning_rate": 4.774692425515095e-05, "loss": 6.0868, "step": 912 }, { "epoch": 0.05, "grad_norm": 1.440955638885498, "learning_rate": 4.773704234398933e-05, "loss": 5.9835, "step": 916 }, { "epoch": 0.05, "grad_norm": 1.3870395421981812, "learning_rate": 
4.7727160432827714e-05, "loss": 6.0711, "step": 920 }, { "epoch": 0.05, "grad_norm": 1.4793189764022827, "learning_rate": 4.771727852166609e-05, "loss": 6.1001, "step": 924 }, { "epoch": 0.05, "grad_norm": 1.2727391719818115, "learning_rate": 4.770739661050447e-05, "loss": 6.0782, "step": 928 }, { "epoch": 0.05, "grad_norm": 1.3459304571151733, "learning_rate": 4.769751469934285e-05, "loss": 5.9659, "step": 932 }, { "epoch": 0.05, "grad_norm": 1.6304296255111694, "learning_rate": 4.7687632788181235e-05, "loss": 6.0666, "step": 936 }, { "epoch": 0.05, "grad_norm": 1.721566081047058, "learning_rate": 4.767775087701962e-05, "loss": 6.0233, "step": 940 }, { "epoch": 0.05, "grad_norm": 1.486752986907959, "learning_rate": 4.7667868965858e-05, "loss": 6.0544, "step": 944 }, { "epoch": 0.05, "grad_norm": 1.404415488243103, "learning_rate": 4.765798705469638e-05, "loss": 5.8913, "step": 948 }, { "epoch": 0.05, "grad_norm": 1.550802230834961, "learning_rate": 4.764810514353476e-05, "loss": 6.0449, "step": 952 }, { "epoch": 0.05, "grad_norm": 1.5222808122634888, "learning_rate": 4.763822323237314e-05, "loss": 6.1524, "step": 956 }, { "epoch": 0.05, "grad_norm": 1.2582489252090454, "learning_rate": 4.762834132121152e-05, "loss": 6.09, "step": 960 }, { "epoch": 0.05, "grad_norm": 1.5326069593429565, "learning_rate": 4.76184594100499e-05, "loss": 6.0265, "step": 964 }, { "epoch": 0.05, "grad_norm": 1.5986106395721436, "learning_rate": 4.7608577498888285e-05, "loss": 6.1111, "step": 968 }, { "epoch": 0.05, "grad_norm": 1.2689043283462524, "learning_rate": 4.759869558772667e-05, "loss": 6.0827, "step": 972 }, { "epoch": 0.05, "grad_norm": 1.2669328451156616, "learning_rate": 4.758881367656505e-05, "loss": 6.1393, "step": 976 }, { "epoch": 0.05, "grad_norm": 1.2476458549499512, "learning_rate": 4.757893176540343e-05, "loss": 6.0072, "step": 980 }, { "epoch": 0.05, "grad_norm": 1.4797428846359253, "learning_rate": 4.756904985424181e-05, "loss": 6.0025, "step": 984 }, { "epoch": 
0.05, "grad_norm": 1.9927531480789185, "learning_rate": 4.7559167943080195e-05, "loss": 6.087, "step": 988 }, { "epoch": 0.05, "grad_norm": 1.6228762865066528, "learning_rate": 4.754928603191858e-05, "loss": 6.0355, "step": 992 }, { "epoch": 0.05, "grad_norm": 1.5881098508834839, "learning_rate": 4.753940412075696e-05, "loss": 6.0462, "step": 996 }, { "epoch": 0.05, "grad_norm": 1.944541335105896, "learning_rate": 4.752952220959534e-05, "loss": 6.1294, "step": 1000 }, { "epoch": 0.05, "grad_norm": 1.6126629114151, "learning_rate": 4.751964029843372e-05, "loss": 6.1586, "step": 1004 }, { "epoch": 0.05, "grad_norm": 1.4057198762893677, "learning_rate": 4.75097583872721e-05, "loss": 6.24, "step": 1008 }, { "epoch": 0.05, "grad_norm": 1.3622597455978394, "learning_rate": 4.749987647611048e-05, "loss": 5.9785, "step": 1012 }, { "epoch": 0.05, "grad_norm": 1.3643813133239746, "learning_rate": 4.748999456494886e-05, "loss": 6.0392, "step": 1016 }, { "epoch": 0.05, "grad_norm": 1.793847918510437, "learning_rate": 4.7480112653787245e-05, "loss": 5.9712, "step": 1020 }, { "epoch": 0.05, "grad_norm": 1.4792604446411133, "learning_rate": 4.747023074262563e-05, "loss": 6.0705, "step": 1024 }, { "epoch": 0.05, "grad_norm": 1.35064697265625, "learning_rate": 4.746034883146401e-05, "loss": 6.0608, "step": 1028 }, { "epoch": 0.05, "grad_norm": 1.3092408180236816, "learning_rate": 4.745046692030239e-05, "loss": 5.885, "step": 1032 }, { "epoch": 0.05, "grad_norm": 1.560230016708374, "learning_rate": 4.744058500914077e-05, "loss": 6.1149, "step": 1036 }, { "epoch": 0.05, "grad_norm": 1.5619248151779175, "learning_rate": 4.743070309797915e-05, "loss": 6.1887, "step": 1040 }, { "epoch": 0.05, "grad_norm": 1.4519466161727905, "learning_rate": 4.742082118681753e-05, "loss": 6.0853, "step": 1044 }, { "epoch": 0.05, "grad_norm": 1.4475151300430298, "learning_rate": 4.741093927565591e-05, "loss": 6.0921, "step": 1048 }, { "epoch": 0.05, "grad_norm": 1.2523047924041748, "learning_rate": 
4.7401057364494295e-05, "loss": 6.1132, "step": 1052 }, { "epoch": 0.05, "grad_norm": 1.2426884174346924, "learning_rate": 4.7391175453332677e-05, "loss": 5.9939, "step": 1056 }, { "epoch": 0.05, "grad_norm": 1.4118828773498535, "learning_rate": 4.738129354217106e-05, "loss": 6.082, "step": 1060 }, { "epoch": 0.05, "grad_norm": 1.6150298118591309, "learning_rate": 4.737141163100944e-05, "loss": 6.0342, "step": 1064 }, { "epoch": 0.05, "grad_norm": 1.3286446332931519, "learning_rate": 4.7361529719847816e-05, "loss": 5.9456, "step": 1068 }, { "epoch": 0.05, "grad_norm": 1.4334193468093872, "learning_rate": 4.73516478086862e-05, "loss": 5.9225, "step": 1072 }, { "epoch": 0.05, "grad_norm": 1.2666079998016357, "learning_rate": 4.734176589752458e-05, "loss": 6.0389, "step": 1076 }, { "epoch": 0.05, "grad_norm": 1.2868504524230957, "learning_rate": 4.733188398636296e-05, "loss": 6.0719, "step": 1080 }, { "epoch": 0.05, "grad_norm": 1.7467691898345947, "learning_rate": 4.7322002075201344e-05, "loss": 5.9916, "step": 1084 }, { "epoch": 0.05, "grad_norm": 1.4924054145812988, "learning_rate": 4.7312120164039726e-05, "loss": 5.9197, "step": 1088 }, { "epoch": 0.05, "grad_norm": 1.1824862957000732, "learning_rate": 4.730223825287811e-05, "loss": 5.9122, "step": 1092 }, { "epoch": 0.05, "grad_norm": 1.435604214668274, "learning_rate": 4.729235634171649e-05, "loss": 6.1722, "step": 1096 }, { "epoch": 0.05, "grad_norm": 1.6537317037582397, "learning_rate": 4.728247443055487e-05, "loss": 6.0557, "step": 1100 }, { "epoch": 0.05, "grad_norm": 1.237634539604187, "learning_rate": 4.7272592519393255e-05, "loss": 6.1475, "step": 1104 }, { "epoch": 0.05, "grad_norm": 1.2861469984054565, "learning_rate": 4.7262710608231637e-05, "loss": 6.0261, "step": 1108 }, { "epoch": 0.05, "grad_norm": 1.3722236156463623, "learning_rate": 4.725282869707002e-05, "loss": 5.9994, "step": 1112 }, { "epoch": 0.06, "grad_norm": 1.4114540815353394, "learning_rate": 4.72429467859084e-05, "loss": 5.923, "step": 
1116 }, { "epoch": 0.06, "grad_norm": 1.572077751159668, "learning_rate": 4.723306487474678e-05, "loss": 5.9886, "step": 1120 }, { "epoch": 0.06, "grad_norm": 1.971572995185852, "learning_rate": 4.722318296358516e-05, "loss": 6.0632, "step": 1124 }, { "epoch": 0.06, "grad_norm": 2.1920833587646484, "learning_rate": 4.721330105242354e-05, "loss": 6.066, "step": 1128 }, { "epoch": 0.06, "grad_norm": 1.6061211824417114, "learning_rate": 4.720341914126192e-05, "loss": 5.9523, "step": 1132 }, { "epoch": 0.06, "grad_norm": 1.617601990699768, "learning_rate": 4.7193537230100304e-05, "loss": 6.0579, "step": 1136 }, { "epoch": 0.06, "grad_norm": 1.674851655960083, "learning_rate": 4.7183655318938686e-05, "loss": 5.869, "step": 1140 }, { "epoch": 0.06, "grad_norm": 1.6465020179748535, "learning_rate": 4.717377340777707e-05, "loss": 5.9525, "step": 1144 }, { "epoch": 0.06, "grad_norm": 1.4357322454452515, "learning_rate": 4.716389149661545e-05, "loss": 5.8798, "step": 1148 }, { "epoch": 0.06, "grad_norm": 1.319442868232727, "learning_rate": 4.7154009585453826e-05, "loss": 6.0924, "step": 1152 }, { "epoch": 0.06, "grad_norm": 1.7044557332992554, "learning_rate": 4.714412767429221e-05, "loss": 6.0367, "step": 1156 }, { "epoch": 0.06, "grad_norm": 1.6891183853149414, "learning_rate": 4.713424576313059e-05, "loss": 5.9983, "step": 1160 }, { "epoch": 0.06, "grad_norm": 1.340993046760559, "learning_rate": 4.712436385196897e-05, "loss": 6.0353, "step": 1164 }, { "epoch": 0.06, "grad_norm": 1.3144937753677368, "learning_rate": 4.7114481940807354e-05, "loss": 5.9603, "step": 1168 }, { "epoch": 0.06, "grad_norm": 1.9154691696166992, "learning_rate": 4.7104600029645736e-05, "loss": 6.057, "step": 1172 }, { "epoch": 0.06, "grad_norm": 1.2976912260055542, "learning_rate": 4.709471811848412e-05, "loss": 5.992, "step": 1176 }, { "epoch": 0.06, "grad_norm": 1.4527958631515503, "learning_rate": 4.70848362073225e-05, "loss": 5.9253, "step": 1180 }, { "epoch": 0.06, "grad_norm": 
1.6319037675857544, "learning_rate": 4.7074954296160875e-05, "loss": 6.1028, "step": 1184 }, { "epoch": 0.06, "grad_norm": 1.7407358884811401, "learning_rate": 4.706507238499926e-05, "loss": 5.9943, "step": 1188 }, { "epoch": 0.06, "grad_norm": 2.127054452896118, "learning_rate": 4.705519047383764e-05, "loss": 6.0707, "step": 1192 }, { "epoch": 0.06, "grad_norm": 1.761857271194458, "learning_rate": 4.704530856267602e-05, "loss": 6.0144, "step": 1196 }, { "epoch": 0.06, "grad_norm": 1.4145355224609375, "learning_rate": 4.7035426651514404e-05, "loss": 6.0829, "step": 1200 }, { "epoch": 0.06, "grad_norm": 1.3968502283096313, "learning_rate": 4.7025544740352786e-05, "loss": 6.0907, "step": 1204 }, { "epoch": 0.06, "grad_norm": 1.406941294670105, "learning_rate": 4.701566282919117e-05, "loss": 5.9521, "step": 1208 }, { "epoch": 0.06, "grad_norm": 1.3803069591522217, "learning_rate": 4.700578091802955e-05, "loss": 5.9801, "step": 1212 }, { "epoch": 0.06, "grad_norm": 1.3069123029708862, "learning_rate": 4.699589900686793e-05, "loss": 5.9288, "step": 1216 }, { "epoch": 0.06, "grad_norm": 1.5246793031692505, "learning_rate": 4.6986017095706314e-05, "loss": 6.0128, "step": 1220 }, { "epoch": 0.06, "grad_norm": 1.5078657865524292, "learning_rate": 4.6976135184544696e-05, "loss": 5.9037, "step": 1224 }, { "epoch": 0.06, "grad_norm": 1.9630528688430786, "learning_rate": 4.696625327338308e-05, "loss": 5.9762, "step": 1228 }, { "epoch": 0.06, "grad_norm": 1.4446896314620972, "learning_rate": 4.695637136222146e-05, "loss": 5.9254, "step": 1232 }, { "epoch": 0.06, "grad_norm": 1.1959322690963745, "learning_rate": 4.6946489451059835e-05, "loss": 5.9263, "step": 1236 }, { "epoch": 0.06, "grad_norm": 1.5779132843017578, "learning_rate": 4.693660753989822e-05, "loss": 5.9132, "step": 1240 }, { "epoch": 0.06, "grad_norm": 1.3477551937103271, "learning_rate": 4.69267256287366e-05, "loss": 6.0072, "step": 1244 }, { "epoch": 0.06, "grad_norm": 1.950527310371399, "learning_rate": 
4.691684371757498e-05, "loss": 5.9209, "step": 1248 }, { "epoch": 0.06, "grad_norm": 2.0284175872802734, "learning_rate": 4.6906961806413364e-05, "loss": 6.0581, "step": 1252 }, { "epoch": 0.06, "grad_norm": 1.3213205337524414, "learning_rate": 4.6897079895251746e-05, "loss": 5.9722, "step": 1256 }, { "epoch": 0.06, "grad_norm": 1.2748960256576538, "learning_rate": 4.688719798409013e-05, "loss": 6.0285, "step": 1260 }, { "epoch": 0.06, "grad_norm": 1.4361499547958374, "learning_rate": 4.687731607292851e-05, "loss": 5.9448, "step": 1264 }, { "epoch": 0.06, "grad_norm": 1.5793545246124268, "learning_rate": 4.6867434161766885e-05, "loss": 6.0026, "step": 1268 }, { "epoch": 0.06, "grad_norm": 1.7214800119400024, "learning_rate": 4.685755225060527e-05, "loss": 6.1118, "step": 1272 }, { "epoch": 0.06, "grad_norm": 1.3730213642120361, "learning_rate": 4.684767033944365e-05, "loss": 5.9988, "step": 1276 }, { "epoch": 0.06, "grad_norm": 1.4589751958847046, "learning_rate": 4.683778842828203e-05, "loss": 6.0041, "step": 1280 }, { "epoch": 0.06, "grad_norm": 1.3940002918243408, "learning_rate": 4.682790651712041e-05, "loss": 5.9818, "step": 1284 }, { "epoch": 0.06, "grad_norm": 1.3168179988861084, "learning_rate": 4.6818024605958795e-05, "loss": 5.9819, "step": 1288 }, { "epoch": 0.06, "grad_norm": 1.737804651260376, "learning_rate": 4.680814269479718e-05, "loss": 5.9301, "step": 1292 }, { "epoch": 0.06, "grad_norm": 1.2958004474639893, "learning_rate": 4.679826078363555e-05, "loss": 5.9922, "step": 1296 }, { "epoch": 0.06, "grad_norm": 1.7017489671707153, "learning_rate": 4.6788378872473935e-05, "loss": 6.0218, "step": 1300 }, { "epoch": 0.06, "grad_norm": 1.7935067415237427, "learning_rate": 4.677849696131232e-05, "loss": 5.8878, "step": 1304 }, { "epoch": 0.06, "grad_norm": 2.293447256088257, "learning_rate": 4.67686150501507e-05, "loss": 6.1183, "step": 1308 }, { "epoch": 0.06, "grad_norm": 1.470645785331726, "learning_rate": 4.675873313898908e-05, "loss": 5.8939, "step": 
1312 }, { "epoch": 0.07, "grad_norm": 1.3084948062896729, "learning_rate": 4.674885122782746e-05, "loss": 5.9267, "step": 1316 }, { "epoch": 0.07, "grad_norm": 1.2316402196884155, "learning_rate": 4.6738969316665845e-05, "loss": 6.0664, "step": 1320 }, { "epoch": 0.07, "grad_norm": 1.3546851873397827, "learning_rate": 4.672908740550423e-05, "loss": 5.9785, "step": 1324 }, { "epoch": 0.07, "grad_norm": 1.1932753324508667, "learning_rate": 4.671920549434261e-05, "loss": 5.8807, "step": 1328 }, { "epoch": 0.07, "grad_norm": 1.3659350872039795, "learning_rate": 4.670932358318099e-05, "loss": 6.0175, "step": 1332 }, { "epoch": 0.07, "grad_norm": 1.316653847694397, "learning_rate": 4.669944167201937e-05, "loss": 5.9124, "step": 1336 }, { "epoch": 0.07, "grad_norm": 1.4429857730865479, "learning_rate": 4.6689559760857755e-05, "loss": 5.9828, "step": 1340 }, { "epoch": 0.07, "grad_norm": 1.4865456819534302, "learning_rate": 4.667967784969614e-05, "loss": 6.0829, "step": 1344 }, { "epoch": 0.07, "grad_norm": 1.453019618988037, "learning_rate": 4.666979593853452e-05, "loss": 5.9101, "step": 1348 }, { "epoch": 0.07, "grad_norm": 1.2942475080490112, "learning_rate": 4.6659914027372895e-05, "loss": 6.0094, "step": 1352 }, { "epoch": 0.07, "grad_norm": 1.6731159687042236, "learning_rate": 4.665003211621128e-05, "loss": 5.9274, "step": 1356 }, { "epoch": 0.07, "grad_norm": 1.5828598737716675, "learning_rate": 4.664015020504966e-05, "loss": 6.0568, "step": 1360 }, { "epoch": 0.07, "grad_norm": 1.3310965299606323, "learning_rate": 4.663026829388804e-05, "loss": 5.9873, "step": 1364 }, { "epoch": 0.07, "grad_norm": 1.5734776258468628, "learning_rate": 4.662038638272642e-05, "loss": 5.9194, "step": 1368 }, { "epoch": 0.07, "grad_norm": 1.8752501010894775, "learning_rate": 4.6610504471564805e-05, "loss": 5.945, "step": 1372 }, { "epoch": 0.07, "grad_norm": 1.5967419147491455, "learning_rate": 4.660062256040319e-05, "loss": 5.8828, "step": 1376 }, { "epoch": 0.07, "grad_norm": 
1.3059850931167603, "learning_rate": 4.659074064924156e-05, "loss": 5.9692, "step": 1380 }, { "epoch": 0.07, "grad_norm": 1.3276833295822144, "learning_rate": 4.6580858738079945e-05, "loss": 5.9998, "step": 1384 }, { "epoch": 0.07, "grad_norm": 1.481294870376587, "learning_rate": 4.6570976826918327e-05, "loss": 6.0054, "step": 1388 }, { "epoch": 0.07, "grad_norm": 1.2492045164108276, "learning_rate": 4.656109491575671e-05, "loss": 5.7969, "step": 1392 }, { "epoch": 0.07, "grad_norm": 1.3145158290863037, "learning_rate": 4.655121300459509e-05, "loss": 6.0625, "step": 1396 }, { "epoch": 0.07, "grad_norm": 1.7083948850631714, "learning_rate": 4.654133109343347e-05, "loss": 6.0563, "step": 1400 }, { "epoch": 0.07, "grad_norm": 2.5010440349578857, "learning_rate": 4.6531449182271855e-05, "loss": 5.9032, "step": 1404 }, { "epoch": 0.07, "grad_norm": 1.3582074642181396, "learning_rate": 4.652156727111023e-05, "loss": 5.9933, "step": 1408 }, { "epoch": 0.07, "grad_norm": 1.4667145013809204, "learning_rate": 4.651168535994861e-05, "loss": 5.8992, "step": 1412 }, { "epoch": 0.07, "grad_norm": 1.5343049764633179, "learning_rate": 4.6501803448786994e-05, "loss": 5.9115, "step": 1416 }, { "epoch": 0.07, "grad_norm": 1.1951204538345337, "learning_rate": 4.6491921537625376e-05, "loss": 5.8802, "step": 1420 }, { "epoch": 0.07, "grad_norm": 1.566027283668518, "learning_rate": 4.648203962646376e-05, "loss": 5.8612, "step": 1424 }, { "epoch": 0.07, "grad_norm": 1.2942713499069214, "learning_rate": 4.647215771530214e-05, "loss": 5.936, "step": 1428 }, { "epoch": 0.07, "grad_norm": 1.5581068992614746, "learning_rate": 4.646227580414052e-05, "loss": 5.8286, "step": 1432 }, { "epoch": 0.07, "grad_norm": 1.3115367889404297, "learning_rate": 4.6452393892978905e-05, "loss": 5.9605, "step": 1436 }, { "epoch": 0.07, "grad_norm": 1.3432234525680542, "learning_rate": 4.6442511981817287e-05, "loss": 5.8929, "step": 1440 }, { "epoch": 0.07, "grad_norm": 1.789021611213684, "learning_rate": 
4.643263007065567e-05, "loss": 6.0817, "step": 1444 }, { "epoch": 0.07, "grad_norm": 1.8558330535888672, "learning_rate": 4.642274815949405e-05, "loss": 6.0362, "step": 1448 }, { "epoch": 0.07, "grad_norm": 1.2429139614105225, "learning_rate": 4.641286624833243e-05, "loss": 6.0426, "step": 1452 }, { "epoch": 0.07, "grad_norm": 1.4920710325241089, "learning_rate": 4.6402984337170815e-05, "loss": 5.8848, "step": 1456 }, { "epoch": 0.07, "grad_norm": 1.456693172454834, "learning_rate": 4.63931024260092e-05, "loss": 6.0129, "step": 1460 }, { "epoch": 0.07, "grad_norm": 1.2984966039657593, "learning_rate": 4.638322051484757e-05, "loss": 5.9807, "step": 1464 }, { "epoch": 0.07, "grad_norm": 1.3711044788360596, "learning_rate": 4.6373338603685954e-05, "loss": 5.9158, "step": 1468 }, { "epoch": 0.07, "grad_norm": 1.432022213935852, "learning_rate": 4.6363456692524336e-05, "loss": 6.0205, "step": 1472 }, { "epoch": 0.07, "grad_norm": 1.549072504043579, "learning_rate": 4.635357478136272e-05, "loss": 5.9811, "step": 1476 }, { "epoch": 0.07, "grad_norm": 1.531638741493225, "learning_rate": 4.63436928702011e-05, "loss": 5.9643, "step": 1480 }, { "epoch": 0.07, "grad_norm": 1.9268430471420288, "learning_rate": 4.633381095903948e-05, "loss": 6.022, "step": 1484 }, { "epoch": 0.07, "grad_norm": 1.491971731185913, "learning_rate": 4.6323929047877865e-05, "loss": 5.9582, "step": 1488 }, { "epoch": 0.07, "grad_norm": 1.253057837486267, "learning_rate": 4.631404713671624e-05, "loss": 5.9783, "step": 1492 }, { "epoch": 0.07, "grad_norm": 1.7390155792236328, "learning_rate": 4.630416522555462e-05, "loss": 5.9987, "step": 1496 }, { "epoch": 0.07, "grad_norm": 1.605100154876709, "learning_rate": 4.6294283314393004e-05, "loss": 6.0706, "step": 1500 }, { "epoch": 0.07, "grad_norm": 1.30108642578125, "learning_rate": 4.6284401403231386e-05, "loss": 6.0712, "step": 1504 }, { "epoch": 0.07, "grad_norm": 1.7230815887451172, "learning_rate": 4.627451949206977e-05, "loss": 6.0353, "step": 1508 
}, { "epoch": 0.07, "grad_norm": 1.7896109819412231, "learning_rate": 4.626463758090815e-05, "loss": 5.9901, "step": 1512 }, { "epoch": 0.07, "grad_norm": 1.710947871208191, "learning_rate": 4.625475566974653e-05, "loss": 5.9141, "step": 1516 }, { "epoch": 0.08, "grad_norm": 1.5355948209762573, "learning_rate": 4.6244873758584914e-05, "loss": 5.9802, "step": 1520 }, { "epoch": 0.08, "grad_norm": 1.4352582693099976, "learning_rate": 4.623499184742329e-05, "loss": 6.0397, "step": 1524 }, { "epoch": 0.08, "grad_norm": 1.4116641283035278, "learning_rate": 4.622510993626167e-05, "loss": 6.0213, "step": 1528 }, { "epoch": 0.08, "grad_norm": 1.2762186527252197, "learning_rate": 4.6215228025100054e-05, "loss": 5.8323, "step": 1532 }, { "epoch": 0.08, "grad_norm": 1.8467053174972534, "learning_rate": 4.6205346113938436e-05, "loss": 5.8623, "step": 1536 }, { "epoch": 0.08, "grad_norm": 1.5691808462142944, "learning_rate": 4.619546420277682e-05, "loss": 6.0056, "step": 1540 }, { "epoch": 0.08, "grad_norm": 1.9999240636825562, "learning_rate": 4.61855822916152e-05, "loss": 6.1042, "step": 1544 }, { "epoch": 0.08, "grad_norm": 1.9879581928253174, "learning_rate": 4.617570038045358e-05, "loss": 5.8972, "step": 1548 }, { "epoch": 0.08, "grad_norm": 1.6930572986602783, "learning_rate": 4.6165818469291964e-05, "loss": 5.8287, "step": 1552 }, { "epoch": 0.08, "grad_norm": 2.064516544342041, "learning_rate": 4.6155936558130346e-05, "loss": 5.986, "step": 1556 }, { "epoch": 0.08, "grad_norm": 1.5297229290008545, "learning_rate": 4.614605464696873e-05, "loss": 6.0793, "step": 1560 }, { "epoch": 0.08, "grad_norm": 1.3446491956710815, "learning_rate": 4.613617273580711e-05, "loss": 5.971, "step": 1564 }, { "epoch": 0.08, "grad_norm": 1.3736571073532104, "learning_rate": 4.612629082464549e-05, "loss": 6.049, "step": 1568 }, { "epoch": 0.08, "grad_norm": 1.439300298690796, "learning_rate": 4.6116408913483874e-05, "loss": 6.0757, "step": 1572 }, { "epoch": 0.08, "grad_norm": 
1.766449213027954, "learning_rate": 4.610652700232225e-05, "loss": 5.8482, "step": 1576 }, { "epoch": 0.08, "grad_norm": 1.3712018728256226, "learning_rate": 4.609664509116063e-05, "loss": 5.9108, "step": 1580 }, { "epoch": 0.08, "grad_norm": 1.6353716850280762, "learning_rate": 4.6086763179999014e-05, "loss": 5.9461, "step": 1584 }, { "epoch": 0.08, "grad_norm": 1.549001932144165, "learning_rate": 4.6076881268837396e-05, "loss": 5.9503, "step": 1588 }, { "epoch": 0.08, "grad_norm": 1.8643873929977417, "learning_rate": 4.606699935767578e-05, "loss": 5.8876, "step": 1592 }, { "epoch": 0.08, "grad_norm": 3.1906590461730957, "learning_rate": 4.605711744651416e-05, "loss": 5.8155, "step": 1596 }, { "epoch": 0.08, "grad_norm": 1.4951763153076172, "learning_rate": 4.604723553535254e-05, "loss": 6.0645, "step": 1600 }, { "epoch": 0.08, "grad_norm": 1.5761932134628296, "learning_rate": 4.6037353624190924e-05, "loss": 5.8113, "step": 1604 }, { "epoch": 0.08, "grad_norm": 1.858544111251831, "learning_rate": 4.60274717130293e-05, "loss": 5.9492, "step": 1608 }, { "epoch": 0.08, "grad_norm": 1.2514946460723877, "learning_rate": 4.601758980186768e-05, "loss": 6.0415, "step": 1612 }, { "epoch": 0.08, "grad_norm": 1.4306968450546265, "learning_rate": 4.600770789070606e-05, "loss": 6.0258, "step": 1616 }, { "epoch": 0.08, "grad_norm": 1.4032137393951416, "learning_rate": 4.5997825979544445e-05, "loss": 5.9323, "step": 1620 }, { "epoch": 0.08, "grad_norm": 1.424999713897705, "learning_rate": 4.598794406838283e-05, "loss": 5.937, "step": 1624 }, { "epoch": 0.08, "grad_norm": 1.3444308042526245, "learning_rate": 4.597806215722121e-05, "loss": 5.8967, "step": 1628 }, { "epoch": 0.08, "grad_norm": 1.4193394184112549, "learning_rate": 4.596818024605959e-05, "loss": 5.9547, "step": 1632 }, { "epoch": 0.08, "grad_norm": 1.2748864889144897, "learning_rate": 4.595829833489797e-05, "loss": 5.9361, "step": 1636 }, { "epoch": 0.08, "grad_norm": 1.469801664352417, "learning_rate": 
4.594841642373635e-05, "loss": 5.8623, "step": 1640 }, { "epoch": 0.08, "grad_norm": 1.563642978668213, "learning_rate": 4.593853451257473e-05, "loss": 6.0724, "step": 1644 }, { "epoch": 0.08, "grad_norm": 1.504905104637146, "learning_rate": 4.592865260141311e-05, "loss": 5.9072, "step": 1648 }, { "epoch": 0.08, "grad_norm": 1.543295979499817, "learning_rate": 4.5918770690251495e-05, "loss": 5.9527, "step": 1652 }, { "epoch": 0.08, "grad_norm": 2.244002103805542, "learning_rate": 4.590888877908988e-05, "loss": 6.0182, "step": 1656 }, { "epoch": 0.08, "grad_norm": 2.3683278560638428, "learning_rate": 4.589900686792826e-05, "loss": 6.022, "step": 1660 }, { "epoch": 0.08, "grad_norm": 1.5250694751739502, "learning_rate": 4.588912495676664e-05, "loss": 5.9414, "step": 1664 }, { "epoch": 0.08, "grad_norm": 1.940976619720459, "learning_rate": 4.5879243045605023e-05, "loss": 5.9301, "step": 1668 }, { "epoch": 0.08, "grad_norm": 1.503818154335022, "learning_rate": 4.5869361134443405e-05, "loss": 5.8361, "step": 1672 }, { "epoch": 0.08, "grad_norm": 2.5099518299102783, "learning_rate": 4.585947922328179e-05, "loss": 5.8945, "step": 1676 }, { "epoch": 0.08, "grad_norm": 1.6731679439544678, "learning_rate": 4.584959731212017e-05, "loss": 5.9091, "step": 1680 }, { "epoch": 0.08, "grad_norm": 1.2417348623275757, "learning_rate": 4.583971540095855e-05, "loss": 5.9407, "step": 1684 }, { "epoch": 0.08, "grad_norm": 1.4417905807495117, "learning_rate": 4.5829833489796934e-05, "loss": 6.0244, "step": 1688 }, { "epoch": 0.08, "grad_norm": 1.8846923112869263, "learning_rate": 4.581995157863531e-05, "loss": 5.9297, "step": 1692 }, { "epoch": 0.08, "grad_norm": 1.6629383563995361, "learning_rate": 4.581006966747369e-05, "loss": 5.845, "step": 1696 }, { "epoch": 0.08, "grad_norm": 1.4970371723175049, "learning_rate": 4.580018775631207e-05, "loss": 5.9948, "step": 1700 }, { "epoch": 0.08, "grad_norm": 2.547971248626709, "learning_rate": 4.5790305845150455e-05, "loss": 5.9322, "step": 1704 
}, { "epoch": 0.08, "grad_norm": 2.2521891593933105, "learning_rate": 4.578042393398884e-05, "loss": 5.9448, "step": 1708 }, { "epoch": 0.08, "grad_norm": 1.8124247789382935, "learning_rate": 4.577054202282722e-05, "loss": 5.9226, "step": 1712 }, { "epoch": 0.08, "grad_norm": 2.33345890045166, "learning_rate": 4.57606601116656e-05, "loss": 6.0771, "step": 1716 }, { "epoch": 0.08, "grad_norm": 2.122553825378418, "learning_rate": 4.575077820050398e-05, "loss": 5.8754, "step": 1720 }, { "epoch": 0.09, "grad_norm": 1.5283949375152588, "learning_rate": 4.574089628934236e-05, "loss": 5.8644, "step": 1724 }, { "epoch": 0.09, "grad_norm": 1.6564345359802246, "learning_rate": 4.573101437818074e-05, "loss": 5.8691, "step": 1728 }, { "epoch": 0.09, "grad_norm": 1.6714503765106201, "learning_rate": 4.572113246701912e-05, "loss": 5.9227, "step": 1732 }, { "epoch": 0.09, "grad_norm": 1.543869972229004, "learning_rate": 4.5711250555857505e-05, "loss": 5.9588, "step": 1736 }, { "epoch": 0.09, "grad_norm": 1.248793601989746, "learning_rate": 4.570136864469589e-05, "loss": 5.9065, "step": 1740 }, { "epoch": 0.09, "grad_norm": 1.3636749982833862, "learning_rate": 4.569148673353427e-05, "loss": 5.8001, "step": 1744 }, { "epoch": 0.09, "grad_norm": 1.2990843057632446, "learning_rate": 4.568160482237265e-05, "loss": 5.9017, "step": 1748 }, { "epoch": 0.09, "grad_norm": 1.4160529375076294, "learning_rate": 4.5671722911211026e-05, "loss": 5.7955, "step": 1752 }, { "epoch": 0.09, "grad_norm": 1.5701916217803955, "learning_rate": 4.566184100004941e-05, "loss": 5.9576, "step": 1756 }, { "epoch": 0.09, "grad_norm": 1.5187253952026367, "learning_rate": 4.565195908888779e-05, "loss": 5.9054, "step": 1760 }, { "epoch": 0.09, "grad_norm": 1.6530662775039673, "learning_rate": 4.564207717772617e-05, "loss": 5.9385, "step": 1764 }, { "epoch": 0.09, "grad_norm": 1.481063723564148, "learning_rate": 4.5632195266564555e-05, "loss": 5.8584, "step": 1768 }, { "epoch": 0.09, "grad_norm": 
1.2179416418075562, "learning_rate": 4.562231335540294e-05, "loss": 5.9652, "step": 1772 }, { "epoch": 0.09, "grad_norm": 1.4156241416931152, "learning_rate": 4.561243144424132e-05, "loss": 5.7772, "step": 1776 }, { "epoch": 0.09, "grad_norm": 1.5597620010375977, "learning_rate": 4.56025495330797e-05, "loss": 5.9441, "step": 1780 }, { "epoch": 0.09, "grad_norm": 1.4729033708572388, "learning_rate": 4.559266762191808e-05, "loss": 5.9495, "step": 1784 }, { "epoch": 0.09, "grad_norm": 1.4252592325210571, "learning_rate": 4.5582785710756465e-05, "loss": 5.8379, "step": 1788 }, { "epoch": 0.09, "grad_norm": 1.3946348428726196, "learning_rate": 4.557290379959485e-05, "loss": 5.9435, "step": 1792 }, { "epoch": 0.09, "grad_norm": 1.366830587387085, "learning_rate": 4.556302188843323e-05, "loss": 5.9755, "step": 1796 }, { "epoch": 0.09, "grad_norm": 1.2371442317962646, "learning_rate": 4.555313997727161e-05, "loss": 5.9288, "step": 1800 }, { "epoch": 0.09, "grad_norm": 1.3472713232040405, "learning_rate": 4.5543258066109986e-05, "loss": 5.9537, "step": 1804 }, { "epoch": 0.09, "grad_norm": 1.9578423500061035, "learning_rate": 4.553337615494837e-05, "loss": 6.0531, "step": 1808 }, { "epoch": 0.09, "grad_norm": 1.4482135772705078, "learning_rate": 4.552349424378675e-05, "loss": 5.8764, "step": 1812 }, { "epoch": 0.09, "grad_norm": 1.4886963367462158, "learning_rate": 4.551361233262513e-05, "loss": 6.0265, "step": 1816 }, { "epoch": 0.09, "grad_norm": 1.89201819896698, "learning_rate": 4.5503730421463515e-05, "loss": 5.8338, "step": 1820 }, { "epoch": 0.09, "grad_norm": 1.207160234451294, "learning_rate": 4.54938485103019e-05, "loss": 5.8131, "step": 1824 }, { "epoch": 0.09, "grad_norm": 1.4633376598358154, "learning_rate": 4.548396659914028e-05, "loss": 5.8665, "step": 1828 }, { "epoch": 0.09, "grad_norm": 1.4453964233398438, "learning_rate": 4.547408468797866e-05, "loss": 5.963, "step": 1832 }, { "epoch": 0.09, "grad_norm": 2.0406370162963867, "learning_rate": 
4.5464202776817036e-05, "loss": 5.8779, "step": 1836 }, { "epoch": 0.09, "grad_norm": 1.6090114116668701, "learning_rate": 4.545432086565542e-05, "loss": 5.906, "step": 1840 }, { "epoch": 0.09, "grad_norm": 1.6481086015701294, "learning_rate": 4.54444389544938e-05, "loss": 6.0581, "step": 1844 }, { "epoch": 0.09, "grad_norm": 1.5279346704483032, "learning_rate": 4.543455704333218e-05, "loss": 5.937, "step": 1848 }, { "epoch": 0.09, "grad_norm": 1.580449104309082, "learning_rate": 4.5424675132170564e-05, "loss": 5.8628, "step": 1852 }, { "epoch": 0.09, "grad_norm": 1.5227487087249756, "learning_rate": 4.5414793221008946e-05, "loss": 5.8597, "step": 1856 }, { "epoch": 0.09, "grad_norm": 1.3211569786071777, "learning_rate": 4.540491130984733e-05, "loss": 5.9294, "step": 1860 }, { "epoch": 0.09, "grad_norm": 1.5065453052520752, "learning_rate": 4.5395029398685704e-05, "loss": 5.9311, "step": 1864 }, { "epoch": 0.09, "grad_norm": 1.6894686222076416, "learning_rate": 4.5385147487524086e-05, "loss": 5.9489, "step": 1868 }, { "epoch": 0.09, "grad_norm": 1.4861186742782593, "learning_rate": 4.537526557636247e-05, "loss": 5.9022, "step": 1872 }, { "epoch": 0.09, "grad_norm": 1.7374814748764038, "learning_rate": 4.536538366520085e-05, "loss": 5.9511, "step": 1876 }, { "epoch": 0.09, "grad_norm": 1.7111232280731201, "learning_rate": 4.535550175403923e-05, "loss": 5.9008, "step": 1880 }, { "epoch": 0.09, "grad_norm": 1.887349009513855, "learning_rate": 4.5345619842877614e-05, "loss": 5.7785, "step": 1884 }, { "epoch": 0.09, "grad_norm": 1.397018313407898, "learning_rate": 4.5335737931715996e-05, "loss": 5.8256, "step": 1888 }, { "epoch": 0.09, "grad_norm": 1.312874674797058, "learning_rate": 4.532585602055438e-05, "loss": 5.7768, "step": 1892 }, { "epoch": 0.09, "grad_norm": 1.6667051315307617, "learning_rate": 4.531597410939276e-05, "loss": 5.9521, "step": 1896 }, { "epoch": 0.09, "grad_norm": 1.3993628025054932, "learning_rate": 4.530609219823114e-05, "loss": 5.8285, "step": 
1900 }, { "epoch": 0.09, "grad_norm": 1.5168507099151611, "learning_rate": 4.5296210287069524e-05, "loss": 5.9211, "step": 1904 }, { "epoch": 0.09, "grad_norm": 1.9311803579330444, "learning_rate": 4.5286328375907906e-05, "loss": 6.015, "step": 1908 }, { "epoch": 0.09, "grad_norm": 1.33588707447052, "learning_rate": 4.527644646474629e-05, "loss": 5.8528, "step": 1912 }, { "epoch": 0.09, "grad_norm": 1.831453561782837, "learning_rate": 4.526656455358467e-05, "loss": 5.8254, "step": 1916 }, { "epoch": 0.09, "grad_norm": 1.7024390697479248, "learning_rate": 4.5256682642423046e-05, "loss": 5.8458, "step": 1920 }, { "epoch": 0.1, "grad_norm": 1.5297564268112183, "learning_rate": 4.524680073126143e-05, "loss": 5.8749, "step": 1924 }, { "epoch": 0.1, "grad_norm": 1.4288631677627563, "learning_rate": 4.523691882009981e-05, "loss": 5.8288, "step": 1928 }, { "epoch": 0.1, "grad_norm": 1.723061203956604, "learning_rate": 4.522703690893819e-05, "loss": 5.8773, "step": 1932 }, { "epoch": 0.1, "grad_norm": 1.3699047565460205, "learning_rate": 4.5217154997776574e-05, "loss": 5.94, "step": 1936 }, { "epoch": 0.1, "grad_norm": 1.883584976196289, "learning_rate": 4.5207273086614956e-05, "loss": 5.8437, "step": 1940 }, { "epoch": 0.1, "grad_norm": 1.6371957063674927, "learning_rate": 4.519739117545334e-05, "loss": 5.9447, "step": 1944 }, { "epoch": 0.1, "grad_norm": 1.4362295866012573, "learning_rate": 4.5187509264291713e-05, "loss": 6.0218, "step": 1948 }, { "epoch": 0.1, "grad_norm": 1.9110528230667114, "learning_rate": 4.5177627353130095e-05, "loss": 5.9644, "step": 1952 }, { "epoch": 0.1, "grad_norm": 2.587083101272583, "learning_rate": 4.516774544196848e-05, "loss": 5.9193, "step": 1956 }, { "epoch": 0.1, "grad_norm": 1.5505852699279785, "learning_rate": 4.515786353080686e-05, "loss": 5.8284, "step": 1960 }, { "epoch": 0.1, "grad_norm": 1.5493978261947632, "learning_rate": 4.514798161964524e-05, "loss": 6.0078, "step": 1964 }, { "epoch": 0.1, "grad_norm": 1.5519258975982666, 
"learning_rate": 4.5138099708483624e-05, "loss": 5.8343, "step": 1968 }, { "epoch": 0.1, "grad_norm": 1.4279431104660034, "learning_rate": 4.5128217797322006e-05, "loss": 5.9317, "step": 1972 }, { "epoch": 0.1, "grad_norm": 1.8912272453308105, "learning_rate": 4.511833588616038e-05, "loss": 5.8355, "step": 1976 }, { "epoch": 0.1, "grad_norm": 1.440687656402588, "learning_rate": 4.510845397499876e-05, "loss": 5.9027, "step": 1980 }, { "epoch": 0.1, "grad_norm": 1.8350549936294556, "learning_rate": 4.5098572063837145e-05, "loss": 5.9534, "step": 1984 }, { "epoch": 0.1, "grad_norm": 1.4987351894378662, "learning_rate": 4.508869015267553e-05, "loss": 6.0618, "step": 1988 }, { "epoch": 0.1, "grad_norm": 1.4456908702850342, "learning_rate": 4.507880824151391e-05, "loss": 5.9174, "step": 1992 }, { "epoch": 0.1, "grad_norm": 1.5561717748641968, "learning_rate": 4.506892633035229e-05, "loss": 5.8602, "step": 1996 }, { "epoch": 0.1, "grad_norm": 1.976826548576355, "learning_rate": 4.5059044419190673e-05, "loss": 5.97, "step": 2000 }, { "epoch": 0.1, "grad_norm": 1.561604380607605, "learning_rate": 4.5049162508029055e-05, "loss": 5.9561, "step": 2004 }, { "epoch": 0.1, "grad_norm": 2.241724967956543, "learning_rate": 4.503928059686743e-05, "loss": 5.9314, "step": 2008 }, { "epoch": 0.1, "grad_norm": 2.012200355529785, "learning_rate": 4.502939868570582e-05, "loss": 5.8255, "step": 2012 }, { "epoch": 0.1, "grad_norm": 1.6281402111053467, "learning_rate": 4.50195167745442e-05, "loss": 5.8459, "step": 2016 }, { "epoch": 0.1, "grad_norm": 1.9645634889602661, "learning_rate": 4.5009634863382584e-05, "loss": 5.805, "step": 2020 }, { "epoch": 0.1, "grad_norm": 1.7598261833190918, "learning_rate": 4.4999752952220966e-05, "loss": 5.7636, "step": 2024 }, { "epoch": 0.1, "grad_norm": 1.8579994440078735, "learning_rate": 4.498987104105935e-05, "loss": 5.9409, "step": 2028 }, { "epoch": 0.1, "grad_norm": 1.3588138818740845, "learning_rate": 4.497998912989772e-05, "loss": 5.8235, "step": 
2032 }, { "epoch": 0.1, "grad_norm": 1.471695065498352, "learning_rate": 4.4970107218736105e-05, "loss": 5.9183, "step": 2036 }, { "epoch": 0.1, "grad_norm": 1.500862717628479, "learning_rate": 4.496022530757449e-05, "loss": 5.8246, "step": 2040 }, { "epoch": 0.1, "grad_norm": 1.3388968706130981, "learning_rate": 4.495034339641287e-05, "loss": 5.9709, "step": 2044 }, { "epoch": 0.1, "grad_norm": 1.5488966703414917, "learning_rate": 4.494046148525125e-05, "loss": 5.8914, "step": 2048 }, { "epoch": 0.1, "grad_norm": 1.5115787982940674, "learning_rate": 4.4930579574089633e-05, "loss": 5.8664, "step": 2052 }, { "epoch": 0.1, "grad_norm": 1.632888674736023, "learning_rate": 4.4920697662928015e-05, "loss": 5.9276, "step": 2056 }, { "epoch": 0.1, "grad_norm": 1.3317750692367554, "learning_rate": 4.491081575176639e-05, "loss": 5.8649, "step": 2060 }, { "epoch": 0.1, "grad_norm": 1.7422010898590088, "learning_rate": 4.490093384060477e-05, "loss": 5.8982, "step": 2064 }, { "epoch": 0.1, "grad_norm": 2.052384614944458, "learning_rate": 4.4891051929443155e-05, "loss": 5.9476, "step": 2068 }, { "epoch": 0.1, "grad_norm": 2.492372989654541, "learning_rate": 4.488117001828154e-05, "loss": 5.8919, "step": 2072 }, { "epoch": 0.1, "grad_norm": 1.3981868028640747, "learning_rate": 4.487128810711992e-05, "loss": 5.9484, "step": 2076 }, { "epoch": 0.1, "grad_norm": 1.7076584100723267, "learning_rate": 4.48614061959583e-05, "loss": 5.7792, "step": 2080 }, { "epoch": 0.1, "grad_norm": 1.4366710186004639, "learning_rate": 4.485152428479668e-05, "loss": 5.8862, "step": 2084 }, { "epoch": 0.1, "grad_norm": 1.2299433946609497, "learning_rate": 4.4841642373635065e-05, "loss": 5.7836, "step": 2088 }, { "epoch": 0.1, "grad_norm": 1.4261624813079834, "learning_rate": 4.483176046247344e-05, "loss": 5.9149, "step": 2092 }, { "epoch": 0.1, "grad_norm": 1.5158166885375977, "learning_rate": 4.482187855131182e-05, "loss": 5.8517, "step": 2096 }, { "epoch": 0.1, "grad_norm": 1.3522121906280518, 
"learning_rate": 4.4811996640150205e-05, "loss": 5.8754, "step": 2100 }, { "epoch": 0.1, "grad_norm": 1.343929648399353, "learning_rate": 4.480211472898859e-05, "loss": 5.771, "step": 2104 }, { "epoch": 0.1, "grad_norm": 1.6901476383209229, "learning_rate": 4.479223281782697e-05, "loss": 5.8675, "step": 2108 }, { "epoch": 0.1, "grad_norm": 1.438178539276123, "learning_rate": 4.478235090666535e-05, "loss": 5.9775, "step": 2112 }, { "epoch": 0.1, "grad_norm": 2.2448184490203857, "learning_rate": 4.477246899550373e-05, "loss": 5.861, "step": 2116 }, { "epoch": 0.1, "grad_norm": 1.3768398761749268, "learning_rate": 4.476258708434211e-05, "loss": 5.8436, "step": 2120 }, { "epoch": 0.1, "grad_norm": 1.5526635646820068, "learning_rate": 4.475270517318049e-05, "loss": 5.8113, "step": 2124 }, { "epoch": 0.11, "grad_norm": 2.0470657348632812, "learning_rate": 4.474282326201888e-05, "loss": 5.8941, "step": 2128 }, { "epoch": 0.11, "grad_norm": 1.404012680053711, "learning_rate": 4.473294135085726e-05, "loss": 5.9352, "step": 2132 }, { "epoch": 0.11, "grad_norm": 1.3800268173217773, "learning_rate": 4.472305943969564e-05, "loss": 5.9212, "step": 2136 }, { "epoch": 0.11, "grad_norm": 1.8839443922042847, "learning_rate": 4.4713177528534025e-05, "loss": 5.8908, "step": 2140 }, { "epoch": 0.11, "grad_norm": 1.8185203075408936, "learning_rate": 4.47032956173724e-05, "loss": 5.7962, "step": 2144 }, { "epoch": 0.11, "grad_norm": 1.6154285669326782, "learning_rate": 4.469341370621078e-05, "loss": 5.8918, "step": 2148 }, { "epoch": 0.11, "grad_norm": 2.3525383472442627, "learning_rate": 4.4683531795049165e-05, "loss": 5.8528, "step": 2152 }, { "epoch": 0.11, "grad_norm": 1.7467882633209229, "learning_rate": 4.467364988388755e-05, "loss": 5.8305, "step": 2156 }, { "epoch": 0.11, "grad_norm": 1.7372554540634155, "learning_rate": 4.466376797272593e-05, "loss": 5.7785, "step": 2160 }, { "epoch": 0.11, "grad_norm": 1.7468005418777466, "learning_rate": 4.465388606156431e-05, "loss": 6.0067, 
"step": 2164 }, { "epoch": 0.11, "grad_norm": 1.874570608139038, "learning_rate": 4.464400415040269e-05, "loss": 5.9846, "step": 2168 }, { "epoch": 0.11, "grad_norm": 1.5136998891830444, "learning_rate": 4.4634122239241075e-05, "loss": 5.8196, "step": 2172 }, { "epoch": 0.11, "grad_norm": 1.5951347351074219, "learning_rate": 4.462424032807945e-05, "loss": 5.8991, "step": 2176 }, { "epoch": 0.11, "grad_norm": 1.9109349250793457, "learning_rate": 4.461435841691783e-05, "loss": 5.8357, "step": 2180 }, { "epoch": 0.11, "grad_norm": 1.5938990116119385, "learning_rate": 4.4604476505756214e-05, "loss": 5.7961, "step": 2184 }, { "epoch": 0.11, "grad_norm": 1.457306981086731, "learning_rate": 4.4594594594594596e-05, "loss": 5.8715, "step": 2188 }, { "epoch": 0.11, "grad_norm": 1.478857398033142, "learning_rate": 4.458471268343298e-05, "loss": 5.9143, "step": 2192 }, { "epoch": 0.11, "grad_norm": 1.413000464439392, "learning_rate": 4.457483077227136e-05, "loss": 5.8109, "step": 2196 }, { "epoch": 0.11, "grad_norm": 1.348897099494934, "learning_rate": 4.456494886110974e-05, "loss": 5.8774, "step": 2200 }, { "epoch": 0.11, "grad_norm": 1.4481480121612549, "learning_rate": 4.455506694994812e-05, "loss": 5.7836, "step": 2204 }, { "epoch": 0.11, "grad_norm": 2.077380895614624, "learning_rate": 4.45451850387865e-05, "loss": 5.8215, "step": 2208 }, { "epoch": 0.11, "grad_norm": 1.5154703855514526, "learning_rate": 4.453530312762488e-05, "loss": 5.8485, "step": 2212 }, { "epoch": 0.11, "grad_norm": 1.3549476861953735, "learning_rate": 4.4525421216463264e-05, "loss": 5.9692, "step": 2216 }, { "epoch": 0.11, "grad_norm": 1.3713443279266357, "learning_rate": 4.4515539305301646e-05, "loss": 5.8609, "step": 2220 }, { "epoch": 0.11, "grad_norm": 1.3348569869995117, "learning_rate": 4.450565739414003e-05, "loss": 5.831, "step": 2224 }, { "epoch": 0.11, "grad_norm": 1.4860191345214844, "learning_rate": 4.449577548297841e-05, "loss": 6.0256, "step": 2228 }, { "epoch": 0.11, "grad_norm": 
1.3161065578460693, "learning_rate": 4.448589357181679e-05, "loss": 5.9999, "step": 2232 }, { "epoch": 0.11, "grad_norm": 1.8355900049209595, "learning_rate": 4.447601166065517e-05, "loss": 5.8118, "step": 2236 }, { "epoch": 0.11, "grad_norm": 1.4929054975509644, "learning_rate": 4.4466129749493556e-05, "loss": 5.9227, "step": 2240 }, { "epoch": 0.11, "grad_norm": 1.6608703136444092, "learning_rate": 4.445624783833194e-05, "loss": 5.8468, "step": 2244 }, { "epoch": 0.11, "grad_norm": 1.5917619466781616, "learning_rate": 4.444636592717032e-05, "loss": 5.9577, "step": 2248 }, { "epoch": 0.11, "grad_norm": 1.5820611715316772, "learning_rate": 4.44364840160087e-05, "loss": 5.8065, "step": 2252 }, { "epoch": 0.11, "grad_norm": 1.6974551677703857, "learning_rate": 4.4426602104847085e-05, "loss": 5.963, "step": 2256 }, { "epoch": 0.11, "grad_norm": 1.5884884595870972, "learning_rate": 4.441672019368546e-05, "loss": 5.7997, "step": 2260 }, { "epoch": 0.11, "grad_norm": 1.5706701278686523, "learning_rate": 4.440683828252384e-05, "loss": 5.8355, "step": 2264 }, { "epoch": 0.11, "grad_norm": 1.5051394701004028, "learning_rate": 4.4396956371362224e-05, "loss": 5.9357, "step": 2268 }, { "epoch": 0.11, "grad_norm": 1.6415704488754272, "learning_rate": 4.4387074460200606e-05, "loss": 5.7755, "step": 2272 }, { "epoch": 0.11, "grad_norm": 1.9928330183029175, "learning_rate": 4.437719254903899e-05, "loss": 6.0354, "step": 2276 }, { "epoch": 0.11, "grad_norm": 1.568490982055664, "learning_rate": 4.436731063787737e-05, "loss": 5.9754, "step": 2280 }, { "epoch": 0.11, "grad_norm": 1.4449853897094727, "learning_rate": 4.435742872671575e-05, "loss": 5.939, "step": 2284 }, { "epoch": 0.11, "grad_norm": 2.227518081665039, "learning_rate": 4.434754681555413e-05, "loss": 5.8591, "step": 2288 }, { "epoch": 0.11, "grad_norm": 1.7351478338241577, "learning_rate": 4.433766490439251e-05, "loss": 5.8574, "step": 2292 }, { "epoch": 0.11, "grad_norm": 2.8627262115478516, "learning_rate": 
4.432778299323089e-05, "loss": 5.9735, "step": 2296 }, { "epoch": 0.11, "grad_norm": 1.56145179271698, "learning_rate": 4.4317901082069274e-05, "loss": 5.9248, "step": 2300 }, { "epoch": 0.11, "grad_norm": 1.5302497148513794, "learning_rate": 4.4308019170907656e-05, "loss": 5.8347, "step": 2304 }, { "epoch": 0.11, "grad_norm": 1.597585678100586, "learning_rate": 4.429813725974604e-05, "loss": 5.7803, "step": 2308 }, { "epoch": 0.11, "grad_norm": 1.529463768005371, "learning_rate": 4.428825534858442e-05, "loss": 5.8968, "step": 2312 }, { "epoch": 0.11, "grad_norm": 1.4933751821517944, "learning_rate": 4.42783734374228e-05, "loss": 5.9021, "step": 2316 }, { "epoch": 0.11, "grad_norm": 1.6076233386993408, "learning_rate": 4.426849152626118e-05, "loss": 5.8318, "step": 2320 }, { "epoch": 0.11, "grad_norm": 1.7479904890060425, "learning_rate": 4.425860961509956e-05, "loss": 5.8686, "step": 2324 }, { "epoch": 0.12, "grad_norm": 1.5784941911697388, "learning_rate": 4.424872770393794e-05, "loss": 5.8552, "step": 2328 }, { "epoch": 0.12, "grad_norm": 2.12491512298584, "learning_rate": 4.4238845792776323e-05, "loss": 5.9495, "step": 2332 }, { "epoch": 0.12, "grad_norm": 1.6172250509262085, "learning_rate": 4.4228963881614705e-05, "loss": 5.7925, "step": 2336 }, { "epoch": 0.12, "grad_norm": 1.4207360744476318, "learning_rate": 4.421908197045309e-05, "loss": 5.8818, "step": 2340 }, { "epoch": 0.12, "grad_norm": 2.528517246246338, "learning_rate": 4.420920005929147e-05, "loss": 5.848, "step": 2344 }, { "epoch": 0.12, "grad_norm": 1.4071799516677856, "learning_rate": 4.4199318148129845e-05, "loss": 5.7346, "step": 2348 }, { "epoch": 0.12, "grad_norm": 1.543596863746643, "learning_rate": 4.418943623696823e-05, "loss": 5.87, "step": 2352 }, { "epoch": 0.12, "grad_norm": 1.697926640510559, "learning_rate": 4.4179554325806616e-05, "loss": 5.8042, "step": 2356 }, { "epoch": 0.12, "grad_norm": 1.8649811744689941, "learning_rate": 4.4169672414645e-05, "loss": 5.8247, "step": 2360 }, { 
"epoch": 0.12, "grad_norm": 1.496875286102295, "learning_rate": 4.415979050348338e-05, "loss": 5.9044, "step": 2364 }, { "epoch": 0.12, "grad_norm": 1.469252586364746, "learning_rate": 4.414990859232176e-05, "loss": 5.8298, "step": 2368 }, { "epoch": 0.12, "grad_norm": 1.4524880647659302, "learning_rate": 4.414002668116014e-05, "loss": 5.8173, "step": 2372 }, { "epoch": 0.12, "grad_norm": 1.2573550939559937, "learning_rate": 4.413014476999852e-05, "loss": 5.8825, "step": 2376 }, { "epoch": 0.12, "grad_norm": 1.3610568046569824, "learning_rate": 4.41202628588369e-05, "loss": 5.9205, "step": 2380 }, { "epoch": 0.12, "grad_norm": 1.3914682865142822, "learning_rate": 4.4110380947675283e-05, "loss": 5.8465, "step": 2384 }, { "epoch": 0.12, "grad_norm": 1.628902792930603, "learning_rate": 4.4100499036513665e-05, "loss": 5.9335, "step": 2388 }, { "epoch": 0.12, "grad_norm": 2.1272637844085693, "learning_rate": 4.409061712535205e-05, "loss": 5.8199, "step": 2392 }, { "epoch": 0.12, "grad_norm": 1.497086524963379, "learning_rate": 4.408073521419043e-05, "loss": 5.7442, "step": 2396 }, { "epoch": 0.12, "grad_norm": 1.6062419414520264, "learning_rate": 4.407085330302881e-05, "loss": 5.7353, "step": 2400 }, { "epoch": 0.12, "grad_norm": 1.6324589252471924, "learning_rate": 4.406097139186719e-05, "loss": 5.8851, "step": 2404 }, { "epoch": 0.12, "grad_norm": 1.7234266996383667, "learning_rate": 4.405108948070557e-05, "loss": 5.9936, "step": 2408 }, { "epoch": 0.12, "grad_norm": 1.7293999195098877, "learning_rate": 4.404120756954395e-05, "loss": 5.8189, "step": 2412 }, { "epoch": 0.12, "grad_norm": 1.554434895515442, "learning_rate": 4.403132565838233e-05, "loss": 5.8536, "step": 2416 }, { "epoch": 0.12, "grad_norm": 1.773876428604126, "learning_rate": 4.4021443747220715e-05, "loss": 5.808, "step": 2420 }, { "epoch": 0.12, "grad_norm": 1.341245174407959, "learning_rate": 4.40115618360591e-05, "loss": 5.785, "step": 2424 }, { "epoch": 0.12, "grad_norm": 1.4888144731521606, 
"learning_rate": 4.400167992489748e-05, "loss": 5.7738, "step": 2428 }, { "epoch": 0.12, "grad_norm": 1.269622564315796, "learning_rate": 4.3991798013735855e-05, "loss": 5.7985, "step": 2432 }, { "epoch": 0.12, "grad_norm": 2.2829160690307617, "learning_rate": 4.398191610257424e-05, "loss": 5.9229, "step": 2436 }, { "epoch": 0.12, "grad_norm": 1.5251401662826538, "learning_rate": 4.397203419141262e-05, "loss": 5.967, "step": 2440 }, { "epoch": 0.12, "grad_norm": 1.8288617134094238, "learning_rate": 4.3962152280251e-05, "loss": 5.8184, "step": 2444 }, { "epoch": 0.12, "grad_norm": 1.5593011379241943, "learning_rate": 4.395227036908938e-05, "loss": 5.9089, "step": 2448 }, { "epoch": 0.12, "grad_norm": 1.4598174095153809, "learning_rate": 4.3942388457927765e-05, "loss": 5.8489, "step": 2452 }, { "epoch": 0.12, "grad_norm": 1.364039421081543, "learning_rate": 4.393250654676615e-05, "loss": 5.9755, "step": 2456 }, { "epoch": 0.12, "grad_norm": 1.418669581413269, "learning_rate": 4.392262463560453e-05, "loss": 5.8468, "step": 2460 }, { "epoch": 0.12, "grad_norm": 1.7515896558761597, "learning_rate": 4.3912742724442904e-05, "loss": 6.0253, "step": 2464 }, { "epoch": 0.12, "grad_norm": 1.6947784423828125, "learning_rate": 4.3902860813281286e-05, "loss": 5.8654, "step": 2468 }, { "epoch": 0.12, "grad_norm": 1.597745656967163, "learning_rate": 4.3892978902119675e-05, "loss": 5.78, "step": 2472 }, { "epoch": 0.12, "grad_norm": 1.9116005897521973, "learning_rate": 4.388309699095806e-05, "loss": 5.871, "step": 2476 }, { "epoch": 0.12, "grad_norm": 1.5638071298599243, "learning_rate": 4.387321507979644e-05, "loss": 5.8569, "step": 2480 }, { "epoch": 0.12, "grad_norm": 2.321582794189453, "learning_rate": 4.386333316863482e-05, "loss": 5.7426, "step": 2484 }, { "epoch": 0.12, "grad_norm": 1.4646013975143433, "learning_rate": 4.38534512574732e-05, "loss": 5.8682, "step": 2488 }, { "epoch": 0.12, "grad_norm": 1.777219533920288, "learning_rate": 4.384356934631158e-05, "loss": 5.778, 
"step": 2492 }, { "epoch": 0.12, "grad_norm": 1.7113094329833984, "learning_rate": 4.383368743514996e-05, "loss": 5.8171, "step": 2496 }, { "epoch": 0.12, "grad_norm": 1.6728547811508179, "learning_rate": 4.382380552398834e-05, "loss": 5.8537, "step": 2500 }, { "epoch": 0.12, "grad_norm": 1.5846984386444092, "learning_rate": 4.3813923612826725e-05, "loss": 5.9563, "step": 2504 }, { "epoch": 0.12, "grad_norm": 2.277194023132324, "learning_rate": 4.380404170166511e-05, "loss": 5.9298, "step": 2508 }, { "epoch": 0.12, "grad_norm": 2.1459007263183594, "learning_rate": 4.379415979050349e-05, "loss": 5.9069, "step": 2512 }, { "epoch": 0.12, "grad_norm": 1.9131797552108765, "learning_rate": 4.3784277879341864e-05, "loss": 5.8698, "step": 2516 }, { "epoch": 0.12, "grad_norm": 1.5482133626937866, "learning_rate": 4.3774395968180246e-05, "loss": 5.8516, "step": 2520 }, { "epoch": 0.12, "grad_norm": 1.3043674230575562, "learning_rate": 4.376451405701863e-05, "loss": 5.8176, "step": 2524 }, { "epoch": 0.12, "grad_norm": 1.6391806602478027, "learning_rate": 4.375463214585701e-05, "loss": 5.7712, "step": 2528 }, { "epoch": 0.13, "grad_norm": 1.7865711450576782, "learning_rate": 4.374475023469539e-05, "loss": 5.9933, "step": 2532 }, { "epoch": 0.13, "grad_norm": 1.6309571266174316, "learning_rate": 4.3734868323533775e-05, "loss": 5.7993, "step": 2536 }, { "epoch": 0.13, "grad_norm": 1.9978580474853516, "learning_rate": 4.372498641237216e-05, "loss": 5.8303, "step": 2540 }, { "epoch": 0.13, "grad_norm": 1.507794976234436, "learning_rate": 4.371510450121053e-05, "loss": 5.7701, "step": 2544 }, { "epoch": 0.13, "grad_norm": 1.4123347997665405, "learning_rate": 4.3705222590048914e-05, "loss": 5.9114, "step": 2548 }, { "epoch": 0.13, "grad_norm": 1.467590570449829, "learning_rate": 4.3695340678887296e-05, "loss": 5.893, "step": 2552 }, { "epoch": 0.13, "grad_norm": 1.4081734418869019, "learning_rate": 4.368545876772568e-05, "loss": 5.8997, "step": 2556 }, { "epoch": 0.13, "grad_norm": 
1.4744434356689453, "learning_rate": 4.367557685656406e-05, "loss": 5.7802, "step": 2560 }, { "epoch": 0.13, "grad_norm": 1.405860185623169, "learning_rate": 4.366569494540244e-05, "loss": 5.9679, "step": 2564 }, { "epoch": 0.13, "grad_norm": 1.8207783699035645, "learning_rate": 4.3655813034240824e-05, "loss": 5.9814, "step": 2568 }, { "epoch": 0.13, "grad_norm": 1.5685409307479858, "learning_rate": 4.3645931123079206e-05, "loss": 5.9399, "step": 2572 }, { "epoch": 0.13, "grad_norm": 1.218668818473816, "learning_rate": 4.363604921191758e-05, "loss": 5.7926, "step": 2576 }, { "epoch": 0.13, "grad_norm": 1.3519160747528076, "learning_rate": 4.3626167300755964e-05, "loss": 5.7725, "step": 2580 }, { "epoch": 0.13, "grad_norm": 2.0880026817321777, "learning_rate": 4.361628538959435e-05, "loss": 5.9561, "step": 2584 }, { "epoch": 0.13, "grad_norm": 1.6406432390213013, "learning_rate": 4.3606403478432735e-05, "loss": 5.8689, "step": 2588 }, { "epoch": 0.13, "grad_norm": 1.4885075092315674, "learning_rate": 4.359652156727112e-05, "loss": 5.7307, "step": 2592 }, { "epoch": 0.13, "grad_norm": 1.5429643392562866, "learning_rate": 4.35866396561095e-05, "loss": 5.8006, "step": 2596 }, { "epoch": 0.13, "grad_norm": 1.7911728620529175, "learning_rate": 4.3576757744947874e-05, "loss": 5.8635, "step": 2600 }, { "epoch": 0.13, "grad_norm": 2.0220367908477783, "learning_rate": 4.3566875833786256e-05, "loss": 5.8923, "step": 2604 }, { "epoch": 0.13, "grad_norm": 1.5731260776519775, "learning_rate": 4.355699392262464e-05, "loss": 5.9229, "step": 2608 }, { "epoch": 0.13, "grad_norm": 2.0656206607818604, "learning_rate": 4.354711201146302e-05, "loss": 5.9055, "step": 2612 }, { "epoch": 0.13, "grad_norm": 1.5131778717041016, "learning_rate": 4.35372301003014e-05, "loss": 5.8296, "step": 2616 }, { "epoch": 0.13, "grad_norm": 1.953755497932434, "learning_rate": 4.3527348189139784e-05, "loss": 5.7472, "step": 2620 }, { "epoch": 0.13, "grad_norm": 1.496201515197754, "learning_rate": 
4.3517466277978166e-05, "loss": 5.8703, "step": 2624 }, { "epoch": 0.13, "grad_norm": 1.6766773462295532, "learning_rate": 4.350758436681654e-05, "loss": 5.8507, "step": 2628 }, { "epoch": 0.13, "grad_norm": 1.5544074773788452, "learning_rate": 4.3497702455654924e-05, "loss": 5.7533, "step": 2632 }, { "epoch": 0.13, "grad_norm": 2.169536828994751, "learning_rate": 4.3487820544493306e-05, "loss": 5.9255, "step": 2636 }, { "epoch": 0.13, "grad_norm": 1.78980553150177, "learning_rate": 4.347793863333169e-05, "loss": 5.8978, "step": 2640 }, { "epoch": 0.13, "grad_norm": 1.4349106550216675, "learning_rate": 4.346805672217007e-05, "loss": 5.7736, "step": 2644 }, { "epoch": 0.13, "grad_norm": 1.5959937572479248, "learning_rate": 4.345817481100845e-05, "loss": 5.8393, "step": 2648 }, { "epoch": 0.13, "grad_norm": 1.2932758331298828, "learning_rate": 4.3448292899846834e-05, "loss": 6.0105, "step": 2652 }, { "epoch": 0.13, "grad_norm": 1.8422064781188965, "learning_rate": 4.3438410988685216e-05, "loss": 5.7793, "step": 2656 }, { "epoch": 0.13, "grad_norm": 1.948499083518982, "learning_rate": 4.342852907752359e-05, "loss": 5.9639, "step": 2660 }, { "epoch": 0.13, "grad_norm": 1.4169591665267944, "learning_rate": 4.3418647166361973e-05, "loss": 5.8024, "step": 2664 }, { "epoch": 0.13, "grad_norm": 2.4500057697296143, "learning_rate": 4.3408765255200356e-05, "loss": 5.8555, "step": 2668 }, { "epoch": 0.13, "grad_norm": 1.5837527513504028, "learning_rate": 4.339888334403874e-05, "loss": 5.9194, "step": 2672 }, { "epoch": 0.13, "grad_norm": 1.8204997777938843, "learning_rate": 4.338900143287712e-05, "loss": 5.9046, "step": 2676 }, { "epoch": 0.13, "grad_norm": 1.7072649002075195, "learning_rate": 4.33791195217155e-05, "loss": 5.7805, "step": 2680 }, { "epoch": 0.13, "grad_norm": 2.0868546962738037, "learning_rate": 4.3369237610553884e-05, "loss": 5.8899, "step": 2684 }, { "epoch": 0.13, "grad_norm": 1.489852786064148, "learning_rate": 4.335935569939226e-05, "loss": 5.8553, 
"step": 2688 }, { "epoch": 0.13, "grad_norm": 1.7851141691207886, "learning_rate": 4.334947378823064e-05, "loss": 5.7927, "step": 2692 }, { "epoch": 0.13, "grad_norm": 1.4655989408493042, "learning_rate": 4.333959187706902e-05, "loss": 5.9079, "step": 2696 }, { "epoch": 0.13, "grad_norm": 1.4320762157440186, "learning_rate": 4.332970996590741e-05, "loss": 5.9267, "step": 2700 }, { "epoch": 0.13, "grad_norm": 1.6071585416793823, "learning_rate": 4.3319828054745794e-05, "loss": 5.7468, "step": 2704 }, { "epoch": 0.13, "grad_norm": 1.3550992012023926, "learning_rate": 4.3309946143584176e-05, "loss": 5.8188, "step": 2708 }, { "epoch": 0.13, "grad_norm": 2.1684658527374268, "learning_rate": 4.330006423242255e-05, "loss": 5.8959, "step": 2712 }, { "epoch": 0.13, "grad_norm": 1.543056607246399, "learning_rate": 4.3290182321260933e-05, "loss": 5.9079, "step": 2716 }, { "epoch": 0.13, "grad_norm": 1.4535815715789795, "learning_rate": 4.3280300410099316e-05, "loss": 5.814, "step": 2720 }, { "epoch": 0.13, "grad_norm": 1.6012686491012573, "learning_rate": 4.32704184989377e-05, "loss": 5.9343, "step": 2724 }, { "epoch": 0.13, "grad_norm": 1.63141667842865, "learning_rate": 4.326053658777608e-05, "loss": 5.744, "step": 2728 }, { "epoch": 0.13, "grad_norm": 1.621268630027771, "learning_rate": 4.325065467661446e-05, "loss": 5.6779, "step": 2732 }, { "epoch": 0.14, "grad_norm": 1.809212565422058, "learning_rate": 4.3240772765452844e-05, "loss": 5.8384, "step": 2736 }, { "epoch": 0.14, "grad_norm": 1.3587805032730103, "learning_rate": 4.3230890854291226e-05, "loss": 5.7724, "step": 2740 }, { "epoch": 0.14, "grad_norm": 1.7285997867584229, "learning_rate": 4.32210089431296e-05, "loss": 5.8804, "step": 2744 }, { "epoch": 0.14, "grad_norm": 1.9753797054290771, "learning_rate": 4.321112703196798e-05, "loss": 5.8618, "step": 2748 }, { "epoch": 0.14, "grad_norm": 1.357673168182373, "learning_rate": 4.3201245120806365e-05, "loss": 5.6777, "step": 2752 }, { "epoch": 0.14, "grad_norm": 
1.5860601663589478, "learning_rate": 4.319136320964475e-05, "loss": 5.7696, "step": 2756 }, { "epoch": 0.14, "grad_norm": 1.830790638923645, "learning_rate": 4.318148129848313e-05, "loss": 5.7376, "step": 2760 }, { "epoch": 0.14, "grad_norm": 1.4875571727752686, "learning_rate": 4.317159938732151e-05, "loss": 5.8386, "step": 2764 }, { "epoch": 0.14, "grad_norm": 1.5012223720550537, "learning_rate": 4.3161717476159893e-05, "loss": 5.8406, "step": 2768 }, { "epoch": 0.14, "grad_norm": 1.872456669807434, "learning_rate": 4.315183556499827e-05, "loss": 5.7977, "step": 2772 }, { "epoch": 0.14, "grad_norm": 1.4684159755706787, "learning_rate": 4.314195365383665e-05, "loss": 5.6846, "step": 2776 }, { "epoch": 0.14, "grad_norm": 1.8385950326919556, "learning_rate": 4.313207174267503e-05, "loss": 5.9036, "step": 2780 }, { "epoch": 0.14, "grad_norm": 1.6362018585205078, "learning_rate": 4.3122189831513415e-05, "loss": 5.8381, "step": 2784 }, { "epoch": 0.14, "grad_norm": 1.5058547258377075, "learning_rate": 4.31123079203518e-05, "loss": 5.8647, "step": 2788 }, { "epoch": 0.14, "grad_norm": 2.060148239135742, "learning_rate": 4.310242600919018e-05, "loss": 5.7431, "step": 2792 }, { "epoch": 0.14, "grad_norm": 1.9664667844772339, "learning_rate": 4.309254409802856e-05, "loss": 5.8879, "step": 2796 }, { "epoch": 0.14, "grad_norm": 1.9889500141143799, "learning_rate": 4.308266218686694e-05, "loss": 5.8715, "step": 2800 }, { "epoch": 0.14, "grad_norm": 1.3932100534439087, "learning_rate": 4.307278027570532e-05, "loss": 5.6858, "step": 2804 }, { "epoch": 0.14, "grad_norm": 1.6131174564361572, "learning_rate": 4.30628983645437e-05, "loss": 5.9675, "step": 2808 }, { "epoch": 0.14, "grad_norm": 1.9435076713562012, "learning_rate": 4.305301645338208e-05, "loss": 5.8918, "step": 2812 }, { "epoch": 0.14, "grad_norm": 1.5480365753173828, "learning_rate": 4.304313454222047e-05, "loss": 5.7797, "step": 2816 }, { "epoch": 0.14, "grad_norm": 1.5039936304092407, "learning_rate": 
4.3033252631058853e-05, "loss": 5.7075, "step": 2820 }, { "epoch": 0.14, "grad_norm": 1.4041931629180908, "learning_rate": 4.3023370719897236e-05, "loss": 5.6871, "step": 2824 }, { "epoch": 0.14, "grad_norm": 1.9822477102279663, "learning_rate": 4.301348880873561e-05, "loss": 5.8225, "step": 2828 }, { "epoch": 0.14, "grad_norm": 1.6121152639389038, "learning_rate": 4.300360689757399e-05, "loss": 5.7735, "step": 2832 }, { "epoch": 0.14, "grad_norm": 1.6461482048034668, "learning_rate": 4.2993724986412375e-05, "loss": 5.8867, "step": 2836 }, { "epoch": 0.14, "grad_norm": 1.8447121381759644, "learning_rate": 4.298384307525076e-05, "loss": 5.7835, "step": 2840 }, { "epoch": 0.14, "grad_norm": 1.4918212890625, "learning_rate": 4.297396116408914e-05, "loss": 5.6962, "step": 2844 }, { "epoch": 0.14, "grad_norm": 1.7599409818649292, "learning_rate": 4.296407925292752e-05, "loss": 5.8473, "step": 2848 }, { "epoch": 0.14, "grad_norm": 1.9731839895248413, "learning_rate": 4.29541973417659e-05, "loss": 5.904, "step": 2852 }, { "epoch": 0.14, "grad_norm": 1.9410591125488281, "learning_rate": 4.294431543060428e-05, "loss": 5.8863, "step": 2856 }, { "epoch": 0.14, "grad_norm": 1.942104458808899, "learning_rate": 4.293443351944266e-05, "loss": 5.7006, "step": 2860 }, { "epoch": 0.14, "grad_norm": 1.6088027954101562, "learning_rate": 4.292455160828104e-05, "loss": 5.7491, "step": 2864 }, { "epoch": 0.14, "grad_norm": 1.5795434713363647, "learning_rate": 4.2914669697119425e-05, "loss": 5.8635, "step": 2868 }, { "epoch": 0.14, "grad_norm": 1.7501459121704102, "learning_rate": 4.290478778595781e-05, "loss": 5.8032, "step": 2872 }, { "epoch": 0.14, "grad_norm": 1.9903556108474731, "learning_rate": 4.289490587479619e-05, "loss": 5.885, "step": 2876 }, { "epoch": 0.14, "grad_norm": 1.539027452468872, "learning_rate": 4.288502396363457e-05, "loss": 5.6591, "step": 2880 }, { "epoch": 0.14, "grad_norm": 1.6801403760910034, "learning_rate": 4.287514205247295e-05, "loss": 5.8648, "step": 2884 
}, { "epoch": 0.14, "grad_norm": 1.8741987943649292, "learning_rate": 4.286526014131133e-05, "loss": 5.9236, "step": 2888 }, { "epoch": 0.14, "grad_norm": 1.261087417602539, "learning_rate": 4.285537823014971e-05, "loss": 5.7465, "step": 2892 }, { "epoch": 0.14, "grad_norm": 1.9104219675064087, "learning_rate": 4.284549631898809e-05, "loss": 5.7858, "step": 2896 }, { "epoch": 0.14, "grad_norm": 1.8742128610610962, "learning_rate": 4.2835614407826474e-05, "loss": 5.8395, "step": 2900 }, { "epoch": 0.14, "grad_norm": 1.9993197917938232, "learning_rate": 4.2825732496664856e-05, "loss": 5.7205, "step": 2904 }, { "epoch": 0.14, "grad_norm": 1.668923258781433, "learning_rate": 4.281585058550324e-05, "loss": 5.8264, "step": 2908 }, { "epoch": 0.14, "grad_norm": 2.509131669998169, "learning_rate": 4.280596867434162e-05, "loss": 5.8287, "step": 2912 }, { "epoch": 0.14, "grad_norm": 1.6229509115219116, "learning_rate": 4.2796086763179996e-05, "loss": 5.8989, "step": 2916 }, { "epoch": 0.14, "grad_norm": 1.9508914947509766, "learning_rate": 4.278620485201838e-05, "loss": 5.7111, "step": 2920 }, { "epoch": 0.14, "grad_norm": 1.3820146322250366, "learning_rate": 4.277632294085676e-05, "loss": 5.8116, "step": 2924 }, { "epoch": 0.14, "grad_norm": 1.623271107673645, "learning_rate": 4.276644102969514e-05, "loss": 5.7792, "step": 2928 }, { "epoch": 0.14, "grad_norm": 1.7664297819137573, "learning_rate": 4.275655911853353e-05, "loss": 5.8097, "step": 2932 }, { "epoch": 0.15, "grad_norm": 1.8661329746246338, "learning_rate": 4.274667720737191e-05, "loss": 5.8191, "step": 2936 }, { "epoch": 0.15, "grad_norm": 1.6165112257003784, "learning_rate": 4.273679529621029e-05, "loss": 5.8802, "step": 2940 }, { "epoch": 0.15, "grad_norm": 1.5878552198410034, "learning_rate": 4.272691338504867e-05, "loss": 5.8965, "step": 2944 }, { "epoch": 0.15, "grad_norm": 3.6278650760650635, "learning_rate": 4.271703147388705e-05, "loss": 5.6925, "step": 2948 }, { "epoch": 0.15, "grad_norm": 
1.6811779737472534, "learning_rate": 4.2707149562725434e-05, "loss": 5.7483, "step": 2952 }, { "epoch": 0.15, "grad_norm": 1.4222928285598755, "learning_rate": 4.2697267651563816e-05, "loss": 5.7322, "step": 2956 }, { "epoch": 0.15, "grad_norm": 1.6275368928909302, "learning_rate": 4.26873857404022e-05, "loss": 5.803, "step": 2960 }, { "epoch": 0.15, "grad_norm": 1.4676285982131958, "learning_rate": 4.267750382924058e-05, "loss": 5.8874, "step": 2964 }, { "epoch": 0.15, "grad_norm": 1.3239818811416626, "learning_rate": 4.266762191807896e-05, "loss": 5.8061, "step": 2968 }, { "epoch": 0.15, "grad_norm": 1.5904514789581299, "learning_rate": 4.265774000691734e-05, "loss": 5.8226, "step": 2972 }, { "epoch": 0.15, "grad_norm": 1.6302812099456787, "learning_rate": 4.264785809575572e-05, "loss": 5.8038, "step": 2976 }, { "epoch": 0.15, "grad_norm": 1.495665192604065, "learning_rate": 4.26379761845941e-05, "loss": 5.8282, "step": 2980 }, { "epoch": 0.15, "grad_norm": 1.776206612586975, "learning_rate": 4.2628094273432484e-05, "loss": 5.8956, "step": 2984 }, { "epoch": 0.15, "grad_norm": 2.3062102794647217, "learning_rate": 4.2618212362270866e-05, "loss": 5.7843, "step": 2988 }, { "epoch": 0.15, "grad_norm": 1.649765968322754, "learning_rate": 4.260833045110925e-05, "loss": 5.7521, "step": 2992 }, { "epoch": 0.15, "grad_norm": 1.4618674516677856, "learning_rate": 4.259844853994763e-05, "loss": 5.8289, "step": 2996 }, { "epoch": 0.15, "grad_norm": 1.6106091737747192, "learning_rate": 4.2588566628786006e-05, "loss": 5.8524, "step": 3000 }, { "epoch": 0.15, "grad_norm": 1.6770355701446533, "learning_rate": 4.257868471762439e-05, "loss": 5.8803, "step": 3004 }, { "epoch": 0.15, "grad_norm": 1.5975035429000854, "learning_rate": 4.256880280646277e-05, "loss": 5.8038, "step": 3008 }, { "epoch": 0.15, "grad_norm": 1.5957070589065552, "learning_rate": 4.255892089530115e-05, "loss": 5.7577, "step": 3012 }, { "epoch": 0.15, "grad_norm": 1.676400065422058, "learning_rate": 
4.2549038984139534e-05, "loss": 5.7606, "step": 3016 }, { "epoch": 0.15, "grad_norm": 1.4680049419403076, "learning_rate": 4.2539157072977916e-05, "loss": 5.8903, "step": 3020 }, { "epoch": 0.15, "grad_norm": 1.5760340690612793, "learning_rate": 4.25292751618163e-05, "loss": 5.7209, "step": 3024 }, { "epoch": 0.15, "grad_norm": 1.6681965589523315, "learning_rate": 4.251939325065468e-05, "loss": 5.7767, "step": 3028 }, { "epoch": 0.15, "grad_norm": 1.4011784791946411, "learning_rate": 4.2509511339493055e-05, "loss": 5.7752, "step": 3032 }, { "epoch": 0.15, "grad_norm": 1.6614855527877808, "learning_rate": 4.249962942833144e-05, "loss": 5.8396, "step": 3036 }, { "epoch": 0.15, "grad_norm": 1.5414800643920898, "learning_rate": 4.248974751716982e-05, "loss": 5.8238, "step": 3040 }, { "epoch": 0.15, "grad_norm": 1.9518810510635376, "learning_rate": 4.247986560600821e-05, "loss": 5.9185, "step": 3044 }, { "epoch": 0.15, "grad_norm": 1.8648451566696167, "learning_rate": 4.246998369484659e-05, "loss": 5.9562, "step": 3048 }, { "epoch": 0.15, "grad_norm": 2.052186965942383, "learning_rate": 4.246010178368497e-05, "loss": 5.7948, "step": 3052 }, { "epoch": 0.15, "grad_norm": 1.8737437725067139, "learning_rate": 4.245021987252335e-05, "loss": 5.7977, "step": 3056 }, { "epoch": 0.15, "grad_norm": 1.7005298137664795, "learning_rate": 4.244033796136173e-05, "loss": 5.7203, "step": 3060 }, { "epoch": 0.15, "grad_norm": 1.91958487033844, "learning_rate": 4.243045605020011e-05, "loss": 5.798, "step": 3064 }, { "epoch": 0.15, "grad_norm": 1.6794556379318237, "learning_rate": 4.2420574139038494e-05, "loss": 5.8465, "step": 3068 }, { "epoch": 0.15, "grad_norm": 1.8262078762054443, "learning_rate": 4.2410692227876876e-05, "loss": 5.7919, "step": 3072 }, { "epoch": 0.15, "grad_norm": 1.3758339881896973, "learning_rate": 4.240081031671526e-05, "loss": 5.8541, "step": 3076 }, { "epoch": 0.15, "grad_norm": 1.6782643795013428, "learning_rate": 4.239092840555364e-05, "loss": 5.7396, "step": 
3080 }, { "epoch": 0.15, "grad_norm": 1.5164580345153809, "learning_rate": 4.2381046494392015e-05, "loss": 5.8555, "step": 3084 }, { "epoch": 0.15, "grad_norm": 1.664754033088684, "learning_rate": 4.23711645832304e-05, "loss": 5.7878, "step": 3088 }, { "epoch": 0.15, "grad_norm": 1.615749716758728, "learning_rate": 4.236128267206878e-05, "loss": 5.8452, "step": 3092 }, { "epoch": 0.15, "grad_norm": 1.699905514717102, "learning_rate": 4.235140076090716e-05, "loss": 5.6857, "step": 3096 }, { "epoch": 0.15, "grad_norm": 1.5537385940551758, "learning_rate": 4.2341518849745543e-05, "loss": 5.7966, "step": 3100 }, { "epoch": 0.15, "grad_norm": 1.6133285760879517, "learning_rate": 4.2331636938583926e-05, "loss": 5.7937, "step": 3104 }, { "epoch": 0.15, "grad_norm": 1.6025242805480957, "learning_rate": 4.232175502742231e-05, "loss": 5.847, "step": 3108 }, { "epoch": 0.15, "grad_norm": 2.157740831375122, "learning_rate": 4.231187311626069e-05, "loss": 5.8554, "step": 3112 }, { "epoch": 0.15, "grad_norm": 1.3504477739334106, "learning_rate": 4.2301991205099065e-05, "loss": 5.864, "step": 3116 }, { "epoch": 0.15, "grad_norm": 1.835391879081726, "learning_rate": 4.229210929393745e-05, "loss": 5.8793, "step": 3120 }, { "epoch": 0.15, "grad_norm": 1.7732737064361572, "learning_rate": 4.228222738277583e-05, "loss": 5.8299, "step": 3124 }, { "epoch": 0.15, "grad_norm": 1.3115538358688354, "learning_rate": 4.227234547161421e-05, "loss": 5.6581, "step": 3128 }, { "epoch": 0.15, "grad_norm": 1.5173206329345703, "learning_rate": 4.226246356045259e-05, "loss": 5.6791, "step": 3132 }, { "epoch": 0.15, "grad_norm": 1.850712537765503, "learning_rate": 4.2252581649290975e-05, "loss": 5.7433, "step": 3136 }, { "epoch": 0.16, "grad_norm": 1.6634035110473633, "learning_rate": 4.224269973812936e-05, "loss": 5.8331, "step": 3140 }, { "epoch": 0.16, "grad_norm": 1.8786728382110596, "learning_rate": 4.223281782696773e-05, "loss": 5.6907, "step": 3144 }, { "epoch": 0.16, "grad_norm": 
1.4625319242477417, "learning_rate": 4.2222935915806115e-05, "loss": 5.9035, "step": 3148 }, { "epoch": 0.16, "grad_norm": 1.6950637102127075, "learning_rate": 4.22130540046445e-05, "loss": 5.7991, "step": 3152 }, { "epoch": 0.16, "grad_norm": 1.7324408292770386, "learning_rate": 4.220317209348288e-05, "loss": 5.7367, "step": 3156 }, { "epoch": 0.16, "grad_norm": 2.2330594062805176, "learning_rate": 4.219329018232127e-05, "loss": 5.8253, "step": 3160 }, { "epoch": 0.16, "grad_norm": 1.658172607421875, "learning_rate": 4.218340827115965e-05, "loss": 5.8604, "step": 3164 }, { "epoch": 0.16, "grad_norm": 1.7080429792404175, "learning_rate": 4.2173526359998025e-05, "loss": 5.8495, "step": 3168 }, { "epoch": 0.16, "grad_norm": 1.8790498971939087, "learning_rate": 4.216364444883641e-05, "loss": 5.7018, "step": 3172 }, { "epoch": 0.16, "grad_norm": 2.222698211669922, "learning_rate": 4.215376253767479e-05, "loss": 5.841, "step": 3176 }, { "epoch": 0.16, "grad_norm": 1.7230134010314941, "learning_rate": 4.214388062651317e-05, "loss": 5.805, "step": 3180 }, { "epoch": 0.16, "grad_norm": 2.234403133392334, "learning_rate": 4.213399871535155e-05, "loss": 5.9041, "step": 3184 }, { "epoch": 0.16, "grad_norm": 2.1015374660491943, "learning_rate": 4.2124116804189935e-05, "loss": 5.9052, "step": 3188 }, { "epoch": 0.16, "grad_norm": 1.6418051719665527, "learning_rate": 4.211423489302832e-05, "loss": 5.7498, "step": 3192 }, { "epoch": 0.16, "grad_norm": 1.524634599685669, "learning_rate": 4.210435298186669e-05, "loss": 5.8123, "step": 3196 }, { "epoch": 0.16, "grad_norm": 1.4214069843292236, "learning_rate": 4.2094471070705075e-05, "loss": 5.7962, "step": 3200 }, { "epoch": 0.16, "grad_norm": 2.6622087955474854, "learning_rate": 4.208458915954346e-05, "loss": 5.8462, "step": 3204 }, { "epoch": 0.16, "grad_norm": 1.3304983377456665, "learning_rate": 4.207470724838184e-05, "loss": 5.8222, "step": 3208 }, { "epoch": 0.16, "grad_norm": 1.9604005813598633, "learning_rate": 
4.206482533722022e-05, "loss": 5.8678, "step": 3212 }, { "epoch": 0.16, "grad_norm": 1.4757952690124512, "learning_rate": 4.20549434260586e-05, "loss": 5.8205, "step": 3216 }, { "epoch": 0.16, "grad_norm": 1.6784671545028687, "learning_rate": 4.2045061514896985e-05, "loss": 5.8077, "step": 3220 }, { "epoch": 0.16, "grad_norm": 2.231874465942383, "learning_rate": 4.203517960373537e-05, "loss": 5.7779, "step": 3224 }, { "epoch": 0.16, "grad_norm": 2.200303316116333, "learning_rate": 4.202529769257374e-05, "loss": 5.8089, "step": 3228 }, { "epoch": 0.16, "grad_norm": 1.9842939376831055, "learning_rate": 4.2015415781412124e-05, "loss": 5.819, "step": 3232 }, { "epoch": 0.16, "grad_norm": 1.8117098808288574, "learning_rate": 4.2005533870250506e-05, "loss": 5.9165, "step": 3236 }, { "epoch": 0.16, "grad_norm": 1.559996247291565, "learning_rate": 4.199565195908889e-05, "loss": 5.7312, "step": 3240 }, { "epoch": 0.16, "grad_norm": 1.4291075468063354, "learning_rate": 4.198577004792727e-05, "loss": 5.8351, "step": 3244 }, { "epoch": 0.16, "grad_norm": 2.1594057083129883, "learning_rate": 4.197588813676565e-05, "loss": 5.8667, "step": 3248 }, { "epoch": 0.16, "grad_norm": 1.4217649698257446, "learning_rate": 4.1966006225604035e-05, "loss": 5.7143, "step": 3252 }, { "epoch": 0.16, "grad_norm": 2.619981288909912, "learning_rate": 4.195612431444241e-05, "loss": 5.8356, "step": 3256 }, { "epoch": 0.16, "grad_norm": 2.002779960632324, "learning_rate": 4.194624240328079e-05, "loss": 5.7442, "step": 3260 }, { "epoch": 0.16, "grad_norm": 1.639796257019043, "learning_rate": 4.1936360492119174e-05, "loss": 5.8934, "step": 3264 }, { "epoch": 0.16, "grad_norm": 1.6575497388839722, "learning_rate": 4.1926478580957556e-05, "loss": 5.7446, "step": 3268 }, { "epoch": 0.16, "grad_norm": 1.411887526512146, "learning_rate": 4.191659666979594e-05, "loss": 5.803, "step": 3272 }, { "epoch": 0.16, "grad_norm": 1.6861897706985474, "learning_rate": 4.190671475863433e-05, "loss": 5.8827, "step": 3276 
}, { "epoch": 0.16, "grad_norm": 1.5666462182998657, "learning_rate": 4.18968328474727e-05, "loss": 5.6994, "step": 3280 }, { "epoch": 0.16, "grad_norm": 2.06365704536438, "learning_rate": 4.1886950936311084e-05, "loss": 5.7583, "step": 3284 }, { "epoch": 0.16, "grad_norm": 1.5888551473617554, "learning_rate": 4.1877069025149466e-05, "loss": 5.778, "step": 3288 }, { "epoch": 0.16, "grad_norm": 1.8238952159881592, "learning_rate": 4.186718711398785e-05, "loss": 5.8059, "step": 3292 }, { "epoch": 0.16, "grad_norm": 1.9061448574066162, "learning_rate": 4.185730520282623e-05, "loss": 5.7522, "step": 3296 }, { "epoch": 0.16, "grad_norm": 2.1226706504821777, "learning_rate": 4.184742329166461e-05, "loss": 5.7591, "step": 3300 }, { "epoch": 0.16, "grad_norm": 1.4875596761703491, "learning_rate": 4.1837541380502995e-05, "loss": 5.8178, "step": 3304 }, { "epoch": 0.16, "grad_norm": 1.5928765535354614, "learning_rate": 4.182765946934138e-05, "loss": 5.7657, "step": 3308 }, { "epoch": 0.16, "grad_norm": 1.4694312810897827, "learning_rate": 4.181777755817975e-05, "loss": 5.8106, "step": 3312 }, { "epoch": 0.16, "grad_norm": 1.9643967151641846, "learning_rate": 4.1807895647018134e-05, "loss": 5.8297, "step": 3316 }, { "epoch": 0.16, "grad_norm": 1.6413182020187378, "learning_rate": 4.1798013735856516e-05, "loss": 5.8, "step": 3320 }, { "epoch": 0.16, "grad_norm": 1.6926419734954834, "learning_rate": 4.17881318246949e-05, "loss": 5.7488, "step": 3324 }, { "epoch": 0.16, "grad_norm": 2.3814029693603516, "learning_rate": 4.177824991353328e-05, "loss": 5.8541, "step": 3328 }, { "epoch": 0.16, "grad_norm": 1.9140582084655762, "learning_rate": 4.176836800237166e-05, "loss": 5.7962, "step": 3332 }, { "epoch": 0.16, "grad_norm": 1.598402976989746, "learning_rate": 4.1758486091210044e-05, "loss": 5.6331, "step": 3336 }, { "epoch": 0.17, "grad_norm": 1.7428086996078491, "learning_rate": 4.174860418004842e-05, "loss": 5.8097, "step": 3340 }, { "epoch": 0.17, "grad_norm": 
1.6181442737579346, "learning_rate": 4.17387222688868e-05, "loss": 5.7108, "step": 3344 }, { "epoch": 0.17, "grad_norm": 1.6270121335983276, "learning_rate": 4.1728840357725184e-05, "loss": 5.7934, "step": 3348 }, { "epoch": 0.17, "grad_norm": 1.677556037902832, "learning_rate": 4.1718958446563566e-05, "loss": 5.8193, "step": 3352 }, { "epoch": 0.17, "grad_norm": 1.8996258974075317, "learning_rate": 4.170907653540195e-05, "loss": 5.8761, "step": 3356 }, { "epoch": 0.17, "grad_norm": 1.5995745658874512, "learning_rate": 4.169919462424033e-05, "loss": 5.8213, "step": 3360 }, { "epoch": 0.17, "grad_norm": 1.6188803911209106, "learning_rate": 4.168931271307871e-05, "loss": 5.9193, "step": 3364 }, { "epoch": 0.17, "grad_norm": 2.0405361652374268, "learning_rate": 4.1679430801917094e-05, "loss": 5.76, "step": 3368 }, { "epoch": 0.17, "grad_norm": 1.8527336120605469, "learning_rate": 4.166954889075547e-05, "loss": 5.7301, "step": 3372 }, { "epoch": 0.17, "grad_norm": 1.9228570461273193, "learning_rate": 4.165966697959385e-05, "loss": 5.6206, "step": 3376 }, { "epoch": 0.17, "grad_norm": 1.8511258363723755, "learning_rate": 4.1649785068432233e-05, "loss": 5.6747, "step": 3380 }, { "epoch": 0.17, "grad_norm": 2.077301263809204, "learning_rate": 4.1639903157270616e-05, "loss": 5.7165, "step": 3384 }, { "epoch": 0.17, "grad_norm": 1.5718106031417847, "learning_rate": 4.1630021246109004e-05, "loss": 5.8808, "step": 3388 }, { "epoch": 0.17, "grad_norm": 1.6767138242721558, "learning_rate": 4.1620139334947386e-05, "loss": 5.6554, "step": 3392 }, { "epoch": 0.17, "grad_norm": 1.6757532358169556, "learning_rate": 4.161025742378576e-05, "loss": 5.8836, "step": 3396 }, { "epoch": 0.17, "grad_norm": 1.7318501472473145, "learning_rate": 4.1600375512624144e-05, "loss": 5.7989, "step": 3400 }, { "epoch": 0.17, "grad_norm": 1.6216315031051636, "learning_rate": 4.1590493601462526e-05, "loss": 5.7528, "step": 3404 }, { "epoch": 0.17, "grad_norm": 1.916919231414795, "learning_rate": 
4.158061169030091e-05, "loss": 5.7544, "step": 3408 }, { "epoch": 0.17, "grad_norm": 2.1574885845184326, "learning_rate": 4.157072977913929e-05, "loss": 5.7093, "step": 3412 }, { "epoch": 0.17, "grad_norm": 1.754546880722046, "learning_rate": 4.156084786797767e-05, "loss": 5.6852, "step": 3416 }, { "epoch": 0.17, "grad_norm": 1.5894404649734497, "learning_rate": 4.1550965956816054e-05, "loss": 5.8409, "step": 3420 }, { "epoch": 0.17, "grad_norm": 1.5836817026138306, "learning_rate": 4.154108404565443e-05, "loss": 5.8131, "step": 3424 }, { "epoch": 0.17, "grad_norm": 2.2382681369781494, "learning_rate": 4.153120213449281e-05, "loss": 5.7432, "step": 3428 }, { "epoch": 0.17, "grad_norm": 1.515018105506897, "learning_rate": 4.1521320223331194e-05, "loss": 5.8258, "step": 3432 }, { "epoch": 0.17, "grad_norm": 1.6593300104141235, "learning_rate": 4.1511438312169576e-05, "loss": 5.6845, "step": 3436 }, { "epoch": 0.17, "grad_norm": 1.5710387229919434, "learning_rate": 4.150155640100796e-05, "loss": 5.6832, "step": 3440 }, { "epoch": 0.17, "grad_norm": 1.8487441539764404, "learning_rate": 4.149167448984634e-05, "loss": 5.9216, "step": 3444 }, { "epoch": 0.17, "grad_norm": 1.8978421688079834, "learning_rate": 4.148179257868472e-05, "loss": 5.8274, "step": 3448 }, { "epoch": 0.17, "grad_norm": 1.6334807872772217, "learning_rate": 4.1471910667523104e-05, "loss": 5.8029, "step": 3452 }, { "epoch": 0.17, "grad_norm": 1.6049920320510864, "learning_rate": 4.146202875636148e-05, "loss": 5.898, "step": 3456 }, { "epoch": 0.17, "grad_norm": 1.874814510345459, "learning_rate": 4.145214684519986e-05, "loss": 5.7016, "step": 3460 }, { "epoch": 0.17, "grad_norm": 1.4894344806671143, "learning_rate": 4.144226493403824e-05, "loss": 5.8736, "step": 3464 }, { "epoch": 0.17, "grad_norm": 1.7190723419189453, "learning_rate": 4.1432383022876625e-05, "loss": 5.8398, "step": 3468 }, { "epoch": 0.17, "grad_norm": 2.1003429889678955, "learning_rate": 4.142250111171501e-05, "loss": 5.725, "step": 
3472 }, { "epoch": 0.17, "grad_norm": 1.4103918075561523, "learning_rate": 4.141261920055339e-05, "loss": 5.8784, "step": 3476 }, { "epoch": 0.17, "grad_norm": 1.6862280368804932, "learning_rate": 4.140273728939177e-05, "loss": 5.7469, "step": 3480 }, { "epoch": 0.17, "grad_norm": 1.669593095779419, "learning_rate": 4.139285537823015e-05, "loss": 5.7476, "step": 3484 }, { "epoch": 0.17, "grad_norm": 1.798256754875183, "learning_rate": 4.138297346706853e-05, "loss": 5.729, "step": 3488 }, { "epoch": 0.17, "grad_norm": 1.827129602432251, "learning_rate": 4.137309155590691e-05, "loss": 5.7887, "step": 3492 }, { "epoch": 0.17, "grad_norm": 1.7160648107528687, "learning_rate": 4.136320964474529e-05, "loss": 5.7244, "step": 3496 }, { "epoch": 0.17, "grad_norm": 2.105008363723755, "learning_rate": 4.1353327733583675e-05, "loss": 5.8301, "step": 3500 }, { "epoch": 0.17, "grad_norm": 1.4461700916290283, "learning_rate": 4.1343445822422064e-05, "loss": 5.8252, "step": 3504 }, { "epoch": 0.17, "grad_norm": 1.478606104850769, "learning_rate": 4.133356391126044e-05, "loss": 5.751, "step": 3508 }, { "epoch": 0.17, "grad_norm": 1.9824438095092773, "learning_rate": 4.132368200009882e-05, "loss": 5.7331, "step": 3512 }, { "epoch": 0.17, "grad_norm": 1.8545335531234741, "learning_rate": 4.13138000889372e-05, "loss": 5.8547, "step": 3516 }, { "epoch": 0.17, "grad_norm": 1.836466908454895, "learning_rate": 4.1303918177775585e-05, "loss": 5.7603, "step": 3520 }, { "epoch": 0.17, "grad_norm": 1.3290674686431885, "learning_rate": 4.129403626661397e-05, "loss": 5.8594, "step": 3524 }, { "epoch": 0.17, "grad_norm": 1.4753937721252441, "learning_rate": 4.128415435545235e-05, "loss": 5.8027, "step": 3528 }, { "epoch": 0.17, "grad_norm": 2.090026617050171, "learning_rate": 4.127427244429073e-05, "loss": 5.8225, "step": 3532 }, { "epoch": 0.17, "grad_norm": 2.7022199630737305, "learning_rate": 4.1264390533129114e-05, "loss": 5.8274, "step": 3536 }, { "epoch": 0.17, "grad_norm": 
2.0701937675476074, "learning_rate": 4.125450862196749e-05, "loss": 5.8273, "step": 3540 }, { "epoch": 0.18, "grad_norm": 1.5415451526641846, "learning_rate": 4.124462671080587e-05, "loss": 5.7365, "step": 3544 }, { "epoch": 0.18, "grad_norm": 1.871156096458435, "learning_rate": 4.123474479964425e-05, "loss": 5.8028, "step": 3548 }, { "epoch": 0.18, "grad_norm": 1.788110613822937, "learning_rate": 4.1224862888482635e-05, "loss": 5.7425, "step": 3552 }, { "epoch": 0.18, "grad_norm": 1.4837939739227295, "learning_rate": 4.121498097732102e-05, "loss": 5.7204, "step": 3556 }, { "epoch": 0.18, "grad_norm": 1.9719536304473877, "learning_rate": 4.12050990661594e-05, "loss": 5.6699, "step": 3560 }, { "epoch": 0.18, "grad_norm": 1.865614652633667, "learning_rate": 4.119521715499778e-05, "loss": 5.7139, "step": 3564 }, { "epoch": 0.18, "grad_norm": 1.9451555013656616, "learning_rate": 4.1185335243836156e-05, "loss": 5.7739, "step": 3568 }, { "epoch": 0.18, "grad_norm": 2.0122992992401123, "learning_rate": 4.117545333267454e-05, "loss": 5.9195, "step": 3572 }, { "epoch": 0.18, "grad_norm": 1.606345295906067, "learning_rate": 4.116557142151292e-05, "loss": 5.8827, "step": 3576 }, { "epoch": 0.18, "grad_norm": 1.9131412506103516, "learning_rate": 4.11556895103513e-05, "loss": 5.8901, "step": 3580 }, { "epoch": 0.18, "grad_norm": 1.780287742614746, "learning_rate": 4.1145807599189685e-05, "loss": 5.8709, "step": 3584 }, { "epoch": 0.18, "grad_norm": 2.840616226196289, "learning_rate": 4.113592568802807e-05, "loss": 5.8292, "step": 3588 }, { "epoch": 0.18, "grad_norm": 1.641686201095581, "learning_rate": 4.112604377686645e-05, "loss": 5.7915, "step": 3592 }, { "epoch": 0.18, "grad_norm": 1.6334573030471802, "learning_rate": 4.111616186570483e-05, "loss": 5.8706, "step": 3596 }, { "epoch": 0.18, "grad_norm": 2.521855592727661, "learning_rate": 4.1106279954543206e-05, "loss": 5.7234, "step": 3600 }, { "epoch": 0.18, "grad_norm": 1.7530171871185303, "learning_rate": 
4.109639804338159e-05, "loss": 5.9393, "step": 3604 }, { "epoch": 0.18, "grad_norm": 1.5222742557525635, "learning_rate": 4.108651613221997e-05, "loss": 5.7107, "step": 3608 }, { "epoch": 0.18, "grad_norm": 1.9538359642028809, "learning_rate": 4.107663422105835e-05, "loss": 5.837, "step": 3612 }, { "epoch": 0.18, "grad_norm": 1.5958921909332275, "learning_rate": 4.1066752309896734e-05, "loss": 5.8572, "step": 3616 }, { "epoch": 0.18, "grad_norm": 1.4122501611709595, "learning_rate": 4.105687039873512e-05, "loss": 5.8037, "step": 3620 }, { "epoch": 0.18, "grad_norm": 1.674170732498169, "learning_rate": 4.10469884875735e-05, "loss": 5.6393, "step": 3624 }, { "epoch": 0.18, "grad_norm": 1.989425778388977, "learning_rate": 4.103710657641188e-05, "loss": 5.7025, "step": 3628 }, { "epoch": 0.18, "grad_norm": 1.4842572212219238, "learning_rate": 4.102722466525026e-05, "loss": 5.7045, "step": 3632 }, { "epoch": 0.18, "grad_norm": 1.6306571960449219, "learning_rate": 4.1017342754088645e-05, "loss": 5.9058, "step": 3636 }, { "epoch": 0.18, "grad_norm": 1.7082542181015015, "learning_rate": 4.100746084292703e-05, "loss": 5.7987, "step": 3640 }, { "epoch": 0.18, "grad_norm": 1.4293181896209717, "learning_rate": 4.099757893176541e-05, "loss": 5.7266, "step": 3644 }, { "epoch": 0.18, "grad_norm": 1.4269033670425415, "learning_rate": 4.098769702060379e-05, "loss": 5.7295, "step": 3648 }, { "epoch": 0.18, "grad_norm": 1.6524076461791992, "learning_rate": 4.0977815109442166e-05, "loss": 5.7705, "step": 3652 }, { "epoch": 0.18, "grad_norm": 1.667291522026062, "learning_rate": 4.096793319828055e-05, "loss": 5.631, "step": 3656 }, { "epoch": 0.18, "grad_norm": 1.8146156072616577, "learning_rate": 4.095805128711893e-05, "loss": 5.8973, "step": 3660 }, { "epoch": 0.18, "grad_norm": 1.884358286857605, "learning_rate": 4.094816937595731e-05, "loss": 5.7693, "step": 3664 }, { "epoch": 0.18, "grad_norm": 1.5343087911605835, "learning_rate": 4.0938287464795694e-05, "loss": 5.7735, "step": 
3668 }, { "epoch": 0.18, "grad_norm": 1.6283289194107056, "learning_rate": 4.0928405553634076e-05, "loss": 5.788, "step": 3672 }, { "epoch": 0.18, "grad_norm": 1.5237561464309692, "learning_rate": 4.091852364247246e-05, "loss": 5.6661, "step": 3676 }, { "epoch": 0.18, "grad_norm": 1.8047362565994263, "learning_rate": 4.090864173131084e-05, "loss": 5.6598, "step": 3680 }, { "epoch": 0.18, "grad_norm": 1.6127629280090332, "learning_rate": 4.0898759820149216e-05, "loss": 5.6893, "step": 3684 }, { "epoch": 0.18, "grad_norm": 2.2976841926574707, "learning_rate": 4.08888779089876e-05, "loss": 5.7673, "step": 3688 }, { "epoch": 0.18, "grad_norm": 2.025730609893799, "learning_rate": 4.087899599782598e-05, "loss": 5.7116, "step": 3692 }, { "epoch": 0.18, "grad_norm": 1.5258173942565918, "learning_rate": 4.086911408666436e-05, "loss": 5.8727, "step": 3696 }, { "epoch": 0.18, "grad_norm": 1.983288049697876, "learning_rate": 4.0859232175502744e-05, "loss": 5.8148, "step": 3700 }, { "epoch": 0.18, "grad_norm": 1.6084184646606445, "learning_rate": 4.0849350264341126e-05, "loss": 5.816, "step": 3704 }, { "epoch": 0.18, "grad_norm": 1.739123821258545, "learning_rate": 4.083946835317951e-05, "loss": 5.8155, "step": 3708 }, { "epoch": 0.18, "grad_norm": 1.8195106983184814, "learning_rate": 4.0829586442017884e-05, "loss": 5.7962, "step": 3712 }, { "epoch": 0.18, "grad_norm": 1.5186692476272583, "learning_rate": 4.0819704530856266e-05, "loss": 5.8265, "step": 3716 }, { "epoch": 0.18, "grad_norm": 2.1515772342681885, "learning_rate": 4.080982261969465e-05, "loss": 5.734, "step": 3720 }, { "epoch": 0.18, "grad_norm": 1.9751808643341064, "learning_rate": 4.079994070853303e-05, "loss": 5.7605, "step": 3724 }, { "epoch": 0.18, "grad_norm": 1.5598946809768677, "learning_rate": 4.079005879737141e-05, "loss": 5.6552, "step": 3728 }, { "epoch": 0.18, "grad_norm": 1.642751693725586, "learning_rate": 4.0780176886209794e-05, "loss": 5.7607, "step": 3732 }, { "epoch": 0.18, "grad_norm": 
2.0432446002960205, "learning_rate": 4.0770294975048176e-05, "loss": 5.7103, "step": 3736 }, { "epoch": 0.18, "grad_norm": 1.8500975370407104, "learning_rate": 4.076041306388656e-05, "loss": 5.7195, "step": 3740 }, { "epoch": 0.18, "grad_norm": 1.5115128755569458, "learning_rate": 4.075053115272494e-05, "loss": 5.8491, "step": 3744 }, { "epoch": 0.19, "grad_norm": 2.241079092025757, "learning_rate": 4.074064924156332e-05, "loss": 5.8183, "step": 3748 }, { "epoch": 0.19, "grad_norm": 1.3165819644927979, "learning_rate": 4.0730767330401704e-05, "loss": 5.7614, "step": 3752 }, { "epoch": 0.19, "grad_norm": 1.4908742904663086, "learning_rate": 4.0720885419240086e-05, "loss": 5.7343, "step": 3756 }, { "epoch": 0.19, "grad_norm": 1.761338710784912, "learning_rate": 4.071100350807847e-05, "loss": 5.7702, "step": 3760 }, { "epoch": 0.19, "grad_norm": 1.7084866762161255, "learning_rate": 4.0701121596916844e-05, "loss": 5.7966, "step": 3764 }, { "epoch": 0.19, "grad_norm": 2.0340840816497803, "learning_rate": 4.0691239685755226e-05, "loss": 5.7835, "step": 3768 }, { "epoch": 0.19, "grad_norm": 2.2564029693603516, "learning_rate": 4.068135777459361e-05, "loss": 5.7911, "step": 3772 }, { "epoch": 0.19, "grad_norm": 2.2431206703186035, "learning_rate": 4.067147586343199e-05, "loss": 5.7461, "step": 3776 }, { "epoch": 0.19, "grad_norm": 1.6614986658096313, "learning_rate": 4.066159395227037e-05, "loss": 5.8013, "step": 3780 }, { "epoch": 0.19, "grad_norm": 1.8505353927612305, "learning_rate": 4.0651712041108754e-05, "loss": 5.7317, "step": 3784 }, { "epoch": 0.19, "grad_norm": 3.03542160987854, "learning_rate": 4.0641830129947136e-05, "loss": 5.822, "step": 3788 }, { "epoch": 0.19, "grad_norm": 1.5652294158935547, "learning_rate": 4.063194821878552e-05, "loss": 5.8001, "step": 3792 }, { "epoch": 0.19, "grad_norm": 1.7387696504592896, "learning_rate": 4.062206630762389e-05, "loss": 5.7575, "step": 3796 }, { "epoch": 0.19, "grad_norm": 1.7505065202713013, "learning_rate": 
4.0612184396462275e-05, "loss": 5.6909, "step": 3800 }, { "epoch": 0.19, "grad_norm": 1.7365105152130127, "learning_rate": 4.060230248530066e-05, "loss": 5.7282, "step": 3804 }, { "epoch": 0.19, "grad_norm": 1.965735912322998, "learning_rate": 4.059242057413904e-05, "loss": 5.7692, "step": 3808 }, { "epoch": 0.19, "grad_norm": 1.409680962562561, "learning_rate": 4.058253866297742e-05, "loss": 5.8235, "step": 3812 }, { "epoch": 0.19, "grad_norm": 1.8310275077819824, "learning_rate": 4.0572656751815804e-05, "loss": 5.7516, "step": 3816 }, { "epoch": 0.19, "grad_norm": 1.537810206413269, "learning_rate": 4.0562774840654186e-05, "loss": 5.6683, "step": 3820 }, { "epoch": 0.19, "grad_norm": 1.7618563175201416, "learning_rate": 4.055289292949256e-05, "loss": 5.7643, "step": 3824 }, { "epoch": 0.19, "grad_norm": 1.603681206703186, "learning_rate": 4.054301101833094e-05, "loss": 5.6578, "step": 3828 }, { "epoch": 0.19, "grad_norm": 2.3885388374328613, "learning_rate": 4.0533129107169325e-05, "loss": 5.8278, "step": 3832 }, { "epoch": 0.19, "grad_norm": 1.6048877239227295, "learning_rate": 4.052324719600771e-05, "loss": 5.8245, "step": 3836 }, { "epoch": 0.19, "grad_norm": 1.443018913269043, "learning_rate": 4.051336528484609e-05, "loss": 5.5956, "step": 3840 }, { "epoch": 0.19, "grad_norm": 1.6389518976211548, "learning_rate": 4.050348337368447e-05, "loss": 5.7129, "step": 3844 }, { "epoch": 0.19, "grad_norm": 1.4876172542572021, "learning_rate": 4.049360146252285e-05, "loss": 5.7235, "step": 3848 }, { "epoch": 0.19, "grad_norm": 1.8396450281143188, "learning_rate": 4.0483719551361235e-05, "loss": 5.8035, "step": 3852 }, { "epoch": 0.19, "grad_norm": 1.4481145143508911, "learning_rate": 4.047383764019962e-05, "loss": 5.793, "step": 3856 }, { "epoch": 0.19, "grad_norm": 1.8424584865570068, "learning_rate": 4.0463955729038e-05, "loss": 5.7926, "step": 3860 }, { "epoch": 0.19, "grad_norm": 2.1682097911834717, "learning_rate": 4.045407381787638e-05, "loss": 5.7651, "step": 
3864 }, { "epoch": 0.19, "grad_norm": 1.371147871017456, "learning_rate": 4.0444191906714764e-05, "loss": 5.7553, "step": 3868 }, { "epoch": 0.19, "grad_norm": 1.8529584407806396, "learning_rate": 4.0434309995553146e-05, "loss": 5.738, "step": 3872 }, { "epoch": 0.19, "grad_norm": 2.445864677429199, "learning_rate": 4.042442808439153e-05, "loss": 5.6886, "step": 3876 }, { "epoch": 0.19, "grad_norm": 1.6438809633255005, "learning_rate": 4.04145461732299e-05, "loss": 5.7889, "step": 3880 }, { "epoch": 0.19, "grad_norm": 2.3593320846557617, "learning_rate": 4.0404664262068285e-05, "loss": 5.7318, "step": 3884 }, { "epoch": 0.19, "grad_norm": 1.7897164821624756, "learning_rate": 4.039478235090667e-05, "loss": 5.7785, "step": 3888 }, { "epoch": 0.19, "grad_norm": 1.6130775213241577, "learning_rate": 4.038490043974505e-05, "loss": 5.7774, "step": 3892 }, { "epoch": 0.19, "grad_norm": 1.5768321752548218, "learning_rate": 4.037501852858343e-05, "loss": 5.8277, "step": 3896 }, { "epoch": 0.19, "grad_norm": 1.9530898332595825, "learning_rate": 4.036513661742181e-05, "loss": 5.8019, "step": 3900 }, { "epoch": 0.19, "grad_norm": 1.9102182388305664, "learning_rate": 4.0355254706260195e-05, "loss": 5.7975, "step": 3904 }, { "epoch": 0.19, "grad_norm": 1.7103482484817505, "learning_rate": 4.034537279509857e-05, "loss": 5.7725, "step": 3908 }, { "epoch": 0.19, "grad_norm": 1.5741090774536133, "learning_rate": 4.033549088393695e-05, "loss": 5.7367, "step": 3912 }, { "epoch": 0.19, "grad_norm": 2.2126150131225586, "learning_rate": 4.0325608972775335e-05, "loss": 5.9088, "step": 3916 }, { "epoch": 0.19, "grad_norm": 1.7655351161956787, "learning_rate": 4.031572706161372e-05, "loss": 5.847, "step": 3920 }, { "epoch": 0.19, "grad_norm": 1.7152812480926514, "learning_rate": 4.03058451504521e-05, "loss": 5.7307, "step": 3924 }, { "epoch": 0.19, "grad_norm": 1.767104983329773, "learning_rate": 4.029596323929048e-05, "loss": 5.8074, "step": 3928 }, { "epoch": 0.19, "grad_norm": 
1.9230767488479614, "learning_rate": 4.028608132812886e-05, "loss": 5.7124, "step": 3932 }, { "epoch": 0.19, "grad_norm": 1.6367123126983643, "learning_rate": 4.0276199416967245e-05, "loss": 5.7474, "step": 3936 }, { "epoch": 0.19, "grad_norm": 2.0680441856384277, "learning_rate": 4.026631750580562e-05, "loss": 5.8713, "step": 3940 }, { "epoch": 0.19, "grad_norm": 1.7396044731140137, "learning_rate": 4.0256435594644e-05, "loss": 5.7464, "step": 3944 }, { "epoch": 0.2, "grad_norm": 2.2416396141052246, "learning_rate": 4.0246553683482384e-05, "loss": 5.9287, "step": 3948 }, { "epoch": 0.2, "grad_norm": 2.192002534866333, "learning_rate": 4.0236671772320766e-05, "loss": 5.8617, "step": 3952 }, { "epoch": 0.2, "grad_norm": 2.6008896827697754, "learning_rate": 4.022678986115915e-05, "loss": 5.7929, "step": 3956 }, { "epoch": 0.2, "grad_norm": 1.7184627056121826, "learning_rate": 4.021690794999753e-05, "loss": 5.7198, "step": 3960 }, { "epoch": 0.2, "grad_norm": 2.4607460498809814, "learning_rate": 4.020702603883591e-05, "loss": 5.6012, "step": 3964 }, { "epoch": 0.2, "grad_norm": 3.070399761199951, "learning_rate": 4.0197144127674295e-05, "loss": 5.6176, "step": 3968 }, { "epoch": 0.2, "grad_norm": 1.7792775630950928, "learning_rate": 4.018726221651268e-05, "loss": 5.8348, "step": 3972 }, { "epoch": 0.2, "grad_norm": 2.0791399478912354, "learning_rate": 4.017738030535106e-05, "loss": 5.8916, "step": 3976 }, { "epoch": 0.2, "grad_norm": 1.7796858549118042, "learning_rate": 4.016749839418944e-05, "loss": 5.8424, "step": 3980 }, { "epoch": 0.2, "grad_norm": 2.2471671104431152, "learning_rate": 4.015761648302782e-05, "loss": 5.832, "step": 3984 }, { "epoch": 0.2, "grad_norm": 1.6249337196350098, "learning_rate": 4.0147734571866205e-05, "loss": 5.72, "step": 3988 }, { "epoch": 0.2, "grad_norm": 1.6861313581466675, "learning_rate": 4.013785266070458e-05, "loss": 5.7362, "step": 3992 }, { "epoch": 0.2, "grad_norm": 1.8561558723449707, "learning_rate": 4.012797074954296e-05, 
"loss": 5.7032, "step": 3996 }, { "epoch": 0.2, "grad_norm": 1.6355276107788086, "learning_rate": 4.0118088838381344e-05, "loss": 5.7695, "step": 4000 }, { "epoch": 0.2, "grad_norm": 2.0652620792388916, "learning_rate": 4.0108206927219726e-05, "loss": 5.8284, "step": 4004 }, { "epoch": 0.2, "grad_norm": 1.5604512691497803, "learning_rate": 4.009832501605811e-05, "loss": 5.8084, "step": 4008 }, { "epoch": 0.2, "grad_norm": 1.8975857496261597, "learning_rate": 4.008844310489649e-05, "loss": 5.7599, "step": 4012 }, { "epoch": 0.2, "grad_norm": 1.7917417287826538, "learning_rate": 4.007856119373487e-05, "loss": 5.7057, "step": 4016 }, { "epoch": 0.2, "grad_norm": 1.6700019836425781, "learning_rate": 4.0068679282573255e-05, "loss": 5.7124, "step": 4020 }, { "epoch": 0.2, "grad_norm": 1.7579398155212402, "learning_rate": 4.005879737141163e-05, "loss": 5.7449, "step": 4024 }, { "epoch": 0.2, "grad_norm": 2.5005486011505127, "learning_rate": 4.004891546025001e-05, "loss": 5.6679, "step": 4028 }, { "epoch": 0.2, "grad_norm": 2.033926010131836, "learning_rate": 4.0039033549088394e-05, "loss": 5.8639, "step": 4032 }, { "epoch": 0.2, "grad_norm": 2.0436344146728516, "learning_rate": 4.0029151637926776e-05, "loss": 5.7561, "step": 4036 }, { "epoch": 0.2, "grad_norm": 1.592089295387268, "learning_rate": 4.001926972676516e-05, "loss": 5.7275, "step": 4040 }, { "epoch": 0.2, "grad_norm": 1.860810399055481, "learning_rate": 4.000938781560354e-05, "loss": 5.829, "step": 4044 }, { "epoch": 0.2, "grad_norm": 1.988085150718689, "learning_rate": 3.999950590444192e-05, "loss": 5.7726, "step": 4048 }, { "epoch": 0.2, "grad_norm": 1.7819504737854004, "learning_rate": 3.99896239932803e-05, "loss": 5.6843, "step": 4052 }, { "epoch": 0.2, "grad_norm": 2.2862792015075684, "learning_rate": 3.997974208211868e-05, "loss": 5.7483, "step": 4056 }, { "epoch": 0.2, "grad_norm": 1.5969876050949097, "learning_rate": 3.996986017095706e-05, "loss": 5.7825, "step": 4060 }, { "epoch": 0.2, "grad_norm": 
1.5458773374557495, "learning_rate": 3.9959978259795444e-05, "loss": 5.7123, "step": 4064 }, { "epoch": 0.2, "grad_norm": 1.516552209854126, "learning_rate": 3.9950096348633826e-05, "loss": 5.7107, "step": 4068 }, { "epoch": 0.2, "grad_norm": 1.6216940879821777, "learning_rate": 3.994021443747221e-05, "loss": 5.7065, "step": 4072 }, { "epoch": 0.2, "grad_norm": 2.020914077758789, "learning_rate": 3.993033252631059e-05, "loss": 5.938, "step": 4076 }, { "epoch": 0.2, "grad_norm": 1.9217082262039185, "learning_rate": 3.992045061514897e-05, "loss": 5.8464, "step": 4080 }, { "epoch": 0.2, "grad_norm": 1.8421107530593872, "learning_rate": 3.9910568703987354e-05, "loss": 5.672, "step": 4084 }, { "epoch": 0.2, "grad_norm": 2.107970952987671, "learning_rate": 3.9900686792825736e-05, "loss": 5.787, "step": 4088 }, { "epoch": 0.2, "grad_norm": 2.8105857372283936, "learning_rate": 3.989080488166412e-05, "loss": 5.8617, "step": 4092 }, { "epoch": 0.2, "grad_norm": 1.9893261194229126, "learning_rate": 3.98809229705025e-05, "loss": 5.766, "step": 4096 }, { "epoch": 0.2, "grad_norm": 2.125051498413086, "learning_rate": 3.987104105934088e-05, "loss": 5.8332, "step": 4100 }, { "epoch": 0.2, "grad_norm": 1.6055108308792114, "learning_rate": 3.9861159148179264e-05, "loss": 5.7197, "step": 4104 }, { "epoch": 0.2, "grad_norm": 1.727008581161499, "learning_rate": 3.985127723701764e-05, "loss": 5.7569, "step": 4108 }, { "epoch": 0.2, "grad_norm": 1.6093586683273315, "learning_rate": 3.984139532585602e-05, "loss": 5.6682, "step": 4112 }, { "epoch": 0.2, "grad_norm": 2.0254461765289307, "learning_rate": 3.9831513414694404e-05, "loss": 5.7424, "step": 4116 }, { "epoch": 0.2, "grad_norm": 1.8866037130355835, "learning_rate": 3.9821631503532786e-05, "loss": 5.8635, "step": 4120 }, { "epoch": 0.2, "grad_norm": 1.5677746534347534, "learning_rate": 3.981174959237117e-05, "loss": 5.8245, "step": 4124 }, { "epoch": 0.2, "grad_norm": 1.763741374015808, "learning_rate": 3.980186768120955e-05, "loss": 
5.7215, "step": 4128 }, { "epoch": 0.2, "grad_norm": 1.9562249183654785, "learning_rate": 3.979198577004793e-05, "loss": 5.7907, "step": 4132 }, { "epoch": 0.2, "grad_norm": 1.8377013206481934, "learning_rate": 3.978210385888631e-05, "loss": 5.6803, "step": 4136 }, { "epoch": 0.2, "grad_norm": 1.684601902961731, "learning_rate": 3.977222194772469e-05, "loss": 5.772, "step": 4140 }, { "epoch": 0.2, "grad_norm": 1.5692558288574219, "learning_rate": 3.976234003656307e-05, "loss": 5.8362, "step": 4144 }, { "epoch": 0.2, "grad_norm": 1.6108436584472656, "learning_rate": 3.9752458125401454e-05, "loss": 5.7818, "step": 4148 }, { "epoch": 0.21, "grad_norm": 1.75248384475708, "learning_rate": 3.9742576214239836e-05, "loss": 5.8399, "step": 4152 }, { "epoch": 0.21, "grad_norm": 1.5307252407073975, "learning_rate": 3.973269430307822e-05, "loss": 5.7638, "step": 4156 }, { "epoch": 0.21, "grad_norm": 1.4091581106185913, "learning_rate": 3.97228123919166e-05, "loss": 5.7551, "step": 4160 }, { "epoch": 0.21, "grad_norm": 1.6635953187942505, "learning_rate": 3.971293048075498e-05, "loss": 5.7285, "step": 4164 }, { "epoch": 0.21, "grad_norm": 1.8554000854492188, "learning_rate": 3.970304856959336e-05, "loss": 5.5641, "step": 4168 }, { "epoch": 0.21, "grad_norm": 1.6862932443618774, "learning_rate": 3.969316665843174e-05, "loss": 5.8047, "step": 4172 }, { "epoch": 0.21, "grad_norm": 1.9776607751846313, "learning_rate": 3.968328474727012e-05, "loss": 5.7278, "step": 4176 }, { "epoch": 0.21, "grad_norm": 1.6065359115600586, "learning_rate": 3.96734028361085e-05, "loss": 5.7115, "step": 4180 }, { "epoch": 0.21, "grad_norm": 2.3388211727142334, "learning_rate": 3.9663520924946885e-05, "loss": 5.6954, "step": 4184 }, { "epoch": 0.21, "grad_norm": 1.7472275495529175, "learning_rate": 3.965363901378527e-05, "loss": 5.834, "step": 4188 }, { "epoch": 0.21, "grad_norm": 1.7328163385391235, "learning_rate": 3.964375710262365e-05, "loss": 5.6081, "step": 4192 }, { "epoch": 0.21, "grad_norm": 
2.0004336833953857, "learning_rate": 3.963387519146203e-05, "loss": 5.7344, "step": 4196 }, { "epoch": 0.21, "grad_norm": 2.126734733581543, "learning_rate": 3.9623993280300414e-05, "loss": 5.7677, "step": 4200 }, { "epoch": 0.21, "grad_norm": 1.693859338760376, "learning_rate": 3.9614111369138796e-05, "loss": 5.7142, "step": 4204 }, { "epoch": 0.21, "grad_norm": 1.758854627609253, "learning_rate": 3.960422945797718e-05, "loss": 5.7381, "step": 4208 }, { "epoch": 0.21, "grad_norm": 2.1982223987579346, "learning_rate": 3.959434754681556e-05, "loss": 5.9032, "step": 4212 }, { "epoch": 0.21, "grad_norm": 1.588758111000061, "learning_rate": 3.958446563565394e-05, "loss": 5.6668, "step": 4216 }, { "epoch": 0.21, "grad_norm": 1.8513824939727783, "learning_rate": 3.957458372449232e-05, "loss": 5.5942, "step": 4220 }, { "epoch": 0.21, "grad_norm": 2.1672956943511963, "learning_rate": 3.95647018133307e-05, "loss": 5.8085, "step": 4224 }, { "epoch": 0.21, "grad_norm": 1.9962481260299683, "learning_rate": 3.955481990216908e-05, "loss": 5.7762, "step": 4228 }, { "epoch": 0.21, "grad_norm": 1.5460338592529297, "learning_rate": 3.954493799100746e-05, "loss": 5.7283, "step": 4232 }, { "epoch": 0.21, "grad_norm": 1.5077719688415527, "learning_rate": 3.9535056079845845e-05, "loss": 5.7272, "step": 4236 }, { "epoch": 0.21, "grad_norm": 2.141517400741577, "learning_rate": 3.952517416868423e-05, "loss": 5.7612, "step": 4240 }, { "epoch": 0.21, "grad_norm": 1.6528626680374146, "learning_rate": 3.951529225752261e-05, "loss": 5.6216, "step": 4244 }, { "epoch": 0.21, "grad_norm": 1.8636984825134277, "learning_rate": 3.950541034636099e-05, "loss": 5.8001, "step": 4248 }, { "epoch": 0.21, "grad_norm": 1.4790719747543335, "learning_rate": 3.949552843519937e-05, "loss": 5.7866, "step": 4252 }, { "epoch": 0.21, "grad_norm": 1.534711241722107, "learning_rate": 3.948564652403775e-05, "loss": 5.708, "step": 4256 }, { "epoch": 0.21, "grad_norm": 2.083681344985962, "learning_rate": 
3.947576461287613e-05, "loss": 5.6374, "step": 4260 }, { "epoch": 0.21, "grad_norm": 1.5164477825164795, "learning_rate": 3.946588270171451e-05, "loss": 5.7652, "step": 4264 }, { "epoch": 0.21, "grad_norm": 1.6196374893188477, "learning_rate": 3.9456000790552895e-05, "loss": 5.7647, "step": 4268 }, { "epoch": 0.21, "grad_norm": 2.348536491394043, "learning_rate": 3.944611887939128e-05, "loss": 5.6887, "step": 4272 }, { "epoch": 0.21, "grad_norm": 1.8124507665634155, "learning_rate": 3.943623696822966e-05, "loss": 5.8996, "step": 4276 }, { "epoch": 0.21, "grad_norm": 1.919488787651062, "learning_rate": 3.9426355057068034e-05, "loss": 5.8877, "step": 4280 }, { "epoch": 0.21, "grad_norm": 2.190864086151123, "learning_rate": 3.9416473145906416e-05, "loss": 5.6733, "step": 4284 }, { "epoch": 0.21, "grad_norm": 1.941540241241455, "learning_rate": 3.94065912347448e-05, "loss": 5.6711, "step": 4288 }, { "epoch": 0.21, "grad_norm": 1.46822190284729, "learning_rate": 3.939670932358318e-05, "loss": 5.7721, "step": 4292 }, { "epoch": 0.21, "grad_norm": 1.7537801265716553, "learning_rate": 3.938682741242156e-05, "loss": 5.663, "step": 4296 }, { "epoch": 0.21, "grad_norm": 1.6122314929962158, "learning_rate": 3.9376945501259945e-05, "loss": 5.6916, "step": 4300 }, { "epoch": 0.21, "grad_norm": 1.9700013399124146, "learning_rate": 3.936706359009833e-05, "loss": 5.7515, "step": 4304 }, { "epoch": 0.21, "grad_norm": 1.7138198614120483, "learning_rate": 3.935718167893671e-05, "loss": 5.6894, "step": 4308 }, { "epoch": 0.21, "grad_norm": 1.5922127962112427, "learning_rate": 3.934729976777509e-05, "loss": 5.6651, "step": 4312 }, { "epoch": 0.21, "grad_norm": 1.7139687538146973, "learning_rate": 3.933741785661347e-05, "loss": 5.8207, "step": 4316 }, { "epoch": 0.21, "grad_norm": 1.8785958290100098, "learning_rate": 3.9327535945451855e-05, "loss": 5.6558, "step": 4320 }, { "epoch": 0.21, "grad_norm": 1.6499451398849487, "learning_rate": 3.931765403429024e-05, "loss": 5.7528, "step": 
4324 }, { "epoch": 0.21, "grad_norm": 1.9110082387924194, "learning_rate": 3.930777212312862e-05, "loss": 5.7717, "step": 4328 }, { "epoch": 0.21, "grad_norm": 1.643752098083496, "learning_rate": 3.9297890211966994e-05, "loss": 5.7846, "step": 4332 }, { "epoch": 0.21, "grad_norm": 2.1606407165527344, "learning_rate": 3.9288008300805377e-05, "loss": 5.7187, "step": 4336 }, { "epoch": 0.21, "grad_norm": 1.6892307996749878, "learning_rate": 3.927812638964376e-05, "loss": 5.7547, "step": 4340 }, { "epoch": 0.21, "grad_norm": 1.863633632659912, "learning_rate": 3.926824447848214e-05, "loss": 5.6712, "step": 4344 }, { "epoch": 0.21, "grad_norm": 1.9984108209609985, "learning_rate": 3.925836256732052e-05, "loss": 5.6589, "step": 4348 }, { "epoch": 0.22, "grad_norm": 1.6699365377426147, "learning_rate": 3.9248480656158905e-05, "loss": 5.7613, "step": 4352 }, { "epoch": 0.22, "grad_norm": 1.7803456783294678, "learning_rate": 3.923859874499729e-05, "loss": 5.7216, "step": 4356 }, { "epoch": 0.22, "grad_norm": 1.8623511791229248, "learning_rate": 3.922871683383567e-05, "loss": 5.6227, "step": 4360 }, { "epoch": 0.22, "grad_norm": 1.5841137170791626, "learning_rate": 3.9218834922674044e-05, "loss": 5.6657, "step": 4364 }, { "epoch": 0.22, "grad_norm": 1.5960050821304321, "learning_rate": 3.9208953011512426e-05, "loss": 5.6565, "step": 4368 }, { "epoch": 0.22, "grad_norm": 1.5138053894042969, "learning_rate": 3.919907110035081e-05, "loss": 5.7149, "step": 4372 }, { "epoch": 0.22, "grad_norm": 2.044208288192749, "learning_rate": 3.918918918918919e-05, "loss": 5.659, "step": 4376 }, { "epoch": 0.22, "grad_norm": 1.7930463552474976, "learning_rate": 3.917930727802757e-05, "loss": 5.6993, "step": 4380 }, { "epoch": 0.22, "grad_norm": 1.9749689102172852, "learning_rate": 3.9169425366865954e-05, "loss": 5.6016, "step": 4384 }, { "epoch": 0.22, "grad_norm": 1.737301230430603, "learning_rate": 3.9159543455704337e-05, "loss": 5.651, "step": 4388 }, { "epoch": 0.22, "grad_norm": 
1.506861925125122, "learning_rate": 3.914966154454271e-05, "loss": 5.7291, "step": 4392 }, { "epoch": 0.22, "grad_norm": 1.6121779680252075, "learning_rate": 3.9139779633381094e-05, "loss": 5.6529, "step": 4396 }, { "epoch": 0.22, "grad_norm": 1.6343719959259033, "learning_rate": 3.9129897722219476e-05, "loss": 5.7278, "step": 4400 }, { "epoch": 0.22, "grad_norm": 1.9690921306610107, "learning_rate": 3.912001581105786e-05, "loss": 5.636, "step": 4404 }, { "epoch": 0.22, "grad_norm": 1.6430257558822632, "learning_rate": 3.911013389989624e-05, "loss": 5.7014, "step": 4408 }, { "epoch": 0.22, "grad_norm": 1.506526231765747, "learning_rate": 3.910025198873462e-05, "loss": 5.8489, "step": 4412 }, { "epoch": 0.22, "grad_norm": 1.704164743423462, "learning_rate": 3.9090370077573004e-05, "loss": 5.8284, "step": 4416 }, { "epoch": 0.22, "grad_norm": 1.7875306606292725, "learning_rate": 3.9080488166411386e-05, "loss": 5.633, "step": 4420 }, { "epoch": 0.22, "grad_norm": 1.9106501340866089, "learning_rate": 3.907060625524977e-05, "loss": 5.775, "step": 4424 }, { "epoch": 0.22, "grad_norm": 1.9393559694290161, "learning_rate": 3.906072434408815e-05, "loss": 5.689, "step": 4428 }, { "epoch": 0.22, "grad_norm": 1.8752541542053223, "learning_rate": 3.905084243292653e-05, "loss": 5.7892, "step": 4432 }, { "epoch": 0.22, "grad_norm": 1.5603359937667847, "learning_rate": 3.9040960521764914e-05, "loss": 5.8323, "step": 4436 }, { "epoch": 0.22, "grad_norm": 1.6264493465423584, "learning_rate": 3.9031078610603297e-05, "loss": 5.7913, "step": 4440 }, { "epoch": 0.22, "grad_norm": 1.8390896320343018, "learning_rate": 3.902119669944168e-05, "loss": 5.7695, "step": 4444 }, { "epoch": 0.22, "grad_norm": 1.5323280096054077, "learning_rate": 3.9011314788280054e-05, "loss": 5.7638, "step": 4448 }, { "epoch": 0.22, "grad_norm": 1.8048474788665771, "learning_rate": 3.9001432877118436e-05, "loss": 5.7824, "step": 4452 }, { "epoch": 0.22, "grad_norm": 1.657486915588379, "learning_rate": 
3.899155096595682e-05, "loss": 5.7037, "step": 4456 }, { "epoch": 0.22, "grad_norm": 1.7981828451156616, "learning_rate": 3.89816690547952e-05, "loss": 5.6465, "step": 4460 }, { "epoch": 0.22, "grad_norm": 1.5225144624710083, "learning_rate": 3.897178714363358e-05, "loss": 5.8237, "step": 4464 }, { "epoch": 0.22, "grad_norm": 1.4845541715621948, "learning_rate": 3.8961905232471964e-05, "loss": 5.7549, "step": 4468 }, { "epoch": 0.22, "grad_norm": 1.5387517213821411, "learning_rate": 3.8952023321310346e-05, "loss": 5.7666, "step": 4472 }, { "epoch": 0.22, "grad_norm": 1.7411534786224365, "learning_rate": 3.894214141014872e-05, "loss": 5.7204, "step": 4476 }, { "epoch": 0.22, "grad_norm": 1.622908592224121, "learning_rate": 3.8932259498987104e-05, "loss": 5.6133, "step": 4480 }, { "epoch": 0.22, "grad_norm": 1.8925143480300903, "learning_rate": 3.8922377587825486e-05, "loss": 5.7133, "step": 4484 }, { "epoch": 0.22, "grad_norm": 1.7361855506896973, "learning_rate": 3.891249567666387e-05, "loss": 5.7737, "step": 4488 }, { "epoch": 0.22, "grad_norm": 1.588912010192871, "learning_rate": 3.890261376550225e-05, "loss": 5.7253, "step": 4492 }, { "epoch": 0.22, "grad_norm": 1.7690430879592896, "learning_rate": 3.889273185434063e-05, "loss": 5.7693, "step": 4496 }, { "epoch": 0.22, "grad_norm": 2.1007232666015625, "learning_rate": 3.8882849943179014e-05, "loss": 5.733, "step": 4500 }, { "epoch": 0.22, "grad_norm": 2.1150882244110107, "learning_rate": 3.8872968032017396e-05, "loss": 5.6447, "step": 4504 }, { "epoch": 0.22, "grad_norm": 1.85843825340271, "learning_rate": 3.886308612085577e-05, "loss": 5.7652, "step": 4508 }, { "epoch": 0.22, "grad_norm": 1.6900382041931152, "learning_rate": 3.885320420969415e-05, "loss": 5.7937, "step": 4512 }, { "epoch": 0.22, "grad_norm": 1.7769362926483154, "learning_rate": 3.8843322298532535e-05, "loss": 5.7276, "step": 4516 }, { "epoch": 0.22, "grad_norm": 2.092283010482788, "learning_rate": 3.883344038737092e-05, "loss": 5.6712, "step": 
4520 }, { "epoch": 0.22, "grad_norm": 1.7213283777236938, "learning_rate": 3.88235584762093e-05, "loss": 5.7979, "step": 4524 }, { "epoch": 0.22, "grad_norm": 1.5792101621627808, "learning_rate": 3.881367656504768e-05, "loss": 5.7273, "step": 4528 }, { "epoch": 0.22, "grad_norm": 1.8439412117004395, "learning_rate": 3.8803794653886064e-05, "loss": 5.8053, "step": 4532 }, { "epoch": 0.22, "grad_norm": 2.0028135776519775, "learning_rate": 3.8793912742724446e-05, "loss": 5.7714, "step": 4536 }, { "epoch": 0.22, "grad_norm": 1.7748719453811646, "learning_rate": 3.878403083156283e-05, "loss": 5.8676, "step": 4540 }, { "epoch": 0.22, "grad_norm": 1.7069579362869263, "learning_rate": 3.877414892040121e-05, "loss": 5.5816, "step": 4544 }, { "epoch": 0.22, "grad_norm": 1.695499062538147, "learning_rate": 3.876426700923959e-05, "loss": 5.8817, "step": 4548 }, { "epoch": 0.22, "grad_norm": 1.573487639427185, "learning_rate": 3.8754385098077974e-05, "loss": 5.792, "step": 4552 }, { "epoch": 0.23, "grad_norm": 1.576565146446228, "learning_rate": 3.8744503186916356e-05, "loss": 5.6382, "step": 4556 }, { "epoch": 0.23, "grad_norm": 2.081883668899536, "learning_rate": 3.873462127575473e-05, "loss": 5.7344, "step": 4560 }, { "epoch": 0.23, "grad_norm": 1.9463318586349487, "learning_rate": 3.872473936459311e-05, "loss": 5.6469, "step": 4564 }, { "epoch": 0.23, "grad_norm": 1.8385138511657715, "learning_rate": 3.8714857453431495e-05, "loss": 5.8192, "step": 4568 }, { "epoch": 0.23, "grad_norm": 1.7394839525222778, "learning_rate": 3.870497554226988e-05, "loss": 5.5372, "step": 4572 }, { "epoch": 0.23, "grad_norm": 1.578692078590393, "learning_rate": 3.869509363110826e-05, "loss": 5.7304, "step": 4576 }, { "epoch": 0.23, "grad_norm": 1.551023006439209, "learning_rate": 3.868521171994664e-05, "loss": 5.8348, "step": 4580 }, { "epoch": 0.23, "grad_norm": 1.7546929121017456, "learning_rate": 3.8675329808785024e-05, "loss": 5.7532, "step": 4584 }, { "epoch": 0.23, "grad_norm": 
1.7034144401550293, "learning_rate": 3.8665447897623406e-05, "loss": 5.6995, "step": 4588 }, { "epoch": 0.23, "grad_norm": 1.4249943494796753, "learning_rate": 3.865556598646178e-05, "loss": 5.7219, "step": 4592 }, { "epoch": 0.23, "grad_norm": 1.5602226257324219, "learning_rate": 3.864568407530016e-05, "loss": 5.6544, "step": 4596 }, { "epoch": 0.23, "grad_norm": 1.4957911968231201, "learning_rate": 3.8635802164138545e-05, "loss": 5.7934, "step": 4600 }, { "epoch": 0.23, "grad_norm": 1.6986734867095947, "learning_rate": 3.862592025297693e-05, "loss": 5.7564, "step": 4604 }, { "epoch": 0.23, "grad_norm": 1.6847269535064697, "learning_rate": 3.861603834181531e-05, "loss": 5.583, "step": 4608 }, { "epoch": 0.23, "grad_norm": 1.73738431930542, "learning_rate": 3.860615643065369e-05, "loss": 5.6771, "step": 4612 }, { "epoch": 0.23, "grad_norm": 1.5951757431030273, "learning_rate": 3.859627451949207e-05, "loss": 5.6885, "step": 4616 }, { "epoch": 0.23, "grad_norm": 1.6416071653366089, "learning_rate": 3.858639260833045e-05, "loss": 5.7239, "step": 4620 }, { "epoch": 0.23, "grad_norm": 2.0191540718078613, "learning_rate": 3.857651069716883e-05, "loss": 5.6335, "step": 4624 }, { "epoch": 0.23, "grad_norm": 1.677107810974121, "learning_rate": 3.856662878600721e-05, "loss": 5.6885, "step": 4628 }, { "epoch": 0.23, "grad_norm": 1.619813323020935, "learning_rate": 3.8556746874845595e-05, "loss": 5.6838, "step": 4632 }, { "epoch": 0.23, "grad_norm": 1.8103737831115723, "learning_rate": 3.854686496368398e-05, "loss": 5.6114, "step": 4636 }, { "epoch": 0.23, "grad_norm": 1.6023643016815186, "learning_rate": 3.853698305252236e-05, "loss": 5.718, "step": 4640 }, { "epoch": 0.23, "grad_norm": 1.3481965065002441, "learning_rate": 3.852710114136074e-05, "loss": 5.6867, "step": 4644 }, { "epoch": 0.23, "grad_norm": 1.6693611145019531, "learning_rate": 3.851721923019912e-05, "loss": 5.8371, "step": 4648 }, { "epoch": 0.23, "grad_norm": 1.7420861721038818, "learning_rate": 
3.8507337319037505e-05, "loss": 5.7511, "step": 4652 }, { "epoch": 0.23, "grad_norm": 1.6563256978988647, "learning_rate": 3.849745540787589e-05, "loss": 5.64, "step": 4656 }, { "epoch": 0.23, "grad_norm": 1.7449144124984741, "learning_rate": 3.848757349671427e-05, "loss": 5.7528, "step": 4660 }, { "epoch": 0.23, "grad_norm": 1.9364055395126343, "learning_rate": 3.847769158555265e-05, "loss": 5.7674, "step": 4664 }, { "epoch": 0.23, "grad_norm": 1.8317818641662598, "learning_rate": 3.846780967439103e-05, "loss": 5.7468, "step": 4668 }, { "epoch": 0.23, "grad_norm": 1.949170470237732, "learning_rate": 3.8457927763229415e-05, "loss": 5.7701, "step": 4672 }, { "epoch": 0.23, "grad_norm": 1.7268710136413574, "learning_rate": 3.844804585206779e-05, "loss": 5.7698, "step": 4676 }, { "epoch": 0.23, "grad_norm": 1.8129808902740479, "learning_rate": 3.843816394090617e-05, "loss": 5.7412, "step": 4680 }, { "epoch": 0.23, "grad_norm": 1.815779209136963, "learning_rate": 3.8428282029744555e-05, "loss": 5.7174, "step": 4684 }, { "epoch": 0.23, "grad_norm": 1.8246920108795166, "learning_rate": 3.841840011858294e-05, "loss": 5.7602, "step": 4688 }, { "epoch": 0.23, "grad_norm": 1.7265750169754028, "learning_rate": 3.840851820742132e-05, "loss": 5.7137, "step": 4692 }, { "epoch": 0.23, "grad_norm": 2.4503543376922607, "learning_rate": 3.83986362962597e-05, "loss": 5.78, "step": 4696 }, { "epoch": 0.23, "grad_norm": 2.0876848697662354, "learning_rate": 3.838875438509808e-05, "loss": 5.67, "step": 4700 }, { "epoch": 0.23, "grad_norm": 1.5135231018066406, "learning_rate": 3.837887247393646e-05, "loss": 5.7372, "step": 4704 }, { "epoch": 0.23, "grad_norm": 1.9444183111190796, "learning_rate": 3.836899056277484e-05, "loss": 5.6755, "step": 4708 }, { "epoch": 0.23, "grad_norm": 1.8341116905212402, "learning_rate": 3.835910865161322e-05, "loss": 5.7714, "step": 4712 }, { "epoch": 0.23, "grad_norm": 1.6584597826004028, "learning_rate": 3.8349226740451604e-05, "loss": 5.6811, "step": 4716 
}, { "epoch": 0.23, "grad_norm": 1.7305800914764404, "learning_rate": 3.8339344829289987e-05, "loss": 5.6905, "step": 4720 }, { "epoch": 0.23, "grad_norm": 1.826827883720398, "learning_rate": 3.832946291812837e-05, "loss": 5.6126, "step": 4724 }, { "epoch": 0.23, "grad_norm": 1.5062451362609863, "learning_rate": 3.831958100696675e-05, "loss": 5.8465, "step": 4728 }, { "epoch": 0.23, "grad_norm": 1.7500250339508057, "learning_rate": 3.830969909580513e-05, "loss": 5.6855, "step": 4732 }, { "epoch": 0.23, "grad_norm": 2.0079612731933594, "learning_rate": 3.829981718464351e-05, "loss": 5.729, "step": 4736 }, { "epoch": 0.23, "grad_norm": 1.8388514518737793, "learning_rate": 3.828993527348189e-05, "loss": 5.7142, "step": 4740 }, { "epoch": 0.23, "grad_norm": 1.8375358581542969, "learning_rate": 3.828005336232027e-05, "loss": 5.6261, "step": 4744 }, { "epoch": 0.23, "grad_norm": 1.785122275352478, "learning_rate": 3.8270171451158654e-05, "loss": 5.7206, "step": 4748 }, { "epoch": 0.23, "grad_norm": 1.7444349527359009, "learning_rate": 3.8260289539997036e-05, "loss": 5.711, "step": 4752 }, { "epoch": 0.23, "grad_norm": 2.0970168113708496, "learning_rate": 3.825040762883542e-05, "loss": 5.7049, "step": 4756 }, { "epoch": 0.24, "grad_norm": 1.8633285760879517, "learning_rate": 3.82405257176738e-05, "loss": 5.7639, "step": 4760 }, { "epoch": 0.24, "grad_norm": 1.7935363054275513, "learning_rate": 3.8230643806512176e-05, "loss": 5.7429, "step": 4764 }, { "epoch": 0.24, "grad_norm": 2.099822521209717, "learning_rate": 3.8220761895350564e-05, "loss": 5.6922, "step": 4768 }, { "epoch": 0.24, "grad_norm": 1.7735134363174438, "learning_rate": 3.8210879984188947e-05, "loss": 5.777, "step": 4772 }, { "epoch": 0.24, "grad_norm": 1.62099027633667, "learning_rate": 3.820099807302733e-05, "loss": 5.7513, "step": 4776 }, { "epoch": 0.24, "grad_norm": 2.413672924041748, "learning_rate": 3.819111616186571e-05, "loss": 5.7412, "step": 4780 }, { "epoch": 0.24, "grad_norm": 
2.0373451709747314, "learning_rate": 3.818123425070409e-05, "loss": 5.8356, "step": 4784 }, { "epoch": 0.24, "grad_norm": 1.7266334295272827, "learning_rate": 3.817135233954247e-05, "loss": 5.7796, "step": 4788 }, { "epoch": 0.24, "grad_norm": 1.5502921342849731, "learning_rate": 3.816147042838085e-05, "loss": 5.6788, "step": 4792 }, { "epoch": 0.24, "grad_norm": 1.559435248374939, "learning_rate": 3.815158851721923e-05, "loss": 5.6855, "step": 4796 }, { "epoch": 0.24, "grad_norm": 1.8041141033172607, "learning_rate": 3.8141706606057614e-05, "loss": 5.712, "step": 4800 }, { "epoch": 0.24, "grad_norm": 1.526065468788147, "learning_rate": 3.8131824694895996e-05, "loss": 5.8179, "step": 4804 }, { "epoch": 0.24, "grad_norm": 1.73811674118042, "learning_rate": 3.812194278373438e-05, "loss": 5.6526, "step": 4808 }, { "epoch": 0.24, "grad_norm": 1.927445650100708, "learning_rate": 3.811206087257276e-05, "loss": 5.7575, "step": 4812 }, { "epoch": 0.24, "grad_norm": 1.8947980403900146, "learning_rate": 3.810217896141114e-05, "loss": 5.6713, "step": 4816 }, { "epoch": 0.24, "grad_norm": 1.586437702178955, "learning_rate": 3.809229705024952e-05, "loss": 5.7355, "step": 4820 }, { "epoch": 0.24, "grad_norm": 1.980283498764038, "learning_rate": 3.80824151390879e-05, "loss": 5.6553, "step": 4824 }, { "epoch": 0.24, "grad_norm": 1.615174651145935, "learning_rate": 3.807253322792628e-05, "loss": 5.7573, "step": 4828 }, { "epoch": 0.24, "grad_norm": 2.1818222999572754, "learning_rate": 3.8062651316764664e-05, "loss": 5.6662, "step": 4832 }, { "epoch": 0.24, "grad_norm": 2.164574146270752, "learning_rate": 3.8052769405603046e-05, "loss": 5.6212, "step": 4836 }, { "epoch": 0.24, "grad_norm": 2.612293243408203, "learning_rate": 3.804288749444143e-05, "loss": 5.6983, "step": 4840 }, { "epoch": 0.24, "grad_norm": 1.661539912223816, "learning_rate": 3.803300558327981e-05, "loss": 5.6968, "step": 4844 }, { "epoch": 0.24, "grad_norm": 1.7480438947677612, "learning_rate": 
3.8023123672118185e-05, "loss": 5.6364, "step": 4848 }, { "epoch": 0.24, "grad_norm": 1.5679854154586792, "learning_rate": 3.801324176095657e-05, "loss": 5.7243, "step": 4852 }, { "epoch": 0.24, "grad_norm": 1.688027024269104, "learning_rate": 3.800335984979495e-05, "loss": 5.7444, "step": 4856 }, { "epoch": 0.24, "grad_norm": 2.0255541801452637, "learning_rate": 3.799347793863333e-05, "loss": 5.7882, "step": 4860 }, { "epoch": 0.24, "grad_norm": 1.664778232574463, "learning_rate": 3.7983596027471714e-05, "loss": 5.8742, "step": 4864 }, { "epoch": 0.24, "grad_norm": 1.7059557437896729, "learning_rate": 3.7973714116310096e-05, "loss": 5.6959, "step": 4868 }, { "epoch": 0.24, "grad_norm": 1.6631444692611694, "learning_rate": 3.796383220514848e-05, "loss": 5.6701, "step": 4872 }, { "epoch": 0.24, "grad_norm": 2.2237651348114014, "learning_rate": 3.795395029398685e-05, "loss": 5.7526, "step": 4876 }, { "epoch": 0.24, "grad_norm": 1.5631194114685059, "learning_rate": 3.7944068382825235e-05, "loss": 5.6439, "step": 4880 }, { "epoch": 0.24, "grad_norm": 1.7338947057724, "learning_rate": 3.7934186471663624e-05, "loss": 5.699, "step": 4884 }, { "epoch": 0.24, "grad_norm": 1.682901382446289, "learning_rate": 3.7924304560502006e-05, "loss": 5.7413, "step": 4888 }, { "epoch": 0.24, "grad_norm": 1.9311175346374512, "learning_rate": 3.791442264934039e-05, "loss": 5.8328, "step": 4892 }, { "epoch": 0.24, "grad_norm": 1.4761751890182495, "learning_rate": 3.790454073817877e-05, "loss": 5.6389, "step": 4896 }, { "epoch": 0.24, "grad_norm": 1.6680078506469727, "learning_rate": 3.7894658827017145e-05, "loss": 5.81, "step": 4900 }, { "epoch": 0.24, "grad_norm": 2.0462136268615723, "learning_rate": 3.788477691585553e-05, "loss": 5.7153, "step": 4904 }, { "epoch": 0.24, "grad_norm": 1.5725479125976562, "learning_rate": 3.787489500469391e-05, "loss": 5.5877, "step": 4908 }, { "epoch": 0.24, "grad_norm": 1.8569021224975586, "learning_rate": 3.786501309353229e-05, "loss": 5.6218, "step": 
4912 }, { "epoch": 0.24, "grad_norm": 1.4288854598999023, "learning_rate": 3.7855131182370674e-05, "loss": 5.6984, "step": 4916 }, { "epoch": 0.24, "grad_norm": 1.483189582824707, "learning_rate": 3.7845249271209056e-05, "loss": 5.7004, "step": 4920 }, { "epoch": 0.24, "grad_norm": 1.9790661334991455, "learning_rate": 3.783536736004744e-05, "loss": 5.6083, "step": 4924 }, { "epoch": 0.24, "grad_norm": 1.6783969402313232, "learning_rate": 3.782548544888582e-05, "loss": 5.7083, "step": 4928 }, { "epoch": 0.24, "grad_norm": 1.763135552406311, "learning_rate": 3.7815603537724195e-05, "loss": 5.6475, "step": 4932 }, { "epoch": 0.24, "grad_norm": 1.8823894262313843, "learning_rate": 3.780572162656258e-05, "loss": 5.741, "step": 4936 }, { "epoch": 0.24, "grad_norm": 1.9821616411209106, "learning_rate": 3.779583971540096e-05, "loss": 5.7479, "step": 4940 }, { "epoch": 0.24, "grad_norm": 1.6714750528335571, "learning_rate": 3.778595780423934e-05, "loss": 5.7442, "step": 4944 }, { "epoch": 0.24, "grad_norm": 1.7158372402191162, "learning_rate": 3.777607589307772e-05, "loss": 5.7097, "step": 4948 }, { "epoch": 0.24, "grad_norm": 1.8387162685394287, "learning_rate": 3.7766193981916105e-05, "loss": 5.534, "step": 4952 }, { "epoch": 0.24, "grad_norm": 1.7854334115982056, "learning_rate": 3.775631207075449e-05, "loss": 5.7142, "step": 4956 }, { "epoch": 0.25, "grad_norm": 1.736302137374878, "learning_rate": 3.774643015959286e-05, "loss": 5.7211, "step": 4960 }, { "epoch": 0.25, "grad_norm": 2.1947836875915527, "learning_rate": 3.7736548248431245e-05, "loss": 5.6862, "step": 4964 }, { "epoch": 0.25, "grad_norm": 1.9635826349258423, "learning_rate": 3.772666633726963e-05, "loss": 5.7067, "step": 4968 }, { "epoch": 0.25, "grad_norm": 1.4694422483444214, "learning_rate": 3.771678442610801e-05, "loss": 5.6205, "step": 4972 }, { "epoch": 0.25, "grad_norm": 2.0147790908813477, "learning_rate": 3.770690251494639e-05, "loss": 5.7691, "step": 4976 }, { "epoch": 0.25, "grad_norm": 
1.6146892309188843, "learning_rate": 3.769702060378477e-05, "loss": 5.7103, "step": 4980 }, { "epoch": 0.25, "grad_norm": 2.098802089691162, "learning_rate": 3.7687138692623155e-05, "loss": 5.759, "step": 4984 }, { "epoch": 0.25, "grad_norm": 1.7809268236160278, "learning_rate": 3.767725678146154e-05, "loss": 5.7146, "step": 4988 }, { "epoch": 0.25, "grad_norm": 1.8064768314361572, "learning_rate": 3.766737487029991e-05, "loss": 5.6203, "step": 4992 }, { "epoch": 0.25, "grad_norm": 1.6928107738494873, "learning_rate": 3.76574929591383e-05, "loss": 5.8565, "step": 4996 }, { "epoch": 0.25, "grad_norm": 1.65586256980896, "learning_rate": 3.764761104797668e-05, "loss": 5.7671, "step": 5000 }, { "epoch": 0.25, "grad_norm": 2.0193347930908203, "learning_rate": 3.7637729136815065e-05, "loss": 5.8168, "step": 5004 }, { "epoch": 0.25, "grad_norm": 1.571082592010498, "learning_rate": 3.762784722565345e-05, "loss": 5.6741, "step": 5008 }, { "epoch": 0.25, "grad_norm": 1.5919872522354126, "learning_rate": 3.761796531449183e-05, "loss": 5.7805, "step": 5012 }, { "epoch": 0.25, "grad_norm": 1.8394559621810913, "learning_rate": 3.7608083403330205e-05, "loss": 5.7168, "step": 5016 }, { "epoch": 0.25, "grad_norm": 2.177563190460205, "learning_rate": 3.759820149216859e-05, "loss": 5.8527, "step": 5020 }, { "epoch": 0.25, "grad_norm": 1.9234048128128052, "learning_rate": 3.758831958100697e-05, "loss": 5.6813, "step": 5024 }, { "epoch": 0.25, "grad_norm": 1.7297863960266113, "learning_rate": 3.757843766984535e-05, "loss": 5.5673, "step": 5028 }, { "epoch": 0.25, "grad_norm": 1.6289762258529663, "learning_rate": 3.756855575868373e-05, "loss": 5.6836, "step": 5032 }, { "epoch": 0.25, "grad_norm": 1.9951887130737305, "learning_rate": 3.7558673847522115e-05, "loss": 5.7777, "step": 5036 }, { "epoch": 0.25, "grad_norm": 1.6676596403121948, "learning_rate": 3.75487919363605e-05, "loss": 5.731, "step": 5040 }, { "epoch": 0.25, "grad_norm": 1.914063811302185, "learning_rate": 
3.753891002519887e-05, "loss": 5.747, "step": 5044 }, { "epoch": 0.25, "grad_norm": 1.8118635416030884, "learning_rate": 3.7529028114037254e-05, "loss": 5.5851, "step": 5048 }, { "epoch": 0.25, "grad_norm": 1.7621952295303345, "learning_rate": 3.7519146202875637e-05, "loss": 5.8087, "step": 5052 }, { "epoch": 0.25, "grad_norm": 2.0561869144439697, "learning_rate": 3.750926429171402e-05, "loss": 5.7153, "step": 5056 }, { "epoch": 0.25, "grad_norm": 1.6286967992782593, "learning_rate": 3.74993823805524e-05, "loss": 5.7822, "step": 5060 }, { "epoch": 0.25, "grad_norm": 2.714860439300537, "learning_rate": 3.748950046939078e-05, "loss": 5.7273, "step": 5064 }, { "epoch": 0.25, "grad_norm": 1.5165153741836548, "learning_rate": 3.7479618558229165e-05, "loss": 5.6865, "step": 5068 }, { "epoch": 0.25, "grad_norm": 1.6314889192581177, "learning_rate": 3.746973664706755e-05, "loss": 5.91, "step": 5072 }, { "epoch": 0.25, "grad_norm": 2.141998529434204, "learning_rate": 3.745985473590592e-05, "loss": 5.796, "step": 5076 }, { "epoch": 0.25, "grad_norm": 1.727159857749939, "learning_rate": 3.7449972824744304e-05, "loss": 5.7056, "step": 5080 }, { "epoch": 0.25, "grad_norm": 1.6345195770263672, "learning_rate": 3.7440090913582686e-05, "loss": 5.6393, "step": 5084 }, { "epoch": 0.25, "grad_norm": 1.8869349956512451, "learning_rate": 3.743020900242107e-05, "loss": 5.6482, "step": 5088 }, { "epoch": 0.25, "grad_norm": 2.2009177207946777, "learning_rate": 3.742032709125945e-05, "loss": 5.762, "step": 5092 }, { "epoch": 0.25, "grad_norm": 1.64715576171875, "learning_rate": 3.741044518009783e-05, "loss": 5.6038, "step": 5096 }, { "epoch": 0.25, "grad_norm": 1.730360984802246, "learning_rate": 3.7400563268936215e-05, "loss": 5.6776, "step": 5100 }, { "epoch": 0.25, "grad_norm": 1.7169520854949951, "learning_rate": 3.739068135777459e-05, "loss": 5.7149, "step": 5104 }, { "epoch": 0.25, "grad_norm": 1.6068758964538574, "learning_rate": 3.738079944661297e-05, "loss": 5.7772, "step": 5108 
}, { "epoch": 0.25, "grad_norm": 2.215623378753662, "learning_rate": 3.737091753545136e-05, "loss": 5.697, "step": 5112 }, { "epoch": 0.25, "grad_norm": 2.080639123916626, "learning_rate": 3.736103562428974e-05, "loss": 5.6851, "step": 5116 }, { "epoch": 0.25, "grad_norm": 2.041546583175659, "learning_rate": 3.7351153713128125e-05, "loss": 5.7803, "step": 5120 }, { "epoch": 0.25, "grad_norm": 1.9273161888122559, "learning_rate": 3.734127180196651e-05, "loss": 5.6727, "step": 5124 }, { "epoch": 0.25, "grad_norm": 1.5295966863632202, "learning_rate": 3.733138989080488e-05, "loss": 5.7002, "step": 5128 }, { "epoch": 0.25, "grad_norm": 1.5543110370635986, "learning_rate": 3.7321507979643264e-05, "loss": 5.7539, "step": 5132 }, { "epoch": 0.25, "grad_norm": 1.867628574371338, "learning_rate": 3.7311626068481646e-05, "loss": 5.6902, "step": 5136 }, { "epoch": 0.25, "grad_norm": 1.7172714471817017, "learning_rate": 3.730174415732003e-05, "loss": 5.7929, "step": 5140 }, { "epoch": 0.25, "grad_norm": 1.8065274953842163, "learning_rate": 3.729186224615841e-05, "loss": 5.7148, "step": 5144 }, { "epoch": 0.25, "grad_norm": 1.6474413871765137, "learning_rate": 3.728198033499679e-05, "loss": 5.6105, "step": 5148 }, { "epoch": 0.25, "grad_norm": 2.2249326705932617, "learning_rate": 3.7272098423835175e-05, "loss": 5.7129, "step": 5152 }, { "epoch": 0.25, "grad_norm": 1.8622581958770752, "learning_rate": 3.7262216512673557e-05, "loss": 5.6788, "step": 5156 }, { "epoch": 0.25, "grad_norm": 1.7544127702713013, "learning_rate": 3.725233460151193e-05, "loss": 5.6447, "step": 5160 }, { "epoch": 0.26, "grad_norm": 1.9091047048568726, "learning_rate": 3.7242452690350314e-05, "loss": 5.6228, "step": 5164 }, { "epoch": 0.26, "grad_norm": 1.5641545057296753, "learning_rate": 3.7232570779188696e-05, "loss": 5.7202, "step": 5168 }, { "epoch": 0.26, "grad_norm": 1.8679791688919067, "learning_rate": 3.722268886802708e-05, "loss": 5.7362, "step": 5172 }, { "epoch": 0.26, "grad_norm": 
1.9691587686538696, "learning_rate": 3.721280695686546e-05, "loss": 5.6811, "step": 5176 }, { "epoch": 0.26, "grad_norm": 2.0612921714782715, "learning_rate": 3.720292504570384e-05, "loss": 5.7, "step": 5180 }, { "epoch": 0.26, "grad_norm": 1.4250149726867676, "learning_rate": 3.7193043134542224e-05, "loss": 5.7469, "step": 5184 }, { "epoch": 0.26, "grad_norm": 2.013101577758789, "learning_rate": 3.71831612233806e-05, "loss": 5.6312, "step": 5188 }, { "epoch": 0.26, "grad_norm": 1.8997466564178467, "learning_rate": 3.717327931221898e-05, "loss": 5.6744, "step": 5192 }, { "epoch": 0.26, "grad_norm": 1.4980552196502686, "learning_rate": 3.7163397401057364e-05, "loss": 5.5909, "step": 5196 }, { "epoch": 0.26, "grad_norm": 1.8174835443496704, "learning_rate": 3.7153515489895746e-05, "loss": 5.7095, "step": 5200 }, { "epoch": 0.26, "grad_norm": 1.4909565448760986, "learning_rate": 3.714363357873413e-05, "loss": 5.7022, "step": 5204 }, { "epoch": 0.26, "grad_norm": 1.5424959659576416, "learning_rate": 3.713375166757251e-05, "loss": 5.6704, "step": 5208 }, { "epoch": 0.26, "grad_norm": 1.5841034650802612, "learning_rate": 3.712386975641089e-05, "loss": 5.6546, "step": 5212 }, { "epoch": 0.26, "grad_norm": 1.7105164527893066, "learning_rate": 3.7113987845249274e-05, "loss": 5.6281, "step": 5216 }, { "epoch": 0.26, "grad_norm": 2.422480821609497, "learning_rate": 3.710410593408765e-05, "loss": 5.7145, "step": 5220 }, { "epoch": 0.26, "grad_norm": 1.8925801515579224, "learning_rate": 3.709422402292603e-05, "loss": 5.6512, "step": 5224 }, { "epoch": 0.26, "grad_norm": 1.7350256443023682, "learning_rate": 3.708434211176442e-05, "loss": 5.7257, "step": 5228 }, { "epoch": 0.26, "grad_norm": 1.9060580730438232, "learning_rate": 3.70744602006028e-05, "loss": 5.7056, "step": 5232 }, { "epoch": 0.26, "grad_norm": 1.6758359670639038, "learning_rate": 3.7064578289441184e-05, "loss": 5.795, "step": 5236 }, { "epoch": 0.26, "grad_norm": 1.7977421283721924, "learning_rate": 
3.7054696378279566e-05, "loss": 5.6278, "step": 5240 }, { "epoch": 0.26, "grad_norm": 1.7420555353164673, "learning_rate": 3.704481446711794e-05, "loss": 5.619, "step": 5244 }, { "epoch": 0.26, "grad_norm": 1.7721624374389648, "learning_rate": 3.7034932555956324e-05, "loss": 5.5908, "step": 5248 }, { "epoch": 0.26, "grad_norm": 1.9039374589920044, "learning_rate": 3.7025050644794706e-05, "loss": 5.7308, "step": 5252 }, { "epoch": 0.26, "grad_norm": 1.9757115840911865, "learning_rate": 3.701516873363309e-05, "loss": 5.7986, "step": 5256 }, { "epoch": 0.26, "grad_norm": 1.6613463163375854, "learning_rate": 3.700528682247147e-05, "loss": 5.6056, "step": 5260 }, { "epoch": 0.26, "grad_norm": 1.9101234674453735, "learning_rate": 3.699540491130985e-05, "loss": 5.7109, "step": 5264 }, { "epoch": 0.26, "grad_norm": 2.04067325592041, "learning_rate": 3.6985523000148234e-05, "loss": 5.7513, "step": 5268 }, { "epoch": 0.26, "grad_norm": 1.8164728879928589, "learning_rate": 3.697564108898661e-05, "loss": 5.5739, "step": 5272 }, { "epoch": 0.26, "grad_norm": 1.9348748922348022, "learning_rate": 3.696575917782499e-05, "loss": 5.6177, "step": 5276 }, { "epoch": 0.26, "grad_norm": 2.20129132270813, "learning_rate": 3.695587726666337e-05, "loss": 5.7036, "step": 5280 }, { "epoch": 0.26, "grad_norm": 2.0877761840820312, "learning_rate": 3.6945995355501755e-05, "loss": 5.6192, "step": 5284 }, { "epoch": 0.26, "grad_norm": 1.6023980379104614, "learning_rate": 3.693611344434014e-05, "loss": 5.6557, "step": 5288 }, { "epoch": 0.26, "grad_norm": 1.4598197937011719, "learning_rate": 3.692623153317852e-05, "loss": 5.6821, "step": 5292 }, { "epoch": 0.26, "grad_norm": 1.9218881130218506, "learning_rate": 3.69163496220169e-05, "loss": 5.7097, "step": 5296 }, { "epoch": 0.26, "grad_norm": 1.91704261302948, "learning_rate": 3.6906467710855284e-05, "loss": 5.7038, "step": 5300 }, { "epoch": 0.26, "grad_norm": 1.594611406326294, "learning_rate": 3.689658579969366e-05, "loss": 5.5904, "step": 
5304 }, { "epoch": 0.26, "grad_norm": 1.570440649986267, "learning_rate": 3.688670388853204e-05, "loss": 5.5548, "step": 5308 }, { "epoch": 0.26, "grad_norm": 1.8532248735427856, "learning_rate": 3.687682197737042e-05, "loss": 5.691, "step": 5312 }, { "epoch": 0.26, "grad_norm": 1.8759549856185913, "learning_rate": 3.6866940066208805e-05, "loss": 5.783, "step": 5316 }, { "epoch": 0.26, "grad_norm": 2.0743143558502197, "learning_rate": 3.685705815504719e-05, "loss": 5.8346, "step": 5320 }, { "epoch": 0.26, "grad_norm": 1.7164572477340698, "learning_rate": 3.684717624388557e-05, "loss": 5.6427, "step": 5324 }, { "epoch": 0.26, "grad_norm": 1.8269315958023071, "learning_rate": 3.683729433272395e-05, "loss": 5.7506, "step": 5328 }, { "epoch": 0.26, "grad_norm": 1.8456995487213135, "learning_rate": 3.6827412421562327e-05, "loss": 5.6247, "step": 5332 }, { "epoch": 0.26, "grad_norm": 1.7062360048294067, "learning_rate": 3.681753051040071e-05, "loss": 5.6777, "step": 5336 }, { "epoch": 0.26, "grad_norm": 1.861578106880188, "learning_rate": 3.68076485992391e-05, "loss": 5.7011, "step": 5340 }, { "epoch": 0.26, "grad_norm": 1.5446622371673584, "learning_rate": 3.679776668807748e-05, "loss": 5.6793, "step": 5344 }, { "epoch": 0.26, "grad_norm": 1.8392596244812012, "learning_rate": 3.678788477691586e-05, "loss": 5.7193, "step": 5348 }, { "epoch": 0.26, "grad_norm": 2.0166003704071045, "learning_rate": 3.6778002865754244e-05, "loss": 5.6509, "step": 5352 }, { "epoch": 0.26, "grad_norm": 1.791098952293396, "learning_rate": 3.676812095459262e-05, "loss": 5.7176, "step": 5356 }, { "epoch": 0.26, "grad_norm": 1.9136872291564941, "learning_rate": 3.6758239043431e-05, "loss": 5.8457, "step": 5360 }, { "epoch": 0.27, "grad_norm": 2.2667651176452637, "learning_rate": 3.674835713226938e-05, "loss": 5.6658, "step": 5364 }, { "epoch": 0.27, "grad_norm": 1.8071796894073486, "learning_rate": 3.6738475221107765e-05, "loss": 5.7392, "step": 5368 }, { "epoch": 0.27, "grad_norm": 
1.8109067678451538, "learning_rate": 3.672859330994615e-05, "loss": 5.6318, "step": 5372 }, { "epoch": 0.27, "grad_norm": 1.8517425060272217, "learning_rate": 3.671871139878453e-05, "loss": 5.8122, "step": 5376 }, { "epoch": 0.27, "grad_norm": 1.8264243602752686, "learning_rate": 3.670882948762291e-05, "loss": 5.5755, "step": 5380 }, { "epoch": 0.27, "grad_norm": 1.98093581199646, "learning_rate": 3.669894757646129e-05, "loss": 5.7236, "step": 5384 }, { "epoch": 0.27, "grad_norm": 1.6828770637512207, "learning_rate": 3.668906566529967e-05, "loss": 5.7345, "step": 5388 }, { "epoch": 0.27, "grad_norm": 1.671663761138916, "learning_rate": 3.667918375413805e-05, "loss": 5.5266, "step": 5392 }, { "epoch": 0.27, "grad_norm": 2.236020565032959, "learning_rate": 3.666930184297643e-05, "loss": 5.6844, "step": 5396 }, { "epoch": 0.27, "grad_norm": 2.147383213043213, "learning_rate": 3.6659419931814815e-05, "loss": 5.6782, "step": 5400 }, { "epoch": 0.27, "grad_norm": 2.0428617000579834, "learning_rate": 3.66495380206532e-05, "loss": 5.6924, "step": 5404 }, { "epoch": 0.27, "grad_norm": 1.5944437980651855, "learning_rate": 3.663965610949158e-05, "loss": 5.5994, "step": 5408 }, { "epoch": 0.27, "grad_norm": 1.8434429168701172, "learning_rate": 3.662977419832996e-05, "loss": 5.6628, "step": 5412 }, { "epoch": 0.27, "grad_norm": 1.6614712476730347, "learning_rate": 3.6619892287168336e-05, "loss": 5.7582, "step": 5416 }, { "epoch": 0.27, "grad_norm": 1.9906529188156128, "learning_rate": 3.661001037600672e-05, "loss": 5.8577, "step": 5420 }, { "epoch": 0.27, "grad_norm": 1.6746339797973633, "learning_rate": 3.66001284648451e-05, "loss": 5.6674, "step": 5424 }, { "epoch": 0.27, "grad_norm": 2.0132009983062744, "learning_rate": 3.659024655368348e-05, "loss": 5.75, "step": 5428 }, { "epoch": 0.27, "grad_norm": 1.6676075458526611, "learning_rate": 3.6580364642521865e-05, "loss": 5.7463, "step": 5432 }, { "epoch": 0.27, "grad_norm": 1.644707441329956, "learning_rate": 
3.6570482731360247e-05, "loss": 5.5246, "step": 5436 }, { "epoch": 0.27, "grad_norm": 1.6207093000411987, "learning_rate": 3.656060082019863e-05, "loss": 5.6253, "step": 5440 }, { "epoch": 0.27, "grad_norm": 1.9052598476409912, "learning_rate": 3.6550718909037004e-05, "loss": 5.6262, "step": 5444 }, { "epoch": 0.27, "grad_norm": 1.976206660270691, "learning_rate": 3.6540836997875386e-05, "loss": 5.6091, "step": 5448 }, { "epoch": 0.27, "grad_norm": 1.858864665031433, "learning_rate": 3.653095508671377e-05, "loss": 5.6392, "step": 5452 }, { "epoch": 0.27, "grad_norm": 1.7667744159698486, "learning_rate": 3.652107317555216e-05, "loss": 5.7207, "step": 5456 }, { "epoch": 0.27, "grad_norm": 1.4944398403167725, "learning_rate": 3.651119126439054e-05, "loss": 5.8588, "step": 5460 }, { "epoch": 0.27, "grad_norm": 2.0682997703552246, "learning_rate": 3.650130935322892e-05, "loss": 5.7068, "step": 5464 }, { "epoch": 0.27, "grad_norm": 1.81681489944458, "learning_rate": 3.64914274420673e-05, "loss": 5.6977, "step": 5468 }, { "epoch": 0.27, "grad_norm": 1.6798757314682007, "learning_rate": 3.648154553090568e-05, "loss": 5.7233, "step": 5472 }, { "epoch": 0.27, "grad_norm": 1.8230525255203247, "learning_rate": 3.647166361974406e-05, "loss": 5.7243, "step": 5476 }, { "epoch": 0.27, "grad_norm": 1.4133111238479614, "learning_rate": 3.646178170858244e-05, "loss": 5.598, "step": 5480 }, { "epoch": 0.27, "grad_norm": 1.9329841136932373, "learning_rate": 3.6451899797420825e-05, "loss": 5.5917, "step": 5484 }, { "epoch": 0.27, "grad_norm": 1.5919127464294434, "learning_rate": 3.6442017886259207e-05, "loss": 5.6524, "step": 5488 }, { "epoch": 0.27, "grad_norm": 1.8038116693496704, "learning_rate": 3.643213597509759e-05, "loss": 5.6005, "step": 5492 }, { "epoch": 0.27, "grad_norm": 1.781088948249817, "learning_rate": 3.642225406393597e-05, "loss": 5.6194, "step": 5496 }, { "epoch": 0.27, "grad_norm": 2.1184487342834473, "learning_rate": 3.6412372152774346e-05, "loss": 5.7654, "step": 
5500 }, { "epoch": 0.27, "grad_norm": 1.6980026960372925, "learning_rate": 3.640249024161273e-05, "loss": 5.6293, "step": 5504 }, { "epoch": 0.27, "grad_norm": 1.9863158464431763, "learning_rate": 3.639260833045111e-05, "loss": 5.852, "step": 5508 }, { "epoch": 0.27, "grad_norm": 2.160196542739868, "learning_rate": 3.638272641928949e-05, "loss": 5.6083, "step": 5512 }, { "epoch": 0.27, "grad_norm": 1.8680537939071655, "learning_rate": 3.6372844508127874e-05, "loss": 5.6863, "step": 5516 }, { "epoch": 0.27, "grad_norm": 2.162466526031494, "learning_rate": 3.6362962596966256e-05, "loss": 5.7776, "step": 5520 }, { "epoch": 0.27, "grad_norm": 1.7698131799697876, "learning_rate": 3.635308068580464e-05, "loss": 5.5958, "step": 5524 }, { "epoch": 0.27, "grad_norm": 1.8697733879089355, "learning_rate": 3.6343198774643014e-05, "loss": 5.6393, "step": 5528 }, { "epoch": 0.27, "grad_norm": 1.4883641004562378, "learning_rate": 3.6333316863481396e-05, "loss": 5.6008, "step": 5532 }, { "epoch": 0.27, "grad_norm": 1.7563178539276123, "learning_rate": 3.632343495231978e-05, "loss": 5.7821, "step": 5536 }, { "epoch": 0.27, "grad_norm": 1.8179692029953003, "learning_rate": 3.631355304115816e-05, "loss": 5.6816, "step": 5540 }, { "epoch": 0.27, "grad_norm": 1.5550754070281982, "learning_rate": 3.630367112999654e-05, "loss": 5.6527, "step": 5544 }, { "epoch": 0.27, "grad_norm": 1.9609134197235107, "learning_rate": 3.6293789218834924e-05, "loss": 5.7238, "step": 5548 }, { "epoch": 0.27, "grad_norm": 1.6006996631622314, "learning_rate": 3.6283907307673306e-05, "loss": 5.6861, "step": 5552 }, { "epoch": 0.27, "grad_norm": 1.9726330041885376, "learning_rate": 3.627402539651169e-05, "loss": 5.7777, "step": 5556 }, { "epoch": 0.27, "grad_norm": 1.6247719526290894, "learning_rate": 3.626414348535006e-05, "loss": 5.7374, "step": 5560 }, { "epoch": 0.27, "grad_norm": 1.60383141040802, "learning_rate": 3.6254261574188445e-05, "loss": 5.7191, "step": 5564 }, { "epoch": 0.28, "grad_norm": 
1.6703567504882812, "learning_rate": 3.624437966302683e-05, "loss": 5.5841, "step": 5568 }, { "epoch": 0.28, "grad_norm": 1.7297996282577515, "learning_rate": 3.6234497751865216e-05, "loss": 5.7305, "step": 5572 }, { "epoch": 0.28, "grad_norm": 1.7804654836654663, "learning_rate": 3.62246158407036e-05, "loss": 5.6864, "step": 5576 }, { "epoch": 0.28, "grad_norm": 1.5437934398651123, "learning_rate": 3.621473392954198e-05, "loss": 5.7737, "step": 5580 }, { "epoch": 0.28, "grad_norm": 1.8865420818328857, "learning_rate": 3.6204852018380356e-05, "loss": 5.6338, "step": 5584 }, { "epoch": 0.28, "grad_norm": 2.0955584049224854, "learning_rate": 3.619497010721874e-05, "loss": 5.7677, "step": 5588 }, { "epoch": 0.28, "grad_norm": 1.8487645387649536, "learning_rate": 3.618508819605712e-05, "loss": 5.6992, "step": 5592 }, { "epoch": 0.28, "grad_norm": 1.652840495109558, "learning_rate": 3.61752062848955e-05, "loss": 5.5231, "step": 5596 }, { "epoch": 0.28, "grad_norm": 1.615386724472046, "learning_rate": 3.6165324373733884e-05, "loss": 5.721, "step": 5600 }, { "epoch": 0.28, "grad_norm": 1.860023856163025, "learning_rate": 3.6155442462572266e-05, "loss": 5.7149, "step": 5604 }, { "epoch": 0.28, "grad_norm": 1.7823337316513062, "learning_rate": 3.614556055141065e-05, "loss": 5.7699, "step": 5608 }, { "epoch": 0.28, "grad_norm": 1.9639289379119873, "learning_rate": 3.613567864024902e-05, "loss": 5.6564, "step": 5612 }, { "epoch": 0.28, "grad_norm": 1.6741266250610352, "learning_rate": 3.6125796729087405e-05, "loss": 5.6792, "step": 5616 }, { "epoch": 0.28, "grad_norm": 1.6273597478866577, "learning_rate": 3.611591481792579e-05, "loss": 5.6275, "step": 5620 }, { "epoch": 0.28, "grad_norm": 1.663702368736267, "learning_rate": 3.610603290676417e-05, "loss": 5.6499, "step": 5624 }, { "epoch": 0.28, "grad_norm": 1.3998336791992188, "learning_rate": 3.609615099560255e-05, "loss": 5.5426, "step": 5628 }, { "epoch": 0.28, "grad_norm": 1.5529056787490845, "learning_rate": 
3.6086269084440934e-05, "loss": 5.7016, "step": 5632 }, { "epoch": 0.28, "grad_norm": 1.6349724531173706, "learning_rate": 3.6076387173279316e-05, "loss": 5.6359, "step": 5636 }, { "epoch": 0.28, "grad_norm": 1.6268702745437622, "learning_rate": 3.60665052621177e-05, "loss": 5.68, "step": 5640 }, { "epoch": 0.28, "grad_norm": 1.83133065700531, "learning_rate": 3.605662335095607e-05, "loss": 5.644, "step": 5644 }, { "epoch": 0.28, "grad_norm": 1.7102351188659668, "learning_rate": 3.6046741439794455e-05, "loss": 5.7675, "step": 5648 }, { "epoch": 0.28, "grad_norm": 1.901228666305542, "learning_rate": 3.603685952863284e-05, "loss": 5.7194, "step": 5652 }, { "epoch": 0.28, "grad_norm": 1.9542912244796753, "learning_rate": 3.602697761747122e-05, "loss": 5.7244, "step": 5656 }, { "epoch": 0.28, "grad_norm": 1.9861336946487427, "learning_rate": 3.60170957063096e-05, "loss": 5.5597, "step": 5660 }, { "epoch": 0.28, "grad_norm": 1.6458784341812134, "learning_rate": 3.600721379514798e-05, "loss": 5.6276, "step": 5664 }, { "epoch": 0.28, "grad_norm": 2.3147518634796143, "learning_rate": 3.5997331883986365e-05, "loss": 5.623, "step": 5668 }, { "epoch": 0.28, "grad_norm": 1.6608690023422241, "learning_rate": 3.598744997282474e-05, "loss": 5.7577, "step": 5672 }, { "epoch": 0.28, "grad_norm": 1.4289793968200684, "learning_rate": 3.597756806166312e-05, "loss": 5.6755, "step": 5676 }, { "epoch": 0.28, "grad_norm": 1.8258533477783203, "learning_rate": 3.5967686150501505e-05, "loss": 5.6546, "step": 5680 }, { "epoch": 0.28, "grad_norm": 1.589067816734314, "learning_rate": 3.595780423933989e-05, "loss": 5.6725, "step": 5684 }, { "epoch": 0.28, "grad_norm": 1.7325801849365234, "learning_rate": 3.5947922328178276e-05, "loss": 5.7298, "step": 5688 }, { "epoch": 0.28, "grad_norm": 1.7161133289337158, "learning_rate": 3.593804041701666e-05, "loss": 5.5812, "step": 5692 }, { "epoch": 0.28, "grad_norm": 1.6219887733459473, "learning_rate": 3.592815850585503e-05, "loss": 5.5354, "step": 5696 
}, { "epoch": 0.28, "grad_norm": 2.099241256713867, "learning_rate": 3.5918276594693415e-05, "loss": 5.7611, "step": 5700 }, { "epoch": 0.28, "grad_norm": 1.576979160308838, "learning_rate": 3.59083946835318e-05, "loss": 5.7276, "step": 5704 }, { "epoch": 0.28, "grad_norm": 1.8135298490524292, "learning_rate": 3.589851277237018e-05, "loss": 5.6826, "step": 5708 }, { "epoch": 0.28, "grad_norm": 1.8552438020706177, "learning_rate": 3.588863086120856e-05, "loss": 5.6452, "step": 5712 }, { "epoch": 0.28, "grad_norm": 1.7755975723266602, "learning_rate": 3.587874895004694e-05, "loss": 5.7432, "step": 5716 }, { "epoch": 0.28, "grad_norm": 1.728273868560791, "learning_rate": 3.5868867038885325e-05, "loss": 5.8236, "step": 5720 }, { "epoch": 0.28, "grad_norm": 2.304983139038086, "learning_rate": 3.585898512772371e-05, "loss": 5.6947, "step": 5724 }, { "epoch": 0.28, "grad_norm": 1.9431263208389282, "learning_rate": 3.584910321656208e-05, "loss": 5.6718, "step": 5728 }, { "epoch": 0.28, "grad_norm": 1.6405879259109497, "learning_rate": 3.5839221305400465e-05, "loss": 5.7134, "step": 5732 }, { "epoch": 0.28, "grad_norm": 1.6948162317276, "learning_rate": 3.582933939423885e-05, "loss": 5.6881, "step": 5736 }, { "epoch": 0.28, "grad_norm": 1.9962084293365479, "learning_rate": 3.581945748307723e-05, "loss": 5.6576, "step": 5740 }, { "epoch": 0.28, "grad_norm": 2.2845137119293213, "learning_rate": 3.580957557191561e-05, "loss": 5.6479, "step": 5744 }, { "epoch": 0.28, "grad_norm": 1.9110279083251953, "learning_rate": 3.579969366075399e-05, "loss": 5.7157, "step": 5748 }, { "epoch": 0.28, "grad_norm": 1.9124606847763062, "learning_rate": 3.5789811749592375e-05, "loss": 5.8124, "step": 5752 }, { "epoch": 0.28, "grad_norm": 1.4642013311386108, "learning_rate": 3.577992983843075e-05, "loss": 5.7739, "step": 5756 }, { "epoch": 0.28, "grad_norm": 1.943257451057434, "learning_rate": 3.577004792726913e-05, "loss": 5.6817, "step": 5760 }, { "epoch": 0.28, "grad_norm": 1.9670331478118896, 
"learning_rate": 3.5760166016107515e-05, "loss": 5.5489, "step": 5764 }, { "epoch": 0.28, "grad_norm": 1.63418447971344, "learning_rate": 3.5750284104945897e-05, "loss": 5.5785, "step": 5768 }, { "epoch": 0.29, "grad_norm": 1.7041114568710327, "learning_rate": 3.574040219378428e-05, "loss": 5.7375, "step": 5772 }, { "epoch": 0.29, "grad_norm": 2.0340771675109863, "learning_rate": 3.573052028262266e-05, "loss": 5.6921, "step": 5776 }, { "epoch": 0.29, "grad_norm": 1.7758631706237793, "learning_rate": 3.572063837146104e-05, "loss": 5.688, "step": 5780 }, { "epoch": 0.29, "grad_norm": 1.6245428323745728, "learning_rate": 3.5710756460299425e-05, "loss": 5.7504, "step": 5784 }, { "epoch": 0.29, "grad_norm": 1.6857564449310303, "learning_rate": 3.57008745491378e-05, "loss": 5.5688, "step": 5788 }, { "epoch": 0.29, "grad_norm": 1.740413784980774, "learning_rate": 3.569099263797618e-05, "loss": 5.6504, "step": 5792 }, { "epoch": 0.29, "grad_norm": 1.8830409049987793, "learning_rate": 3.5681110726814564e-05, "loss": 5.6287, "step": 5796 }, { "epoch": 0.29, "grad_norm": 1.8001620769500732, "learning_rate": 3.567122881565295e-05, "loss": 5.8408, "step": 5800 }, { "epoch": 0.29, "grad_norm": 1.518395185470581, "learning_rate": 3.5661346904491335e-05, "loss": 5.6564, "step": 5804 }, { "epoch": 0.29, "grad_norm": 1.641120433807373, "learning_rate": 3.565146499332972e-05, "loss": 5.6351, "step": 5808 }, { "epoch": 0.29, "grad_norm": 2.126620292663574, "learning_rate": 3.564158308216809e-05, "loss": 5.6653, "step": 5812 }, { "epoch": 0.29, "grad_norm": 2.0950162410736084, "learning_rate": 3.5631701171006475e-05, "loss": 5.63, "step": 5816 }, { "epoch": 0.29, "grad_norm": 1.8449667692184448, "learning_rate": 3.5621819259844857e-05, "loss": 5.6507, "step": 5820 }, { "epoch": 0.29, "grad_norm": 1.8015673160552979, "learning_rate": 3.561193734868324e-05, "loss": 5.6993, "step": 5824 }, { "epoch": 0.29, "grad_norm": 1.9131306409835815, "learning_rate": 3.560205543752162e-05, "loss": 
5.8073, "step": 5828 }, { "epoch": 0.29, "grad_norm": 1.5919278860092163, "learning_rate": 3.559217352636e-05, "loss": 5.5955, "step": 5832 }, { "epoch": 0.29, "grad_norm": 1.8440625667572021, "learning_rate": 3.5582291615198385e-05, "loss": 5.7592, "step": 5836 }, { "epoch": 0.29, "grad_norm": 1.8236738443374634, "learning_rate": 3.557240970403676e-05, "loss": 5.7716, "step": 5840 }, { "epoch": 0.29, "grad_norm": 1.5875698328018188, "learning_rate": 3.556252779287514e-05, "loss": 5.6328, "step": 5844 }, { "epoch": 0.29, "grad_norm": 1.722981572151184, "learning_rate": 3.5552645881713524e-05, "loss": 5.576, "step": 5848 }, { "epoch": 0.29, "grad_norm": 1.9150844812393188, "learning_rate": 3.5542763970551906e-05, "loss": 5.6884, "step": 5852 }, { "epoch": 0.29, "grad_norm": 2.071272373199463, "learning_rate": 3.553288205939029e-05, "loss": 5.6853, "step": 5856 }, { "epoch": 0.29, "grad_norm": 1.585911512374878, "learning_rate": 3.552300014822867e-05, "loss": 5.8361, "step": 5860 }, { "epoch": 0.29, "grad_norm": 1.6303049325942993, "learning_rate": 3.551311823706705e-05, "loss": 5.6433, "step": 5864 }, { "epoch": 0.29, "grad_norm": 1.914119839668274, "learning_rate": 3.5503236325905435e-05, "loss": 5.6434, "step": 5868 }, { "epoch": 0.29, "grad_norm": 1.6995179653167725, "learning_rate": 3.549335441474381e-05, "loss": 5.517, "step": 5872 }, { "epoch": 0.29, "grad_norm": 1.6506705284118652, "learning_rate": 3.548347250358219e-05, "loss": 5.7451, "step": 5876 }, { "epoch": 0.29, "grad_norm": 1.7888357639312744, "learning_rate": 3.5473590592420574e-05, "loss": 5.7428, "step": 5880 }, { "epoch": 0.29, "grad_norm": 1.5513304471969604, "learning_rate": 3.5463708681258956e-05, "loss": 5.6993, "step": 5884 }, { "epoch": 0.29, "grad_norm": 1.9307150840759277, "learning_rate": 3.545382677009734e-05, "loss": 5.7173, "step": 5888 }, { "epoch": 0.29, "grad_norm": 1.4725440740585327, "learning_rate": 3.544394485893572e-05, "loss": 5.7675, "step": 5892 }, { "epoch": 0.29, 
"grad_norm": 2.1401283740997314, "learning_rate": 3.54340629477741e-05, "loss": 5.6074, "step": 5896 }, { "epoch": 0.29, "grad_norm": 1.7221059799194336, "learning_rate": 3.542418103661248e-05, "loss": 5.5691, "step": 5900 }, { "epoch": 0.29, "grad_norm": 2.4511234760284424, "learning_rate": 3.541429912545086e-05, "loss": 5.8371, "step": 5904 }, { "epoch": 0.29, "grad_norm": 1.6160163879394531, "learning_rate": 3.540441721428924e-05, "loss": 5.5454, "step": 5908 }, { "epoch": 0.29, "grad_norm": 1.6199833154678345, "learning_rate": 3.5394535303127624e-05, "loss": 5.618, "step": 5912 }, { "epoch": 0.29, "grad_norm": 1.7704286575317383, "learning_rate": 3.538465339196601e-05, "loss": 5.6579, "step": 5916 }, { "epoch": 0.29, "grad_norm": 2.315131425857544, "learning_rate": 3.5374771480804395e-05, "loss": 5.6309, "step": 5920 }, { "epoch": 0.29, "grad_norm": 1.7877010107040405, "learning_rate": 3.536488956964277e-05, "loss": 5.7664, "step": 5924 }, { "epoch": 0.29, "grad_norm": 1.655348777770996, "learning_rate": 3.535500765848115e-05, "loss": 5.7179, "step": 5928 }, { "epoch": 0.29, "grad_norm": 2.0273170471191406, "learning_rate": 3.5345125747319534e-05, "loss": 5.6274, "step": 5932 }, { "epoch": 0.29, "grad_norm": 1.6776584386825562, "learning_rate": 3.5335243836157916e-05, "loss": 5.722, "step": 5936 }, { "epoch": 0.29, "grad_norm": 1.862793207168579, "learning_rate": 3.53253619249963e-05, "loss": 5.6436, "step": 5940 }, { "epoch": 0.29, "grad_norm": 1.7735410928726196, "learning_rate": 3.531548001383468e-05, "loss": 5.5571, "step": 5944 }, { "epoch": 0.29, "grad_norm": 1.8442201614379883, "learning_rate": 3.530559810267306e-05, "loss": 5.5682, "step": 5948 }, { "epoch": 0.29, "grad_norm": 1.9948362112045288, "learning_rate": 3.5295716191511444e-05, "loss": 5.6108, "step": 5952 }, { "epoch": 0.29, "grad_norm": 1.7667597532272339, "learning_rate": 3.528583428034982e-05, "loss": 5.6617, "step": 5956 }, { "epoch": 0.29, "grad_norm": 1.569806456565857, "learning_rate": 
3.52759523691882e-05, "loss": 5.7098, "step": 5960 }, { "epoch": 0.29, "grad_norm": 1.666054129600525, "learning_rate": 3.5266070458026584e-05, "loss": 5.6734, "step": 5964 }, { "epoch": 0.29, "grad_norm": 1.6367340087890625, "learning_rate": 3.5256188546864966e-05, "loss": 5.6654, "step": 5968 }, { "epoch": 0.3, "grad_norm": 1.5181485414505005, "learning_rate": 3.524630663570335e-05, "loss": 5.6392, "step": 5972 }, { "epoch": 0.3, "grad_norm": 2.067699432373047, "learning_rate": 3.523642472454173e-05, "loss": 5.6753, "step": 5976 }, { "epoch": 0.3, "grad_norm": 1.6809829473495483, "learning_rate": 3.522654281338011e-05, "loss": 5.6829, "step": 5980 }, { "epoch": 0.3, "grad_norm": 1.5863231420516968, "learning_rate": 3.521666090221849e-05, "loss": 5.5417, "step": 5984 }, { "epoch": 0.3, "grad_norm": 1.9677083492279053, "learning_rate": 3.520677899105687e-05, "loss": 5.6524, "step": 5988 }, { "epoch": 0.3, "grad_norm": 1.5074371099472046, "learning_rate": 3.519689707989525e-05, "loss": 5.7254, "step": 5992 }, { "epoch": 0.3, "grad_norm": 1.552756667137146, "learning_rate": 3.518701516873363e-05, "loss": 5.5925, "step": 5996 }, { "epoch": 0.3, "grad_norm": 1.689927101135254, "learning_rate": 3.5177133257572015e-05, "loss": 5.629, "step": 6000 }, { "epoch": 0.3, "grad_norm": 1.6513864994049072, "learning_rate": 3.51672513464104e-05, "loss": 5.7166, "step": 6004 }, { "epoch": 0.3, "grad_norm": 1.8767340183258057, "learning_rate": 3.515736943524878e-05, "loss": 5.7523, "step": 6008 }, { "epoch": 0.3, "grad_norm": 2.079411029815674, "learning_rate": 3.5147487524087155e-05, "loss": 5.7882, "step": 6012 }, { "epoch": 0.3, "grad_norm": 1.992756962776184, "learning_rate": 3.513760561292554e-05, "loss": 5.587, "step": 6016 }, { "epoch": 0.3, "grad_norm": 1.477186679840088, "learning_rate": 3.512772370176392e-05, "loss": 5.7238, "step": 6020 }, { "epoch": 0.3, "grad_norm": 1.6832951307296753, "learning_rate": 3.51178417906023e-05, "loss": 5.627, "step": 6024 }, { "epoch": 0.3, 
"grad_norm": 1.8202719688415527, "learning_rate": 3.510795987944068e-05, "loss": 5.6409, "step": 6028 }, { "epoch": 0.3, "grad_norm": 1.7379246950149536, "learning_rate": 3.509807796827907e-05, "loss": 5.6045, "step": 6032 }, { "epoch": 0.3, "grad_norm": 1.7630070447921753, "learning_rate": 3.5088196057117454e-05, "loss": 5.6474, "step": 6036 }, { "epoch": 0.3, "grad_norm": 1.8432222604751587, "learning_rate": 3.507831414595583e-05, "loss": 5.694, "step": 6040 }, { "epoch": 0.3, "grad_norm": 2.1126575469970703, "learning_rate": 3.506843223479421e-05, "loss": 5.5952, "step": 6044 }, { "epoch": 0.3, "grad_norm": 1.8852741718292236, "learning_rate": 3.5058550323632593e-05, "loss": 5.4868, "step": 6048 }, { "epoch": 0.3, "grad_norm": 2.05767560005188, "learning_rate": 3.5048668412470975e-05, "loss": 5.6139, "step": 6052 }, { "epoch": 0.3, "grad_norm": 1.8169785737991333, "learning_rate": 3.503878650130936e-05, "loss": 5.6821, "step": 6056 }, { "epoch": 0.3, "grad_norm": 1.8394947052001953, "learning_rate": 3.502890459014774e-05, "loss": 5.7144, "step": 6060 }, { "epoch": 0.3, "grad_norm": 1.533893346786499, "learning_rate": 3.501902267898612e-05, "loss": 5.6372, "step": 6064 }, { "epoch": 0.3, "grad_norm": 1.7650554180145264, "learning_rate": 3.50091407678245e-05, "loss": 5.7372, "step": 6068 }, { "epoch": 0.3, "grad_norm": 1.7776150703430176, "learning_rate": 3.499925885666288e-05, "loss": 5.5772, "step": 6072 }, { "epoch": 0.3, "grad_norm": 1.804993987083435, "learning_rate": 3.498937694550126e-05, "loss": 5.6967, "step": 6076 }, { "epoch": 0.3, "grad_norm": 2.0517046451568604, "learning_rate": 3.497949503433964e-05, "loss": 5.7905, "step": 6080 }, { "epoch": 0.3, "grad_norm": 1.6444928646087646, "learning_rate": 3.4969613123178025e-05, "loss": 5.5937, "step": 6084 }, { "epoch": 0.3, "grad_norm": 2.079911231994629, "learning_rate": 3.495973121201641e-05, "loss": 5.6773, "step": 6088 }, { "epoch": 0.3, "grad_norm": 1.929654598236084, "learning_rate": 
3.494984930085479e-05, "loss": 5.7453, "step": 6092 }, { "epoch": 0.3, "grad_norm": 2.0268120765686035, "learning_rate": 3.4939967389693165e-05, "loss": 5.6351, "step": 6096 }, { "epoch": 0.3, "grad_norm": 1.7415413856506348, "learning_rate": 3.493008547853155e-05, "loss": 5.5666, "step": 6100 }, { "epoch": 0.3, "grad_norm": 1.8862464427947998, "learning_rate": 3.492020356736993e-05, "loss": 5.7066, "step": 6104 }, { "epoch": 0.3, "grad_norm": 1.8561365604400635, "learning_rate": 3.491032165620831e-05, "loss": 5.6259, "step": 6108 }, { "epoch": 0.3, "grad_norm": 1.681982398033142, "learning_rate": 3.490043974504669e-05, "loss": 5.5725, "step": 6112 }, { "epoch": 0.3, "grad_norm": 1.4180033206939697, "learning_rate": 3.4890557833885075e-05, "loss": 5.6196, "step": 6116 }, { "epoch": 0.3, "grad_norm": 1.7663787603378296, "learning_rate": 3.488067592272346e-05, "loss": 5.617, "step": 6120 }, { "epoch": 0.3, "grad_norm": 1.6531801223754883, "learning_rate": 3.487079401156184e-05, "loss": 5.5927, "step": 6124 }, { "epoch": 0.3, "grad_norm": 1.739673376083374, "learning_rate": 3.4860912100400214e-05, "loss": 5.6717, "step": 6128 }, { "epoch": 0.3, "grad_norm": 2.0703117847442627, "learning_rate": 3.4851030189238596e-05, "loss": 5.6708, "step": 6132 }, { "epoch": 0.3, "grad_norm": 1.5977106094360352, "learning_rate": 3.484114827807698e-05, "loss": 5.6917, "step": 6136 }, { "epoch": 0.3, "grad_norm": 1.7111542224884033, "learning_rate": 3.483126636691536e-05, "loss": 5.5998, "step": 6140 }, { "epoch": 0.3, "grad_norm": 2.138233184814453, "learning_rate": 3.482138445575375e-05, "loss": 5.6539, "step": 6144 }, { "epoch": 0.3, "grad_norm": 1.5501015186309814, "learning_rate": 3.481150254459213e-05, "loss": 5.6418, "step": 6148 }, { "epoch": 0.3, "grad_norm": 1.7464877367019653, "learning_rate": 3.480162063343051e-05, "loss": 5.5607, "step": 6152 }, { "epoch": 0.3, "grad_norm": 2.0313351154327393, "learning_rate": 3.479173872226889e-05, "loss": 5.7242, "step": 6156 }, { 
"epoch": 0.3, "grad_norm": 2.1652917861938477, "learning_rate": 3.478185681110727e-05, "loss": 5.6722, "step": 6160 }, { "epoch": 0.3, "grad_norm": 2.6227622032165527, "learning_rate": 3.477197489994565e-05, "loss": 5.5609, "step": 6164 }, { "epoch": 0.3, "grad_norm": 2.144803047180176, "learning_rate": 3.4762092988784035e-05, "loss": 5.6724, "step": 6168 }, { "epoch": 0.3, "grad_norm": 2.0227320194244385, "learning_rate": 3.475221107762242e-05, "loss": 5.6686, "step": 6172 }, { "epoch": 0.31, "grad_norm": 1.794042944908142, "learning_rate": 3.47423291664608e-05, "loss": 5.6792, "step": 6176 }, { "epoch": 0.31, "grad_norm": 1.6213692426681519, "learning_rate": 3.4732447255299174e-05, "loss": 5.6446, "step": 6180 }, { "epoch": 0.31, "grad_norm": 1.9633342027664185, "learning_rate": 3.4722565344137556e-05, "loss": 5.6725, "step": 6184 }, { "epoch": 0.31, "grad_norm": 1.4858800172805786, "learning_rate": 3.471268343297594e-05, "loss": 5.7152, "step": 6188 }, { "epoch": 0.31, "grad_norm": 1.4575623273849487, "learning_rate": 3.470280152181432e-05, "loss": 5.6749, "step": 6192 }, { "epoch": 0.31, "grad_norm": 2.1202147006988525, "learning_rate": 3.46929196106527e-05, "loss": 5.7791, "step": 6196 }, { "epoch": 0.31, "grad_norm": 1.9096757173538208, "learning_rate": 3.4683037699491085e-05, "loss": 5.4813, "step": 6200 }, { "epoch": 0.31, "grad_norm": 2.282806873321533, "learning_rate": 3.467315578832947e-05, "loss": 5.6761, "step": 6204 }, { "epoch": 0.31, "grad_norm": 1.6041266918182373, "learning_rate": 3.466327387716785e-05, "loss": 5.7067, "step": 6208 }, { "epoch": 0.31, "grad_norm": 1.9719665050506592, "learning_rate": 3.4653391966006224e-05, "loss": 5.6565, "step": 6212 }, { "epoch": 0.31, "grad_norm": 1.8574167490005493, "learning_rate": 3.4643510054844606e-05, "loss": 5.6941, "step": 6216 }, { "epoch": 0.31, "grad_norm": 1.8125001192092896, "learning_rate": 3.463362814368299e-05, "loss": 5.6066, "step": 6220 }, { "epoch": 0.31, "grad_norm": 1.7597132921218872, 
"learning_rate": 3.462374623252137e-05, "loss": 5.7592, "step": 6224 }, { "epoch": 0.31, "grad_norm": 2.0065159797668457, "learning_rate": 3.461386432135975e-05, "loss": 5.7911, "step": 6228 }, { "epoch": 0.31, "grad_norm": 2.03913950920105, "learning_rate": 3.4603982410198134e-05, "loss": 5.6571, "step": 6232 }, { "epoch": 0.31, "grad_norm": 2.114107608795166, "learning_rate": 3.4594100499036516e-05, "loss": 5.7014, "step": 6236 }, { "epoch": 0.31, "grad_norm": 1.7447437047958374, "learning_rate": 3.458421858787489e-05, "loss": 5.712, "step": 6240 }, { "epoch": 0.31, "grad_norm": 1.8154895305633545, "learning_rate": 3.4574336676713274e-05, "loss": 5.7241, "step": 6244 }, { "epoch": 0.31, "grad_norm": 2.1502370834350586, "learning_rate": 3.4564454765551656e-05, "loss": 5.6274, "step": 6248 }, { "epoch": 0.31, "grad_norm": 1.6285412311553955, "learning_rate": 3.455457285439004e-05, "loss": 5.6201, "step": 6252 }, { "epoch": 0.31, "grad_norm": 1.825987696647644, "learning_rate": 3.454469094322842e-05, "loss": 5.5851, "step": 6256 }, { "epoch": 0.31, "grad_norm": 2.224214553833008, "learning_rate": 3.453480903206681e-05, "loss": 5.7344, "step": 6260 }, { "epoch": 0.31, "grad_norm": 1.8334683179855347, "learning_rate": 3.4524927120905184e-05, "loss": 5.6966, "step": 6264 }, { "epoch": 0.31, "grad_norm": 1.599949598312378, "learning_rate": 3.4515045209743566e-05, "loss": 5.6679, "step": 6268 }, { "epoch": 0.31, "grad_norm": 1.9811068773269653, "learning_rate": 3.450516329858195e-05, "loss": 5.7008, "step": 6272 }, { "epoch": 0.31, "grad_norm": 2.3280673027038574, "learning_rate": 3.449528138742033e-05, "loss": 5.7588, "step": 6276 }, { "epoch": 0.31, "grad_norm": 1.8196971416473389, "learning_rate": 3.448539947625871e-05, "loss": 5.6388, "step": 6280 }, { "epoch": 0.31, "grad_norm": 1.4781452417373657, "learning_rate": 3.4475517565097094e-05, "loss": 5.6588, "step": 6284 }, { "epoch": 0.31, "grad_norm": 2.2682340145111084, "learning_rate": 3.4465635653935476e-05, 
"loss": 5.6705, "step": 6288 }, { "epoch": 0.31, "grad_norm": 1.7862980365753174, "learning_rate": 3.445575374277386e-05, "loss": 5.6001, "step": 6292 }, { "epoch": 0.31, "grad_norm": 1.870069980621338, "learning_rate": 3.4445871831612234e-05, "loss": 5.6938, "step": 6296 }, { "epoch": 0.31, "grad_norm": 2.120589017868042, "learning_rate": 3.4435989920450616e-05, "loss": 5.633, "step": 6300 }, { "epoch": 0.31, "grad_norm": 1.6841241121292114, "learning_rate": 3.4426108009289e-05, "loss": 5.6739, "step": 6304 }, { "epoch": 0.31, "grad_norm": 1.8620730638504028, "learning_rate": 3.441622609812738e-05, "loss": 5.6794, "step": 6308 }, { "epoch": 0.31, "grad_norm": 1.8764095306396484, "learning_rate": 3.440634418696576e-05, "loss": 5.724, "step": 6312 }, { "epoch": 0.31, "grad_norm": 1.4684795141220093, "learning_rate": 3.4396462275804144e-05, "loss": 5.6413, "step": 6316 }, { "epoch": 0.31, "grad_norm": 1.5952019691467285, "learning_rate": 3.4386580364642526e-05, "loss": 5.6075, "step": 6320 }, { "epoch": 0.31, "grad_norm": 1.8536378145217896, "learning_rate": 3.43766984534809e-05, "loss": 5.6342, "step": 6324 }, { "epoch": 0.31, "grad_norm": 1.8524278402328491, "learning_rate": 3.4366816542319283e-05, "loss": 5.6754, "step": 6328 }, { "epoch": 0.31, "grad_norm": 1.715560793876648, "learning_rate": 3.4356934631157665e-05, "loss": 5.7763, "step": 6332 }, { "epoch": 0.31, "grad_norm": 1.8335529565811157, "learning_rate": 3.434705271999605e-05, "loss": 5.6376, "step": 6336 }, { "epoch": 0.31, "grad_norm": 1.4588648080825806, "learning_rate": 3.433717080883443e-05, "loss": 5.6589, "step": 6340 }, { "epoch": 0.31, "grad_norm": 2.085669755935669, "learning_rate": 3.432728889767281e-05, "loss": 5.6664, "step": 6344 }, { "epoch": 0.31, "grad_norm": 1.9969209432601929, "learning_rate": 3.4317406986511194e-05, "loss": 5.6533, "step": 6348 }, { "epoch": 0.31, "grad_norm": 2.169795513153076, "learning_rate": 3.4307525075349576e-05, "loss": 5.7483, "step": 6352 }, { "epoch": 0.31, 
"grad_norm": 1.6342527866363525, "learning_rate": 3.429764316418795e-05, "loss": 5.6761, "step": 6356 }, { "epoch": 0.31, "grad_norm": 1.7355843782424927, "learning_rate": 3.428776125302633e-05, "loss": 5.7214, "step": 6360 }, { "epoch": 0.31, "grad_norm": 1.6192421913146973, "learning_rate": 3.4277879341864715e-05, "loss": 5.629, "step": 6364 }, { "epoch": 0.31, "grad_norm": 1.8149397373199463, "learning_rate": 3.42679974307031e-05, "loss": 5.6567, "step": 6368 }, { "epoch": 0.31, "grad_norm": 1.9818474054336548, "learning_rate": 3.425811551954148e-05, "loss": 5.6311, "step": 6372 }, { "epoch": 0.32, "grad_norm": 1.8166816234588623, "learning_rate": 3.424823360837987e-05, "loss": 5.6253, "step": 6376 }, { "epoch": 0.32, "grad_norm": 1.760532021522522, "learning_rate": 3.4238351697218243e-05, "loss": 5.5845, "step": 6380 }, { "epoch": 0.32, "grad_norm": 2.153517961502075, "learning_rate": 3.4228469786056625e-05, "loss": 5.5968, "step": 6384 }, { "epoch": 0.32, "grad_norm": 1.7822462320327759, "learning_rate": 3.421858787489501e-05, "loss": 5.547, "step": 6388 }, { "epoch": 0.32, "grad_norm": 1.9486993551254272, "learning_rate": 3.420870596373339e-05, "loss": 5.7527, "step": 6392 }, { "epoch": 0.32, "grad_norm": 1.8310834169387817, "learning_rate": 3.419882405257177e-05, "loss": 5.6288, "step": 6396 }, { "epoch": 0.32, "grad_norm": 1.9566413164138794, "learning_rate": 3.4188942141410154e-05, "loss": 5.5835, "step": 6400 }, { "epoch": 0.32, "grad_norm": 1.8529417514801025, "learning_rate": 3.4179060230248536e-05, "loss": 5.6979, "step": 6404 }, { "epoch": 0.32, "grad_norm": 1.4912470579147339, "learning_rate": 3.416917831908691e-05, "loss": 5.6043, "step": 6408 }, { "epoch": 0.32, "grad_norm": 1.7634273767471313, "learning_rate": 3.415929640792529e-05, "loss": 5.7066, "step": 6412 }, { "epoch": 0.32, "grad_norm": 1.8041954040527344, "learning_rate": 3.4149414496763675e-05, "loss": 5.6787, "step": 6416 }, { "epoch": 0.32, "grad_norm": 1.9450712203979492, 
"learning_rate": 3.413953258560206e-05, "loss": 5.6414, "step": 6420 }, { "epoch": 0.32, "grad_norm": 2.1971383094787598, "learning_rate": 3.412965067444044e-05, "loss": 5.6731, "step": 6424 }, { "epoch": 0.32, "grad_norm": 1.7693517208099365, "learning_rate": 3.411976876327882e-05, "loss": 5.6599, "step": 6428 }, { "epoch": 0.32, "grad_norm": 2.282921075820923, "learning_rate": 3.4109886852117203e-05, "loss": 5.6397, "step": 6432 }, { "epoch": 0.32, "grad_norm": 1.8509403467178345, "learning_rate": 3.4100004940955585e-05, "loss": 5.6985, "step": 6436 }, { "epoch": 0.32, "grad_norm": 1.8916041851043701, "learning_rate": 3.409012302979396e-05, "loss": 5.6531, "step": 6440 }, { "epoch": 0.32, "grad_norm": 1.615857481956482, "learning_rate": 3.408024111863234e-05, "loss": 5.605, "step": 6444 }, { "epoch": 0.32, "grad_norm": 1.6781501770019531, "learning_rate": 3.4070359207470725e-05, "loss": 5.6553, "step": 6448 }, { "epoch": 0.32, "grad_norm": 1.7929623126983643, "learning_rate": 3.406047729630911e-05, "loss": 5.6905, "step": 6452 }, { "epoch": 0.32, "grad_norm": 1.9167546033859253, "learning_rate": 3.405059538514749e-05, "loss": 5.614, "step": 6456 }, { "epoch": 0.32, "grad_norm": 1.7820324897766113, "learning_rate": 3.404071347398587e-05, "loss": 5.6548, "step": 6460 }, { "epoch": 0.32, "grad_norm": 2.117344379425049, "learning_rate": 3.403083156282425e-05, "loss": 5.6407, "step": 6464 }, { "epoch": 0.32, "grad_norm": 1.829023838043213, "learning_rate": 3.402094965166263e-05, "loss": 5.5939, "step": 6468 }, { "epoch": 0.32, "grad_norm": 1.7248526811599731, "learning_rate": 3.401106774050101e-05, "loss": 5.8424, "step": 6472 }, { "epoch": 0.32, "grad_norm": 1.576296329498291, "learning_rate": 3.400118582933939e-05, "loss": 5.5122, "step": 6476 }, { "epoch": 0.32, "grad_norm": 1.5682412385940552, "learning_rate": 3.3991303918177775e-05, "loss": 5.6437, "step": 6480 }, { "epoch": 0.32, "grad_norm": 1.5419074296951294, "learning_rate": 3.398142200701616e-05, "loss": 
5.6232, "step": 6484 }, { "epoch": 0.32, "grad_norm": 2.059434652328491, "learning_rate": 3.3971540095854545e-05, "loss": 5.6276, "step": 6488 }, { "epoch": 0.32, "grad_norm": 1.67318856716156, "learning_rate": 3.396165818469292e-05, "loss": 5.6936, "step": 6492 }, { "epoch": 0.32, "grad_norm": 1.6248462200164795, "learning_rate": 3.39517762735313e-05, "loss": 5.6305, "step": 6496 }, { "epoch": 0.32, "grad_norm": 1.914443016052246, "learning_rate": 3.3941894362369685e-05, "loss": 5.5594, "step": 6500 }, { "epoch": 0.32, "grad_norm": 1.6861499547958374, "learning_rate": 3.393201245120807e-05, "loss": 5.6522, "step": 6504 }, { "epoch": 0.32, "grad_norm": 1.8825210332870483, "learning_rate": 3.392213054004645e-05, "loss": 5.7009, "step": 6508 }, { "epoch": 0.32, "grad_norm": 1.8397966623306274, "learning_rate": 3.391224862888483e-05, "loss": 5.5723, "step": 6512 }, { "epoch": 0.32, "grad_norm": 1.7531960010528564, "learning_rate": 3.390236671772321e-05, "loss": 5.7406, "step": 6516 }, { "epoch": 0.32, "grad_norm": 1.8257700204849243, "learning_rate": 3.3892484806561595e-05, "loss": 5.6363, "step": 6520 }, { "epoch": 0.32, "grad_norm": 1.8056398630142212, "learning_rate": 3.388260289539997e-05, "loss": 5.6686, "step": 6524 }, { "epoch": 0.32, "grad_norm": 1.9961930513381958, "learning_rate": 3.387272098423835e-05, "loss": 5.5821, "step": 6528 }, { "epoch": 0.32, "grad_norm": 1.7457338571548462, "learning_rate": 3.3862839073076735e-05, "loss": 5.7391, "step": 6532 }, { "epoch": 0.32, "grad_norm": 1.8142015933990479, "learning_rate": 3.385295716191512e-05, "loss": 5.6761, "step": 6536 }, { "epoch": 0.32, "grad_norm": 1.6556874513626099, "learning_rate": 3.38430752507535e-05, "loss": 5.7818, "step": 6540 }, { "epoch": 0.32, "grad_norm": 1.9555789232254028, "learning_rate": 3.383319333959188e-05, "loss": 5.7287, "step": 6544 }, { "epoch": 0.32, "grad_norm": 1.5307425260543823, "learning_rate": 3.382331142843026e-05, "loss": 5.6996, "step": 6548 }, { "epoch": 0.32, 
"grad_norm": 2.0899040699005127, "learning_rate": 3.381342951726864e-05, "loss": 5.6487, "step": 6552 }, { "epoch": 0.32, "grad_norm": 1.913406491279602, "learning_rate": 3.380354760610702e-05, "loss": 5.5607, "step": 6556 }, { "epoch": 0.32, "grad_norm": 1.7522472143173218, "learning_rate": 3.37936656949454e-05, "loss": 5.5921, "step": 6560 }, { "epoch": 0.32, "grad_norm": 1.8658267259597778, "learning_rate": 3.3783783783783784e-05, "loss": 5.6933, "step": 6564 }, { "epoch": 0.32, "grad_norm": 1.596774697303772, "learning_rate": 3.3773901872622166e-05, "loss": 5.6883, "step": 6568 }, { "epoch": 0.32, "grad_norm": 1.769968032836914, "learning_rate": 3.376401996146055e-05, "loss": 5.6984, "step": 6572 }, { "epoch": 0.32, "grad_norm": 2.154975175857544, "learning_rate": 3.375413805029893e-05, "loss": 5.654, "step": 6576 }, { "epoch": 0.33, "grad_norm": 1.5026295185089111, "learning_rate": 3.3744256139137306e-05, "loss": 5.536, "step": 6580 }, { "epoch": 0.33, "grad_norm": 1.9100069999694824, "learning_rate": 3.373437422797569e-05, "loss": 5.7633, "step": 6584 }, { "epoch": 0.33, "grad_norm": 1.7092385292053223, "learning_rate": 3.372449231681407e-05, "loss": 5.6036, "step": 6588 }, { "epoch": 0.33, "grad_norm": 1.4627431631088257, "learning_rate": 3.371461040565245e-05, "loss": 5.6126, "step": 6592 }, { "epoch": 0.33, "grad_norm": 1.7545133829116821, "learning_rate": 3.3704728494490834e-05, "loss": 5.6248, "step": 6596 }, { "epoch": 0.33, "grad_norm": 1.8125765323638916, "learning_rate": 3.3694846583329216e-05, "loss": 5.6628, "step": 6600 }, { "epoch": 0.33, "grad_norm": 1.6120585203170776, "learning_rate": 3.3684964672167605e-05, "loss": 5.595, "step": 6604 }, { "epoch": 0.33, "grad_norm": 1.8188247680664062, "learning_rate": 3.367508276100598e-05, "loss": 5.5898, "step": 6608 }, { "epoch": 0.33, "grad_norm": 1.7864772081375122, "learning_rate": 3.366520084984436e-05, "loss": 5.6559, "step": 6612 }, { "epoch": 0.33, "grad_norm": 2.448668956756592, "learning_rate": 
3.3655318938682744e-05, "loss": 5.7192, "step": 6616 }, { "epoch": 0.33, "grad_norm": 1.7164603471755981, "learning_rate": 3.3645437027521126e-05, "loss": 5.613, "step": 6620 }, { "epoch": 0.33, "grad_norm": 1.6809614896774292, "learning_rate": 3.363555511635951e-05, "loss": 5.7096, "step": 6624 }, { "epoch": 0.33, "grad_norm": 1.6097911596298218, "learning_rate": 3.362567320519789e-05, "loss": 5.7016, "step": 6628 }, { "epoch": 0.33, "grad_norm": 2.04894757270813, "learning_rate": 3.361579129403627e-05, "loss": 5.6882, "step": 6632 }, { "epoch": 0.33, "grad_norm": 1.7453163862228394, "learning_rate": 3.360590938287465e-05, "loss": 5.7116, "step": 6636 }, { "epoch": 0.33, "grad_norm": 2.0974841117858887, "learning_rate": 3.359602747171303e-05, "loss": 5.685, "step": 6640 }, { "epoch": 0.33, "grad_norm": 1.907728672027588, "learning_rate": 3.358614556055141e-05, "loss": 5.6337, "step": 6644 }, { "epoch": 0.33, "grad_norm": 1.708357334136963, "learning_rate": 3.3576263649389794e-05, "loss": 5.7255, "step": 6648 }, { "epoch": 0.33, "grad_norm": 1.9592260122299194, "learning_rate": 3.3566381738228176e-05, "loss": 5.7785, "step": 6652 }, { "epoch": 0.33, "grad_norm": 1.7609105110168457, "learning_rate": 3.355649982706656e-05, "loss": 5.6758, "step": 6656 }, { "epoch": 0.33, "grad_norm": 1.6743985414505005, "learning_rate": 3.354661791590494e-05, "loss": 5.5952, "step": 6660 }, { "epoch": 0.33, "grad_norm": 1.8798311948776245, "learning_rate": 3.3536736004743315e-05, "loss": 5.7004, "step": 6664 }, { "epoch": 0.33, "grad_norm": 2.2910819053649902, "learning_rate": 3.35268540935817e-05, "loss": 5.5182, "step": 6668 }, { "epoch": 0.33, "grad_norm": 1.6029813289642334, "learning_rate": 3.351697218242008e-05, "loss": 5.7538, "step": 6672 }, { "epoch": 0.33, "grad_norm": 1.7451902627944946, "learning_rate": 3.350709027125846e-05, "loss": 5.7265, "step": 6676 }, { "epoch": 0.33, "grad_norm": 1.8161840438842773, "learning_rate": 3.3497208360096844e-05, "loss": 5.5846, "step": 
6680 }, { "epoch": 0.33, "grad_norm": 2.196413993835449, "learning_rate": 3.3487326448935226e-05, "loss": 5.7234, "step": 6684 }, { "epoch": 0.33, "grad_norm": 1.707971215248108, "learning_rate": 3.347744453777361e-05, "loss": 5.6536, "step": 6688 }, { "epoch": 0.33, "grad_norm": 1.8692013025283813, "learning_rate": 3.346756262661199e-05, "loss": 5.6981, "step": 6692 }, { "epoch": 0.33, "grad_norm": 1.7000290155410767, "learning_rate": 3.3457680715450365e-05, "loss": 5.5349, "step": 6696 }, { "epoch": 0.33, "grad_norm": 1.7592395544052124, "learning_rate": 3.344779880428875e-05, "loss": 5.5299, "step": 6700 }, { "epoch": 0.33, "grad_norm": 1.9749822616577148, "learning_rate": 3.343791689312713e-05, "loss": 5.6149, "step": 6704 }, { "epoch": 0.33, "grad_norm": 1.9164543151855469, "learning_rate": 3.342803498196551e-05, "loss": 5.6748, "step": 6708 }, { "epoch": 0.33, "grad_norm": 1.7371500730514526, "learning_rate": 3.3418153070803893e-05, "loss": 5.632, "step": 6712 }, { "epoch": 0.33, "grad_norm": 2.007580041885376, "learning_rate": 3.3408271159642275e-05, "loss": 5.6878, "step": 6716 }, { "epoch": 0.33, "grad_norm": 2.049675703048706, "learning_rate": 3.339838924848066e-05, "loss": 5.6347, "step": 6720 }, { "epoch": 0.33, "grad_norm": 1.5192993879318237, "learning_rate": 3.338850733731904e-05, "loss": 5.5949, "step": 6724 }, { "epoch": 0.33, "grad_norm": 1.7811031341552734, "learning_rate": 3.337862542615742e-05, "loss": 5.5796, "step": 6728 }, { "epoch": 0.33, "grad_norm": 2.0031299591064453, "learning_rate": 3.3368743514995804e-05, "loss": 5.6231, "step": 6732 }, { "epoch": 0.33, "grad_norm": 1.9391804933547974, "learning_rate": 3.3358861603834186e-05, "loss": 5.6029, "step": 6736 }, { "epoch": 0.33, "grad_norm": 1.847928762435913, "learning_rate": 3.334897969267257e-05, "loss": 5.6513, "step": 6740 }, { "epoch": 0.33, "grad_norm": 2.11073637008667, "learning_rate": 3.333909778151095e-05, "loss": 5.7021, "step": 6744 }, { "epoch": 0.33, "grad_norm": 
1.9884241819381714, "learning_rate": 3.3329215870349325e-05, "loss": 5.6343, "step": 6748 }, { "epoch": 0.33, "grad_norm": 2.0201947689056396, "learning_rate": 3.331933395918771e-05, "loss": 5.6233, "step": 6752 }, { "epoch": 0.33, "grad_norm": 1.783299207687378, "learning_rate": 3.330945204802609e-05, "loss": 5.6181, "step": 6756 }, { "epoch": 0.33, "grad_norm": 1.9971978664398193, "learning_rate": 3.329957013686447e-05, "loss": 5.6573, "step": 6760 }, { "epoch": 0.33, "grad_norm": 2.188537120819092, "learning_rate": 3.3289688225702853e-05, "loss": 5.5245, "step": 6764 }, { "epoch": 0.33, "grad_norm": 1.8785967826843262, "learning_rate": 3.3279806314541235e-05, "loss": 5.6986, "step": 6768 }, { "epoch": 0.33, "grad_norm": 2.19769024848938, "learning_rate": 3.326992440337962e-05, "loss": 5.5032, "step": 6772 }, { "epoch": 0.33, "grad_norm": 2.074648857116699, "learning_rate": 3.3260042492218e-05, "loss": 5.7612, "step": 6776 }, { "epoch": 0.33, "grad_norm": 1.9131178855895996, "learning_rate": 3.3250160581056375e-05, "loss": 5.6805, "step": 6780 }, { "epoch": 0.34, "grad_norm": 2.2468371391296387, "learning_rate": 3.324027866989476e-05, "loss": 5.6507, "step": 6784 }, { "epoch": 0.34, "grad_norm": 2.0636277198791504, "learning_rate": 3.323039675873314e-05, "loss": 5.7031, "step": 6788 }, { "epoch": 0.34, "grad_norm": 1.6496247053146362, "learning_rate": 3.322051484757152e-05, "loss": 5.5691, "step": 6792 }, { "epoch": 0.34, "grad_norm": 1.7620829343795776, "learning_rate": 3.32106329364099e-05, "loss": 5.6188, "step": 6796 }, { "epoch": 0.34, "grad_norm": 1.8972188234329224, "learning_rate": 3.3200751025248285e-05, "loss": 5.6746, "step": 6800 }, { "epoch": 0.34, "grad_norm": 1.8980793952941895, "learning_rate": 3.319086911408667e-05, "loss": 5.696, "step": 6804 }, { "epoch": 0.34, "grad_norm": 1.681355357170105, "learning_rate": 3.318098720292504e-05, "loss": 5.629, "step": 6808 }, { "epoch": 0.34, "grad_norm": 1.9827635288238525, "learning_rate": 
3.3171105291763425e-05, "loss": 5.6046, "step": 6812 }, { "epoch": 0.34, "grad_norm": 1.6929621696472168, "learning_rate": 3.316122338060181e-05, "loss": 5.7076, "step": 6816 }, { "epoch": 0.34, "grad_norm": 1.706438660621643, "learning_rate": 3.315134146944019e-05, "loss": 5.6982, "step": 6820 }, { "epoch": 0.34, "grad_norm": 1.9626370668411255, "learning_rate": 3.314145955827857e-05, "loss": 5.6642, "step": 6824 }, { "epoch": 0.34, "grad_norm": 1.926810383796692, "learning_rate": 3.313157764711695e-05, "loss": 5.6876, "step": 6828 }, { "epoch": 0.34, "grad_norm": 2.1014394760131836, "learning_rate": 3.3121695735955335e-05, "loss": 5.6713, "step": 6832 }, { "epoch": 0.34, "grad_norm": 1.9832298755645752, "learning_rate": 3.311181382479372e-05, "loss": 5.5695, "step": 6836 }, { "epoch": 0.34, "grad_norm": 1.867805004119873, "learning_rate": 3.31019319136321e-05, "loss": 5.598, "step": 6840 }, { "epoch": 0.34, "grad_norm": 1.7277621030807495, "learning_rate": 3.309205000247048e-05, "loss": 5.6008, "step": 6844 }, { "epoch": 0.34, "grad_norm": 2.0435431003570557, "learning_rate": 3.308216809130886e-05, "loss": 5.5801, "step": 6848 }, { "epoch": 0.34, "grad_norm": 1.6969705820083618, "learning_rate": 3.3072286180147245e-05, "loss": 5.5302, "step": 6852 }, { "epoch": 0.34, "grad_norm": 1.8054941892623901, "learning_rate": 3.306240426898563e-05, "loss": 5.6352, "step": 6856 }, { "epoch": 0.34, "grad_norm": 1.7074542045593262, "learning_rate": 3.305252235782401e-05, "loss": 5.5968, "step": 6860 }, { "epoch": 0.34, "grad_norm": 1.8238881826400757, "learning_rate": 3.3042640446662385e-05, "loss": 5.6272, "step": 6864 }, { "epoch": 0.34, "grad_norm": 1.9226677417755127, "learning_rate": 3.303275853550077e-05, "loss": 5.6381, "step": 6868 }, { "epoch": 0.34, "grad_norm": 1.5835906267166138, "learning_rate": 3.302287662433915e-05, "loss": 5.489, "step": 6872 }, { "epoch": 0.34, "grad_norm": 1.7852261066436768, "learning_rate": 3.301299471317753e-05, "loss": 5.7686, "step": 
6876 }, { "epoch": 0.34, "grad_norm": 2.1643717288970947, "learning_rate": 3.300311280201591e-05, "loss": 5.7131, "step": 6880 }, { "epoch": 0.34, "grad_norm": 1.8683335781097412, "learning_rate": 3.2993230890854295e-05, "loss": 5.6893, "step": 6884 }, { "epoch": 0.34, "grad_norm": 1.8597195148468018, "learning_rate": 3.298334897969268e-05, "loss": 5.5943, "step": 6888 }, { "epoch": 0.34, "grad_norm": 1.9217149019241333, "learning_rate": 3.297346706853105e-05, "loss": 5.6866, "step": 6892 }, { "epoch": 0.34, "grad_norm": 1.7497360706329346, "learning_rate": 3.2963585157369434e-05, "loss": 5.6797, "step": 6896 }, { "epoch": 0.34, "grad_norm": 1.8368375301361084, "learning_rate": 3.2953703246207816e-05, "loss": 5.5535, "step": 6900 }, { "epoch": 0.34, "grad_norm": 1.6737070083618164, "learning_rate": 3.29438213350462e-05, "loss": 5.5282, "step": 6904 }, { "epoch": 0.34, "grad_norm": 1.5021297931671143, "learning_rate": 3.293393942388458e-05, "loss": 5.6176, "step": 6908 }, { "epoch": 0.34, "grad_norm": 1.836517572402954, "learning_rate": 3.292405751272296e-05, "loss": 5.6304, "step": 6912 }, { "epoch": 0.34, "grad_norm": 1.9427759647369385, "learning_rate": 3.2914175601561345e-05, "loss": 5.6843, "step": 6916 }, { "epoch": 0.34, "grad_norm": 2.1088333129882812, "learning_rate": 3.290429369039973e-05, "loss": 5.6674, "step": 6920 }, { "epoch": 0.34, "grad_norm": 1.9784494638442993, "learning_rate": 3.28944117792381e-05, "loss": 5.6965, "step": 6924 }, { "epoch": 0.34, "grad_norm": 1.666114091873169, "learning_rate": 3.2884529868076484e-05, "loss": 5.5627, "step": 6928 }, { "epoch": 0.34, "grad_norm": 1.7081410884857178, "learning_rate": 3.2874647956914866e-05, "loss": 5.7073, "step": 6932 }, { "epoch": 0.34, "grad_norm": 1.857577919960022, "learning_rate": 3.286476604575325e-05, "loss": 5.6804, "step": 6936 }, { "epoch": 0.34, "grad_norm": 1.952774167060852, "learning_rate": 3.285488413459163e-05, "loss": 5.6911, "step": 6940 }, { "epoch": 0.34, "grad_norm": 
1.695110559463501, "learning_rate": 3.284500222343001e-05, "loss": 5.5463, "step": 6944 }, { "epoch": 0.34, "grad_norm": 1.7788817882537842, "learning_rate": 3.2835120312268394e-05, "loss": 5.7336, "step": 6948 }, { "epoch": 0.34, "grad_norm": 2.170694589614868, "learning_rate": 3.2825238401106776e-05, "loss": 5.6662, "step": 6952 }, { "epoch": 0.34, "grad_norm": 1.9008727073669434, "learning_rate": 3.281535648994516e-05, "loss": 5.6276, "step": 6956 }, { "epoch": 0.34, "grad_norm": 2.079054355621338, "learning_rate": 3.280547457878354e-05, "loss": 5.6058, "step": 6960 }, { "epoch": 0.34, "grad_norm": 1.6386624574661255, "learning_rate": 3.279559266762192e-05, "loss": 5.5782, "step": 6964 }, { "epoch": 0.34, "grad_norm": 1.9276303052902222, "learning_rate": 3.2785710756460305e-05, "loss": 5.6403, "step": 6968 }, { "epoch": 0.34, "grad_norm": 1.7339591979980469, "learning_rate": 3.277582884529869e-05, "loss": 5.6689, "step": 6972 }, { "epoch": 0.34, "grad_norm": 1.9973163604736328, "learning_rate": 3.276594693413706e-05, "loss": 5.6529, "step": 6976 }, { "epoch": 0.34, "grad_norm": 1.5551352500915527, "learning_rate": 3.2756065022975444e-05, "loss": 5.609, "step": 6980 }, { "epoch": 0.35, "grad_norm": 1.8106021881103516, "learning_rate": 3.2746183111813826e-05, "loss": 5.5107, "step": 6984 }, { "epoch": 0.35, "grad_norm": 1.6934738159179688, "learning_rate": 3.273630120065221e-05, "loss": 5.6064, "step": 6988 }, { "epoch": 0.35, "grad_norm": 1.6493241786956787, "learning_rate": 3.272641928949059e-05, "loss": 5.6641, "step": 6992 }, { "epoch": 0.35, "grad_norm": 1.817825436592102, "learning_rate": 3.271653737832897e-05, "loss": 5.5306, "step": 6996 }, { "epoch": 0.35, "grad_norm": 1.5873377323150635, "learning_rate": 3.2706655467167354e-05, "loss": 5.6717, "step": 7000 }, { "epoch": 0.35, "grad_norm": 1.584267497062683, "learning_rate": 3.2696773556005736e-05, "loss": 5.5343, "step": 7004 }, { "epoch": 0.35, "grad_norm": 1.52151620388031, "learning_rate": 
3.268689164484411e-05, "loss": 5.612, "step": 7008 }, { "epoch": 0.35, "grad_norm": 1.8319940567016602, "learning_rate": 3.2677009733682494e-05, "loss": 5.611, "step": 7012 }, { "epoch": 0.35, "grad_norm": 1.779436469078064, "learning_rate": 3.2667127822520876e-05, "loss": 5.5905, "step": 7016 }, { "epoch": 0.35, "grad_norm": 1.5194777250289917, "learning_rate": 3.265724591135926e-05, "loss": 5.6275, "step": 7020 }, { "epoch": 0.35, "grad_norm": 1.6653060913085938, "learning_rate": 3.264736400019764e-05, "loss": 5.6553, "step": 7024 }, { "epoch": 0.35, "grad_norm": 1.6240752935409546, "learning_rate": 3.263748208903602e-05, "loss": 5.6956, "step": 7028 }, { "epoch": 0.35, "grad_norm": 1.6351805925369263, "learning_rate": 3.2627600177874404e-05, "loss": 5.6165, "step": 7032 }, { "epoch": 0.35, "grad_norm": 1.8824491500854492, "learning_rate": 3.261771826671278e-05, "loss": 5.5225, "step": 7036 }, { "epoch": 0.35, "grad_norm": 1.713706612586975, "learning_rate": 3.260783635555116e-05, "loss": 5.6428, "step": 7040 }, { "epoch": 0.35, "grad_norm": 2.0658748149871826, "learning_rate": 3.2597954444389543e-05, "loss": 5.6175, "step": 7044 }, { "epoch": 0.35, "grad_norm": 1.7719210386276245, "learning_rate": 3.2588072533227926e-05, "loss": 5.6819, "step": 7048 }, { "epoch": 0.35, "grad_norm": 2.1328206062316895, "learning_rate": 3.257819062206631e-05, "loss": 5.5427, "step": 7052 }, { "epoch": 0.35, "grad_norm": 1.5215253829956055, "learning_rate": 3.256830871090469e-05, "loss": 5.7679, "step": 7056 }, { "epoch": 0.35, "grad_norm": 2.001835584640503, "learning_rate": 3.255842679974307e-05, "loss": 5.748, "step": 7060 }, { "epoch": 0.35, "grad_norm": 1.5775867700576782, "learning_rate": 3.2548544888581454e-05, "loss": 5.5609, "step": 7064 }, { "epoch": 0.35, "grad_norm": 1.7867364883422852, "learning_rate": 3.2538662977419836e-05, "loss": 5.6232, "step": 7068 }, { "epoch": 0.35, "grad_norm": 2.010613203048706, "learning_rate": 3.252878106625822e-05, "loss": 5.6611, "step": 
7072 }, { "epoch": 0.35, "grad_norm": 2.0896737575531006, "learning_rate": 3.25188991550966e-05, "loss": 5.6961, "step": 7076 }, { "epoch": 0.35, "grad_norm": 1.7435530424118042, "learning_rate": 3.250901724393498e-05, "loss": 5.816, "step": 7080 }, { "epoch": 0.35, "grad_norm": 2.452756643295288, "learning_rate": 3.2499135332773364e-05, "loss": 5.6278, "step": 7084 }, { "epoch": 0.35, "grad_norm": 1.5467629432678223, "learning_rate": 3.2489253421611746e-05, "loss": 5.7114, "step": 7088 }, { "epoch": 0.35, "grad_norm": 1.8225852251052856, "learning_rate": 3.247937151045012e-05, "loss": 5.5523, "step": 7092 }, { "epoch": 0.35, "grad_norm": 1.9562902450561523, "learning_rate": 3.2469489599288503e-05, "loss": 5.7284, "step": 7096 }, { "epoch": 0.35, "grad_norm": 2.379361152648926, "learning_rate": 3.2459607688126886e-05, "loss": 5.5628, "step": 7100 }, { "epoch": 0.35, "grad_norm": 1.9397469758987427, "learning_rate": 3.244972577696527e-05, "loss": 5.6596, "step": 7104 }, { "epoch": 0.35, "grad_norm": 1.7778300046920776, "learning_rate": 3.243984386580365e-05, "loss": 5.7102, "step": 7108 }, { "epoch": 0.35, "grad_norm": 2.671886920928955, "learning_rate": 3.242996195464203e-05, "loss": 5.7244, "step": 7112 }, { "epoch": 0.35, "grad_norm": 1.7230006456375122, "learning_rate": 3.2420080043480414e-05, "loss": 5.7147, "step": 7116 }, { "epoch": 0.35, "grad_norm": 1.751182198524475, "learning_rate": 3.241019813231879e-05, "loss": 5.7059, "step": 7120 }, { "epoch": 0.35, "grad_norm": 1.7081093788146973, "learning_rate": 3.240031622115717e-05, "loss": 5.5171, "step": 7124 }, { "epoch": 0.35, "grad_norm": 1.7573639154434204, "learning_rate": 3.239043430999555e-05, "loss": 5.6948, "step": 7128 }, { "epoch": 0.35, "grad_norm": 1.7177603244781494, "learning_rate": 3.2380552398833935e-05, "loss": 5.5412, "step": 7132 }, { "epoch": 0.35, "grad_norm": 1.8538342714309692, "learning_rate": 3.237067048767232e-05, "loss": 5.645, "step": 7136 }, { "epoch": 0.35, "grad_norm": 
2.174427032470703, "learning_rate": 3.23607885765107e-05, "loss": 5.6742, "step": 7140 }, { "epoch": 0.35, "grad_norm": 2.461571455001831, "learning_rate": 3.235090666534908e-05, "loss": 5.6642, "step": 7144 }, { "epoch": 0.35, "grad_norm": 1.8825712203979492, "learning_rate": 3.234102475418746e-05, "loss": 5.6553, "step": 7148 }, { "epoch": 0.35, "grad_norm": 1.5318057537078857, "learning_rate": 3.233114284302584e-05, "loss": 5.6551, "step": 7152 }, { "epoch": 0.35, "grad_norm": 2.0473759174346924, "learning_rate": 3.232126093186422e-05, "loss": 5.6036, "step": 7156 }, { "epoch": 0.35, "grad_norm": 2.186314821243286, "learning_rate": 3.23113790207026e-05, "loss": 5.762, "step": 7160 }, { "epoch": 0.35, "grad_norm": 1.6777757406234741, "learning_rate": 3.2301497109540985e-05, "loss": 5.6549, "step": 7164 }, { "epoch": 0.35, "grad_norm": 1.951010823249817, "learning_rate": 3.229161519837937e-05, "loss": 5.7491, "step": 7168 }, { "epoch": 0.35, "grad_norm": 1.590847134590149, "learning_rate": 3.228173328721775e-05, "loss": 5.52, "step": 7172 }, { "epoch": 0.35, "grad_norm": 2.1618316173553467, "learning_rate": 3.227185137605613e-05, "loss": 5.5362, "step": 7176 }, { "epoch": 0.35, "grad_norm": 1.874100685119629, "learning_rate": 3.226196946489451e-05, "loss": 5.5656, "step": 7180 }, { "epoch": 0.35, "grad_norm": 1.7269055843353271, "learning_rate": 3.2252087553732895e-05, "loss": 5.5793, "step": 7184 }, { "epoch": 0.36, "grad_norm": 1.7733286619186401, "learning_rate": 3.224220564257128e-05, "loss": 5.6867, "step": 7188 }, { "epoch": 0.36, "grad_norm": 1.6358729600906372, "learning_rate": 3.223232373140966e-05, "loss": 5.8119, "step": 7192 }, { "epoch": 0.36, "grad_norm": 1.663542628288269, "learning_rate": 3.222244182024804e-05, "loss": 5.585, "step": 7196 }, { "epoch": 0.36, "grad_norm": 2.0216832160949707, "learning_rate": 3.2212559909086423e-05, "loss": 5.5331, "step": 7200 }, { "epoch": 0.36, "grad_norm": 1.9756247997283936, "learning_rate": 
3.22026779979248e-05, "loss": 5.7117, "step": 7204 }, { "epoch": 0.36, "grad_norm": 2.0371248722076416, "learning_rate": 3.219279608676318e-05, "loss": 5.6412, "step": 7208 }, { "epoch": 0.36, "grad_norm": 1.8078463077545166, "learning_rate": 3.218291417560156e-05, "loss": 5.5826, "step": 7212 }, { "epoch": 0.36, "grad_norm": 1.6436288356781006, "learning_rate": 3.2173032264439945e-05, "loss": 5.5825, "step": 7216 }, { "epoch": 0.36, "grad_norm": 2.0301754474639893, "learning_rate": 3.216315035327833e-05, "loss": 5.7317, "step": 7220 }, { "epoch": 0.36, "grad_norm": 2.0184414386749268, "learning_rate": 3.215326844211671e-05, "loss": 5.5583, "step": 7224 }, { "epoch": 0.36, "grad_norm": 1.9053844213485718, "learning_rate": 3.214338653095509e-05, "loss": 5.6326, "step": 7228 }, { "epoch": 0.36, "grad_norm": 1.7837705612182617, "learning_rate": 3.2133504619793466e-05, "loss": 5.6659, "step": 7232 }, { "epoch": 0.36, "grad_norm": 2.1691172122955322, "learning_rate": 3.212362270863185e-05, "loss": 5.6534, "step": 7236 }, { "epoch": 0.36, "grad_norm": 1.6231029033660889, "learning_rate": 3.211374079747023e-05, "loss": 5.5752, "step": 7240 }, { "epoch": 0.36, "grad_norm": 1.6797888278961182, "learning_rate": 3.210385888630861e-05, "loss": 5.5594, "step": 7244 }, { "epoch": 0.36, "grad_norm": 2.0142295360565186, "learning_rate": 3.2093976975146995e-05, "loss": 5.5551, "step": 7248 }, { "epoch": 0.36, "grad_norm": 1.6823694705963135, "learning_rate": 3.208409506398538e-05, "loss": 5.5654, "step": 7252 }, { "epoch": 0.36, "grad_norm": 2.035011053085327, "learning_rate": 3.207421315282376e-05, "loss": 5.753, "step": 7256 }, { "epoch": 0.36, "grad_norm": 2.0539419651031494, "learning_rate": 3.206433124166214e-05, "loss": 5.481, "step": 7260 }, { "epoch": 0.36, "grad_norm": 1.8874220848083496, "learning_rate": 3.2054449330500516e-05, "loss": 5.5776, "step": 7264 }, { "epoch": 0.36, "grad_norm": 2.067695140838623, "learning_rate": 3.20445674193389e-05, "loss": 5.7252, "step": 
7268 }, { "epoch": 0.36, "grad_norm": 1.9559959173202515, "learning_rate": 3.203468550817728e-05, "loss": 5.7496, "step": 7272 }, { "epoch": 0.36, "grad_norm": 1.7679195404052734, "learning_rate": 3.202480359701566e-05, "loss": 5.5574, "step": 7276 }, { "epoch": 0.36, "grad_norm": 1.7871371507644653, "learning_rate": 3.2014921685854044e-05, "loss": 5.611, "step": 7280 }, { "epoch": 0.36, "grad_norm": 2.027100086212158, "learning_rate": 3.2005039774692426e-05, "loss": 5.6949, "step": 7284 }, { "epoch": 0.36, "grad_norm": 2.3536198139190674, "learning_rate": 3.199515786353081e-05, "loss": 5.5937, "step": 7288 }, { "epoch": 0.36, "grad_norm": 1.9089040756225586, "learning_rate": 3.198527595236919e-05, "loss": 5.742, "step": 7292 }, { "epoch": 0.36, "grad_norm": 1.9175152778625488, "learning_rate": 3.197539404120757e-05, "loss": 5.7345, "step": 7296 }, { "epoch": 0.36, "grad_norm": 2.3115925788879395, "learning_rate": 3.1965512130045955e-05, "loss": 5.5774, "step": 7300 }, { "epoch": 0.36, "grad_norm": 1.8105931282043457, "learning_rate": 3.195563021888434e-05, "loss": 5.5753, "step": 7304 }, { "epoch": 0.36, "grad_norm": 2.1161036491394043, "learning_rate": 3.194574830772272e-05, "loss": 5.6975, "step": 7308 }, { "epoch": 0.36, "grad_norm": 1.7692967653274536, "learning_rate": 3.19358663965611e-05, "loss": 5.6869, "step": 7312 }, { "epoch": 0.36, "grad_norm": 1.7916195392608643, "learning_rate": 3.1925984485399476e-05, "loss": 5.7502, "step": 7316 }, { "epoch": 0.36, "grad_norm": 1.5649669170379639, "learning_rate": 3.191610257423786e-05, "loss": 5.6049, "step": 7320 }, { "epoch": 0.36, "grad_norm": 1.5286930799484253, "learning_rate": 3.190622066307624e-05, "loss": 5.5598, "step": 7324 }, { "epoch": 0.36, "grad_norm": 1.7816998958587646, "learning_rate": 3.189633875191462e-05, "loss": 5.5383, "step": 7328 }, { "epoch": 0.36, "grad_norm": 1.7937431335449219, "learning_rate": 3.1886456840753004e-05, "loss": 5.6448, "step": 7332 }, { "epoch": 0.36, "grad_norm": 
1.801894187927246, "learning_rate": 3.1876574929591386e-05, "loss": 5.638, "step": 7336 }, { "epoch": 0.36, "grad_norm": 1.8095769882202148, "learning_rate": 3.186669301842977e-05, "loss": 5.7017, "step": 7340 }, { "epoch": 0.36, "grad_norm": 1.600150465965271, "learning_rate": 3.185681110726815e-05, "loss": 5.639, "step": 7344 }, { "epoch": 0.36, "grad_norm": 1.827073574066162, "learning_rate": 3.1846929196106526e-05, "loss": 5.6148, "step": 7348 }, { "epoch": 0.36, "grad_norm": 1.7427455186843872, "learning_rate": 3.183704728494491e-05, "loss": 5.6816, "step": 7352 }, { "epoch": 0.36, "grad_norm": 1.5865864753723145, "learning_rate": 3.182716537378329e-05, "loss": 5.6173, "step": 7356 }, { "epoch": 0.36, "grad_norm": 2.4280364513397217, "learning_rate": 3.181728346262167e-05, "loss": 5.7189, "step": 7360 }, { "epoch": 0.36, "grad_norm": 1.7864285707473755, "learning_rate": 3.1807401551460054e-05, "loss": 5.7009, "step": 7364 }, { "epoch": 0.36, "grad_norm": 1.9539587497711182, "learning_rate": 3.1797519640298436e-05, "loss": 5.6526, "step": 7368 }, { "epoch": 0.36, "grad_norm": 1.8817050457000732, "learning_rate": 3.178763772913682e-05, "loss": 5.5829, "step": 7372 }, { "epoch": 0.36, "grad_norm": 1.9259823560714722, "learning_rate": 3.1777755817975193e-05, "loss": 5.5862, "step": 7376 }, { "epoch": 0.36, "grad_norm": 1.8345321416854858, "learning_rate": 3.1767873906813576e-05, "loss": 5.689, "step": 7380 }, { "epoch": 0.36, "grad_norm": 1.9445204734802246, "learning_rate": 3.175799199565196e-05, "loss": 5.5377, "step": 7384 }, { "epoch": 0.37, "grad_norm": 1.9036486148834229, "learning_rate": 3.174811008449034e-05, "loss": 5.6994, "step": 7388 }, { "epoch": 0.37, "grad_norm": 1.7880631685256958, "learning_rate": 3.173822817332872e-05, "loss": 5.5478, "step": 7392 }, { "epoch": 0.37, "grad_norm": 1.7411099672317505, "learning_rate": 3.1728346262167104e-05, "loss": 5.6932, "step": 7396 }, { "epoch": 0.37, "grad_norm": 2.0032172203063965, "learning_rate": 
3.1718464351005486e-05, "loss": 5.6742, "step": 7400 }, { "epoch": 0.37, "grad_norm": 1.963858723640442, "learning_rate": 3.170858243984387e-05, "loss": 5.6343, "step": 7404 }, { "epoch": 0.37, "grad_norm": 1.7364962100982666, "learning_rate": 3.169870052868225e-05, "loss": 5.6262, "step": 7408 }, { "epoch": 0.37, "grad_norm": 1.885438084602356, "learning_rate": 3.168881861752063e-05, "loss": 5.6402, "step": 7412 }, { "epoch": 0.37, "grad_norm": 1.7210749387741089, "learning_rate": 3.1678936706359014e-05, "loss": 5.6839, "step": 7416 }, { "epoch": 0.37, "grad_norm": 1.7860082387924194, "learning_rate": 3.1669054795197396e-05, "loss": 5.5595, "step": 7420 }, { "epoch": 0.37, "grad_norm": 1.878787636756897, "learning_rate": 3.165917288403578e-05, "loss": 5.6312, "step": 7424 }, { "epoch": 0.37, "grad_norm": 1.6183253526687622, "learning_rate": 3.164929097287416e-05, "loss": 5.6786, "step": 7428 }, { "epoch": 0.37, "grad_norm": 1.8205534219741821, "learning_rate": 3.1639409061712536e-05, "loss": 5.6006, "step": 7432 }, { "epoch": 0.37, "grad_norm": 1.9625846147537231, "learning_rate": 3.162952715055092e-05, "loss": 5.5243, "step": 7436 }, { "epoch": 0.37, "grad_norm": 1.967149257659912, "learning_rate": 3.16196452393893e-05, "loss": 5.599, "step": 7440 }, { "epoch": 0.37, "grad_norm": 2.0951106548309326, "learning_rate": 3.160976332822768e-05, "loss": 5.6613, "step": 7444 }, { "epoch": 0.37, "grad_norm": 1.8848724365234375, "learning_rate": 3.1599881417066064e-05, "loss": 5.7463, "step": 7448 }, { "epoch": 0.37, "grad_norm": 1.793150544166565, "learning_rate": 3.1589999505904446e-05, "loss": 5.7087, "step": 7452 }, { "epoch": 0.37, "grad_norm": 1.5212810039520264, "learning_rate": 3.158011759474283e-05, "loss": 5.6169, "step": 7456 }, { "epoch": 0.37, "grad_norm": 1.9898124933242798, "learning_rate": 3.15702356835812e-05, "loss": 5.5823, "step": 7460 }, { "epoch": 0.37, "grad_norm": 1.9081612825393677, "learning_rate": 3.1560353772419585e-05, "loss": 5.673, "step": 
7464 }, { "epoch": 0.37, "grad_norm": 2.049330234527588, "learning_rate": 3.155047186125797e-05, "loss": 5.6275, "step": 7468 }, { "epoch": 0.37, "grad_norm": 1.874483346939087, "learning_rate": 3.154058995009635e-05, "loss": 5.7012, "step": 7472 }, { "epoch": 0.37, "grad_norm": 1.674302101135254, "learning_rate": 3.153070803893473e-05, "loss": 5.6507, "step": 7476 }, { "epoch": 0.37, "grad_norm": 2.1794581413269043, "learning_rate": 3.1520826127773113e-05, "loss": 5.6312, "step": 7480 }, { "epoch": 0.37, "grad_norm": 1.8025480508804321, "learning_rate": 3.1510944216611496e-05, "loss": 5.5184, "step": 7484 }, { "epoch": 0.37, "grad_norm": 1.8189462423324585, "learning_rate": 3.150106230544988e-05, "loss": 5.558, "step": 7488 }, { "epoch": 0.37, "grad_norm": 1.8474452495574951, "learning_rate": 3.149118039428825e-05, "loss": 5.5896, "step": 7492 }, { "epoch": 0.37, "grad_norm": 1.8259671926498413, "learning_rate": 3.1481298483126635e-05, "loss": 5.6092, "step": 7496 }, { "epoch": 0.37, "grad_norm": 1.675440788269043, "learning_rate": 3.147141657196502e-05, "loss": 5.5084, "step": 7500 }, { "epoch": 0.37, "grad_norm": 2.073160409927368, "learning_rate": 3.14615346608034e-05, "loss": 5.6257, "step": 7504 }, { "epoch": 0.37, "grad_norm": 1.9725701808929443, "learning_rate": 3.145165274964178e-05, "loss": 5.5622, "step": 7508 }, { "epoch": 0.37, "grad_norm": 2.097548007965088, "learning_rate": 3.144177083848016e-05, "loss": 5.5948, "step": 7512 }, { "epoch": 0.37, "grad_norm": 1.8091487884521484, "learning_rate": 3.1431888927318545e-05, "loss": 5.6698, "step": 7516 }, { "epoch": 0.37, "grad_norm": 1.8227310180664062, "learning_rate": 3.142200701615692e-05, "loss": 5.5319, "step": 7520 }, { "epoch": 0.37, "grad_norm": 1.9433192014694214, "learning_rate": 3.141212510499531e-05, "loss": 5.6118, "step": 7524 }, { "epoch": 0.37, "grad_norm": 1.7683522701263428, "learning_rate": 3.140224319383369e-05, "loss": 5.5843, "step": 7528 }, { "epoch": 0.37, "grad_norm": 
1.8720110654830933, "learning_rate": 3.1392361282672073e-05, "loss": 5.626, "step": 7532 }, { "epoch": 0.37, "grad_norm": 1.7826591730117798, "learning_rate": 3.1382479371510456e-05, "loss": 5.6833, "step": 7536 }, { "epoch": 0.37, "grad_norm": 1.870161533355713, "learning_rate": 3.137259746034884e-05, "loss": 5.7177, "step": 7540 }, { "epoch": 0.37, "grad_norm": 2.076082706451416, "learning_rate": 3.136271554918721e-05, "loss": 5.563, "step": 7544 }, { "epoch": 0.37, "grad_norm": 1.7204762697219849, "learning_rate": 3.1352833638025595e-05, "loss": 5.5931, "step": 7548 }, { "epoch": 0.37, "grad_norm": 2.109579563140869, "learning_rate": 3.134295172686398e-05, "loss": 5.6927, "step": 7552 }, { "epoch": 0.37, "grad_norm": 1.7823752164840698, "learning_rate": 3.133306981570236e-05, "loss": 5.501, "step": 7556 }, { "epoch": 0.37, "grad_norm": 1.9205490350723267, "learning_rate": 3.132318790454074e-05, "loss": 5.5822, "step": 7560 }, { "epoch": 0.37, "grad_norm": 2.1453466415405273, "learning_rate": 3.131330599337912e-05, "loss": 5.6234, "step": 7564 }, { "epoch": 0.37, "grad_norm": 2.079413652420044, "learning_rate": 3.1303424082217505e-05, "loss": 5.5851, "step": 7568 }, { "epoch": 0.37, "grad_norm": 1.7375316619873047, "learning_rate": 3.129354217105589e-05, "loss": 5.51, "step": 7572 }, { "epoch": 0.37, "grad_norm": 1.7538070678710938, "learning_rate": 3.128366025989426e-05, "loss": 5.5565, "step": 7576 }, { "epoch": 0.37, "grad_norm": 1.8971534967422485, "learning_rate": 3.1273778348732645e-05, "loss": 5.6077, "step": 7580 }, { "epoch": 0.37, "grad_norm": 1.975240707397461, "learning_rate": 3.126389643757103e-05, "loss": 5.8038, "step": 7584 }, { "epoch": 0.37, "grad_norm": 1.9399492740631104, "learning_rate": 3.125401452640941e-05, "loss": 5.6554, "step": 7588 }, { "epoch": 0.38, "grad_norm": 2.004110336303711, "learning_rate": 3.124413261524779e-05, "loss": 5.6242, "step": 7592 }, { "epoch": 0.38, "grad_norm": 1.8853540420532227, "learning_rate": 
3.123425070408617e-05, "loss": 5.7207, "step": 7596 }, { "epoch": 0.38, "grad_norm": 1.765731930732727, "learning_rate": 3.1224368792924555e-05, "loss": 5.5813, "step": 7600 }, { "epoch": 0.38, "grad_norm": 1.8963857889175415, "learning_rate": 3.121448688176293e-05, "loss": 5.6913, "step": 7604 }, { "epoch": 0.38, "grad_norm": 1.8706163167953491, "learning_rate": 3.120460497060131e-05, "loss": 5.5406, "step": 7608 }, { "epoch": 0.38, "grad_norm": 1.6852182149887085, "learning_rate": 3.1194723059439694e-05, "loss": 5.582, "step": 7612 }, { "epoch": 0.38, "grad_norm": 1.8383783102035522, "learning_rate": 3.1184841148278076e-05, "loss": 5.664, "step": 7616 }, { "epoch": 0.38, "grad_norm": 1.8068432807922363, "learning_rate": 3.117495923711646e-05, "loss": 5.6969, "step": 7620 }, { "epoch": 0.38, "grad_norm": 2.020507335662842, "learning_rate": 3.116507732595484e-05, "loss": 5.5544, "step": 7624 }, { "epoch": 0.38, "grad_norm": 1.7080477476119995, "learning_rate": 3.115519541479322e-05, "loss": 5.7007, "step": 7628 }, { "epoch": 0.38, "grad_norm": 1.6896196603775024, "learning_rate": 3.11453135036316e-05, "loss": 5.5952, "step": 7632 }, { "epoch": 0.38, "grad_norm": 2.0468990802764893, "learning_rate": 3.113543159246998e-05, "loss": 5.735, "step": 7636 }, { "epoch": 0.38, "grad_norm": 1.9977657794952393, "learning_rate": 3.112554968130837e-05, "loss": 5.7005, "step": 7640 }, { "epoch": 0.38, "grad_norm": 2.3371498584747314, "learning_rate": 3.111566777014675e-05, "loss": 5.6963, "step": 7644 }, { "epoch": 0.38, "grad_norm": 1.8989230394363403, "learning_rate": 3.110578585898513e-05, "loss": 5.5717, "step": 7648 }, { "epoch": 0.38, "grad_norm": 2.1724061965942383, "learning_rate": 3.1095903947823515e-05, "loss": 5.5531, "step": 7652 }, { "epoch": 0.38, "grad_norm": 1.7669955492019653, "learning_rate": 3.10860220366619e-05, "loss": 5.6975, "step": 7656 }, { "epoch": 0.38, "grad_norm": 2.1776323318481445, "learning_rate": 3.107614012550027e-05, "loss": 5.7217, "step": 
7660 }, { "epoch": 0.38, "grad_norm": 1.5925564765930176, "learning_rate": 3.1066258214338654e-05, "loss": 5.6475, "step": 7664 }, { "epoch": 0.38, "grad_norm": 1.862136721611023, "learning_rate": 3.1056376303177036e-05, "loss": 5.6022, "step": 7668 }, { "epoch": 0.38, "grad_norm": 1.893431305885315, "learning_rate": 3.104649439201542e-05, "loss": 5.5991, "step": 7672 }, { "epoch": 0.38, "grad_norm": 1.9323973655700684, "learning_rate": 3.10366124808538e-05, "loss": 5.737, "step": 7676 }, { "epoch": 0.38, "grad_norm": 1.630057454109192, "learning_rate": 3.102673056969218e-05, "loss": 5.5885, "step": 7680 }, { "epoch": 0.38, "grad_norm": 2.0866503715515137, "learning_rate": 3.1016848658530565e-05, "loss": 5.5873, "step": 7684 }, { "epoch": 0.38, "grad_norm": 1.6265943050384521, "learning_rate": 3.100696674736894e-05, "loss": 5.6085, "step": 7688 }, { "epoch": 0.38, "grad_norm": 2.116450309753418, "learning_rate": 3.099708483620732e-05, "loss": 5.6814, "step": 7692 }, { "epoch": 0.38, "grad_norm": 1.8399298191070557, "learning_rate": 3.0987202925045704e-05, "loss": 5.6584, "step": 7696 }, { "epoch": 0.38, "grad_norm": 1.8756877183914185, "learning_rate": 3.0977321013884086e-05, "loss": 5.7231, "step": 7700 }, { "epoch": 0.38, "grad_norm": 1.9492945671081543, "learning_rate": 3.096743910272247e-05, "loss": 5.6126, "step": 7704 }, { "epoch": 0.38, "grad_norm": 1.8977458477020264, "learning_rate": 3.095755719156085e-05, "loss": 5.4845, "step": 7708 }, { "epoch": 0.38, "grad_norm": 1.920137643814087, "learning_rate": 3.094767528039923e-05, "loss": 5.6911, "step": 7712 }, { "epoch": 0.38, "grad_norm": 1.9091439247131348, "learning_rate": 3.093779336923761e-05, "loss": 5.7215, "step": 7716 }, { "epoch": 0.38, "grad_norm": 1.9468705654144287, "learning_rate": 3.092791145807599e-05, "loss": 5.5602, "step": 7720 }, { "epoch": 0.38, "grad_norm": 2.171674966812134, "learning_rate": 3.091802954691437e-05, "loss": 5.6432, "step": 7724 }, { "epoch": 0.38, "grad_norm": 
1.7514827251434326, "learning_rate": 3.0908147635752754e-05, "loss": 5.7022, "step": 7728 }, { "epoch": 0.38, "grad_norm": 1.6173917055130005, "learning_rate": 3.0898265724591136e-05, "loss": 5.6705, "step": 7732 }, { "epoch": 0.38, "grad_norm": 1.7290568351745605, "learning_rate": 3.088838381342952e-05, "loss": 5.5759, "step": 7736 }, { "epoch": 0.38, "grad_norm": 1.6534110307693481, "learning_rate": 3.08785019022679e-05, "loss": 5.7897, "step": 7740 }, { "epoch": 0.38, "grad_norm": 2.1814112663269043, "learning_rate": 3.086861999110628e-05, "loss": 5.6196, "step": 7744 }, { "epoch": 0.38, "grad_norm": 1.8534756898880005, "learning_rate": 3.085873807994466e-05, "loss": 5.5227, "step": 7748 }, { "epoch": 0.38, "grad_norm": 1.9489648342132568, "learning_rate": 3.0848856168783046e-05, "loss": 5.647, "step": 7752 }, { "epoch": 0.38, "grad_norm": 1.976659893989563, "learning_rate": 3.083897425762143e-05, "loss": 5.5386, "step": 7756 }, { "epoch": 0.38, "grad_norm": 2.00985050201416, "learning_rate": 3.082909234645981e-05, "loss": 5.723, "step": 7760 }, { "epoch": 0.38, "grad_norm": 1.8619848489761353, "learning_rate": 3.081921043529819e-05, "loss": 5.7385, "step": 7764 }, { "epoch": 0.38, "grad_norm": 1.6431546211242676, "learning_rate": 3.0809328524136574e-05, "loss": 5.5918, "step": 7768 }, { "epoch": 0.38, "grad_norm": 1.8953239917755127, "learning_rate": 3.079944661297495e-05, "loss": 5.6693, "step": 7772 }, { "epoch": 0.38, "grad_norm": 2.1630382537841797, "learning_rate": 3.078956470181333e-05, "loss": 5.5721, "step": 7776 }, { "epoch": 0.38, "grad_norm": 1.921631932258606, "learning_rate": 3.0779682790651714e-05, "loss": 5.7353, "step": 7780 }, { "epoch": 0.38, "grad_norm": 1.9565848112106323, "learning_rate": 3.0769800879490096e-05, "loss": 5.6735, "step": 7784 }, { "epoch": 0.38, "grad_norm": 2.029181718826294, "learning_rate": 3.075991896832848e-05, "loss": 5.5432, "step": 7788 }, { "epoch": 0.38, "grad_norm": 1.7901182174682617, "learning_rate": 
3.075003705716686e-05, "loss": 5.6048, "step": 7792 }, { "epoch": 0.39, "grad_norm": 1.7717857360839844, "learning_rate": 3.074015514600524e-05, "loss": 5.5689, "step": 7796 }, { "epoch": 0.39, "grad_norm": 1.8414852619171143, "learning_rate": 3.073027323484362e-05, "loss": 5.5628, "step": 7800 }, { "epoch": 0.39, "grad_norm": 2.446169376373291, "learning_rate": 3.0720391323682e-05, "loss": 5.6246, "step": 7804 }, { "epoch": 0.39, "grad_norm": 1.7307089567184448, "learning_rate": 3.071050941252038e-05, "loss": 5.6322, "step": 7808 }, { "epoch": 0.39, "grad_norm": 2.0720410346984863, "learning_rate": 3.0700627501358764e-05, "loss": 5.6957, "step": 7812 }, { "epoch": 0.39, "grad_norm": 1.8278499841690063, "learning_rate": 3.0690745590197146e-05, "loss": 5.6488, "step": 7816 }, { "epoch": 0.39, "grad_norm": 1.9019477367401123, "learning_rate": 3.068086367903553e-05, "loss": 5.671, "step": 7820 }, { "epoch": 0.39, "grad_norm": 2.330875873565674, "learning_rate": 3.067098176787391e-05, "loss": 5.7233, "step": 7824 }, { "epoch": 0.39, "grad_norm": 1.958566427230835, "learning_rate": 3.066109985671229e-05, "loss": 5.5485, "step": 7828 }, { "epoch": 0.39, "grad_norm": 1.9183526039123535, "learning_rate": 3.065121794555067e-05, "loss": 5.627, "step": 7832 }, { "epoch": 0.39, "grad_norm": 2.0147671699523926, "learning_rate": 3.064133603438905e-05, "loss": 5.5493, "step": 7836 }, { "epoch": 0.39, "grad_norm": 1.8255884647369385, "learning_rate": 3.063145412322743e-05, "loss": 5.618, "step": 7840 }, { "epoch": 0.39, "grad_norm": 2.188514471054077, "learning_rate": 3.062157221206581e-05, "loss": 5.6623, "step": 7844 }, { "epoch": 0.39, "grad_norm": 1.7055175304412842, "learning_rate": 3.0611690300904195e-05, "loss": 5.5607, "step": 7848 }, { "epoch": 0.39, "grad_norm": 1.6150466203689575, "learning_rate": 3.060180838974258e-05, "loss": 5.5892, "step": 7852 }, { "epoch": 0.39, "grad_norm": 1.7597399950027466, "learning_rate": 3.059192647858096e-05, "loss": 5.6028, "step": 7856 
}, { "epoch": 0.39, "grad_norm": 2.0006275177001953, "learning_rate": 3.0582044567419335e-05, "loss": 5.5031, "step": 7860 }, { "epoch": 0.39, "grad_norm": 1.8812612295150757, "learning_rate": 3.057216265625772e-05, "loss": 5.5484, "step": 7864 }, { "epoch": 0.39, "grad_norm": 1.7753983736038208, "learning_rate": 3.0562280745096106e-05, "loss": 5.661, "step": 7868 }, { "epoch": 0.39, "grad_norm": 1.891542673110962, "learning_rate": 3.055239883393449e-05, "loss": 5.5273, "step": 7872 }, { "epoch": 0.39, "grad_norm": 2.026078939437866, "learning_rate": 3.054251692277287e-05, "loss": 5.5718, "step": 7876 }, { "epoch": 0.39, "grad_norm": 2.0182738304138184, "learning_rate": 3.053263501161125e-05, "loss": 5.694, "step": 7880 }, { "epoch": 0.39, "grad_norm": 1.9878616333007812, "learning_rate": 3.052275310044963e-05, "loss": 5.6561, "step": 7884 }, { "epoch": 0.39, "grad_norm": 1.9343925714492798, "learning_rate": 3.0512871189288012e-05, "loss": 5.6317, "step": 7888 }, { "epoch": 0.39, "grad_norm": 1.8456355333328247, "learning_rate": 3.050298927812639e-05, "loss": 5.5619, "step": 7892 }, { "epoch": 0.39, "grad_norm": 1.6567593812942505, "learning_rate": 3.0493107366964773e-05, "loss": 5.5108, "step": 7896 }, { "epoch": 0.39, "grad_norm": 1.7682963609695435, "learning_rate": 3.0483225455803155e-05, "loss": 5.8091, "step": 7900 }, { "epoch": 0.39, "grad_norm": 1.653235912322998, "learning_rate": 3.0473343544641537e-05, "loss": 5.5357, "step": 7904 }, { "epoch": 0.39, "grad_norm": 1.7630318403244019, "learning_rate": 3.0463461633479916e-05, "loss": 5.6776, "step": 7908 }, { "epoch": 0.39, "grad_norm": 2.0381624698638916, "learning_rate": 3.0453579722318298e-05, "loss": 5.6855, "step": 7912 }, { "epoch": 0.39, "grad_norm": 1.6931108236312866, "learning_rate": 3.044369781115668e-05, "loss": 5.6519, "step": 7916 }, { "epoch": 0.39, "grad_norm": 1.7193225622177124, "learning_rate": 3.0433815899995062e-05, "loss": 5.5361, "step": 7920 }, { "epoch": 0.39, "grad_norm": 
1.7203538417816162, "learning_rate": 3.042393398883344e-05, "loss": 5.5749, "step": 7924 }, { "epoch": 0.39, "grad_norm": 2.0116119384765625, "learning_rate": 3.0414052077671823e-05, "loss": 5.478, "step": 7928 }, { "epoch": 0.39, "grad_norm": 1.5771147012710571, "learning_rate": 3.0404170166510205e-05, "loss": 5.6352, "step": 7932 }, { "epoch": 0.39, "grad_norm": 1.8036284446716309, "learning_rate": 3.0394288255348584e-05, "loss": 5.5748, "step": 7936 }, { "epoch": 0.39, "grad_norm": 1.650652289390564, "learning_rate": 3.0384406344186966e-05, "loss": 5.6925, "step": 7940 }, { "epoch": 0.39, "grad_norm": 1.871989369392395, "learning_rate": 3.0374524433025348e-05, "loss": 5.4965, "step": 7944 }, { "epoch": 0.39, "grad_norm": 1.7644288539886475, "learning_rate": 3.036464252186373e-05, "loss": 5.521, "step": 7948 }, { "epoch": 0.39, "grad_norm": 1.7584574222564697, "learning_rate": 3.035476061070211e-05, "loss": 5.4997, "step": 7952 }, { "epoch": 0.39, "grad_norm": 1.6933388710021973, "learning_rate": 3.034487869954049e-05, "loss": 5.6415, "step": 7956 }, { "epoch": 0.39, "grad_norm": 1.6390060186386108, "learning_rate": 3.0334996788378873e-05, "loss": 5.6538, "step": 7960 }, { "epoch": 0.39, "grad_norm": 1.8131755590438843, "learning_rate": 3.0325114877217255e-05, "loss": 5.6138, "step": 7964 }, { "epoch": 0.39, "grad_norm": 2.0270628929138184, "learning_rate": 3.0315232966055633e-05, "loss": 5.5461, "step": 7968 }, { "epoch": 0.39, "grad_norm": 1.8326473236083984, "learning_rate": 3.0305351054894015e-05, "loss": 5.5499, "step": 7972 }, { "epoch": 0.39, "grad_norm": 1.9747962951660156, "learning_rate": 3.0295469143732397e-05, "loss": 5.5334, "step": 7976 }, { "epoch": 0.39, "grad_norm": 1.95182204246521, "learning_rate": 3.028558723257078e-05, "loss": 5.5317, "step": 7980 }, { "epoch": 0.39, "grad_norm": 2.177966356277466, "learning_rate": 3.0275705321409165e-05, "loss": 5.5488, "step": 7984 }, { "epoch": 0.39, "grad_norm": 2.0180881023406982, "learning_rate": 
3.0265823410247547e-05, "loss": 5.5304, "step": 7988 }, { "epoch": 0.39, "grad_norm": 1.7427732944488525, "learning_rate": 3.0255941499085926e-05, "loss": 5.6641, "step": 7992 }, { "epoch": 0.4, "grad_norm": 1.7346999645233154, "learning_rate": 3.0246059587924308e-05, "loss": 5.6368, "step": 7996 }, { "epoch": 0.4, "grad_norm": 2.121185302734375, "learning_rate": 3.023617767676269e-05, "loss": 5.5946, "step": 8000 }, { "epoch": 0.4, "grad_norm": 1.9402865171432495, "learning_rate": 3.0226295765601072e-05, "loss": 5.679, "step": 8004 }, { "epoch": 0.4, "grad_norm": 2.0164012908935547, "learning_rate": 3.021641385443945e-05, "loss": 5.5902, "step": 8008 }, { "epoch": 0.4, "grad_norm": 1.8733190298080444, "learning_rate": 3.0209002421068237e-05, "loss": 5.532, "step": 8012 }, { "epoch": 0.4, "grad_norm": 1.7538553476333618, "learning_rate": 3.019912050990662e-05, "loss": 5.6426, "step": 8016 }, { "epoch": 0.4, "grad_norm": 1.886910319328308, "learning_rate": 3.0189238598745e-05, "loss": 5.5693, "step": 8020 }, { "epoch": 0.4, "grad_norm": 1.9799367189407349, "learning_rate": 3.0179356687583383e-05, "loss": 5.7278, "step": 8024 }, { "epoch": 0.4, "grad_norm": 1.9877070188522339, "learning_rate": 3.0169474776421762e-05, "loss": 5.5553, "step": 8028 }, { "epoch": 0.4, "grad_norm": 2.2819747924804688, "learning_rate": 3.0159592865260144e-05, "loss": 5.5856, "step": 8032 }, { "epoch": 0.4, "grad_norm": 1.671416997909546, "learning_rate": 3.0149710954098526e-05, "loss": 5.588, "step": 8036 }, { "epoch": 0.4, "grad_norm": 1.724274754524231, "learning_rate": 3.0139829042936908e-05, "loss": 5.5928, "step": 8040 }, { "epoch": 0.4, "grad_norm": 1.6406031847000122, "learning_rate": 3.0129947131775287e-05, "loss": 5.6456, "step": 8044 }, { "epoch": 0.4, "grad_norm": 1.9587162733078003, "learning_rate": 3.012006522061367e-05, "loss": 5.6093, "step": 8048 }, { "epoch": 0.4, "grad_norm": 1.987953543663025, "learning_rate": 3.011018330945205e-05, "loss": 5.8002, "step": 8052 }, { 
"epoch": 0.4, "grad_norm": 1.800836443901062, "learning_rate": 3.0100301398290433e-05, "loss": 5.7093, "step": 8056 }, { "epoch": 0.4, "grad_norm": 1.9985570907592773, "learning_rate": 3.009041948712881e-05, "loss": 5.6896, "step": 8060 }, { "epoch": 0.4, "grad_norm": 2.0569660663604736, "learning_rate": 3.0080537575967194e-05, "loss": 5.7986, "step": 8064 }, { "epoch": 0.4, "grad_norm": 1.8365398645401, "learning_rate": 3.0070655664805576e-05, "loss": 5.4791, "step": 8068 }, { "epoch": 0.4, "grad_norm": 1.890994668006897, "learning_rate": 3.0060773753643954e-05, "loss": 5.5919, "step": 8072 }, { "epoch": 0.4, "grad_norm": 1.8914673328399658, "learning_rate": 3.0050891842482337e-05, "loss": 5.6298, "step": 8076 }, { "epoch": 0.4, "grad_norm": 2.059929132461548, "learning_rate": 3.004100993132072e-05, "loss": 5.589, "step": 8080 }, { "epoch": 0.4, "grad_norm": 1.552819013595581, "learning_rate": 3.00311280201591e-05, "loss": 5.6104, "step": 8084 }, { "epoch": 0.4, "grad_norm": 2.081338882446289, "learning_rate": 3.002124610899748e-05, "loss": 5.6061, "step": 8088 }, { "epoch": 0.4, "grad_norm": 1.625524640083313, "learning_rate": 3.001136419783586e-05, "loss": 5.6501, "step": 8092 }, { "epoch": 0.4, "grad_norm": 1.818798303604126, "learning_rate": 3.0001482286674243e-05, "loss": 5.6345, "step": 8096 }, { "epoch": 0.4, "grad_norm": 1.9189039468765259, "learning_rate": 2.9991600375512626e-05, "loss": 5.6594, "step": 8100 }, { "epoch": 0.4, "grad_norm": 1.9323093891143799, "learning_rate": 2.9981718464351004e-05, "loss": 5.6704, "step": 8104 }, { "epoch": 0.4, "grad_norm": 1.9228005409240723, "learning_rate": 2.9971836553189386e-05, "loss": 5.7182, "step": 8108 }, { "epoch": 0.4, "grad_norm": 1.8709877729415894, "learning_rate": 2.9961954642027768e-05, "loss": 5.6675, "step": 8112 }, { "epoch": 0.4, "grad_norm": 2.015131711959839, "learning_rate": 2.9952072730866147e-05, "loss": 5.5459, "step": 8116 }, { "epoch": 0.4, "grad_norm": 2.3515000343322754, "learning_rate": 
2.994219081970453e-05, "loss": 5.6607, "step": 8120 }, { "epoch": 0.4, "grad_norm": 1.6969714164733887, "learning_rate": 2.993230890854291e-05, "loss": 5.5014, "step": 8124 }, { "epoch": 0.4, "grad_norm": 1.8755816221237183, "learning_rate": 2.9922426997381297e-05, "loss": 5.6539, "step": 8128 }, { "epoch": 0.4, "grad_norm": 1.724385380744934, "learning_rate": 2.991254508621968e-05, "loss": 5.6172, "step": 8132 }, { "epoch": 0.4, "grad_norm": 1.9022701978683472, "learning_rate": 2.990266317505806e-05, "loss": 5.6263, "step": 8136 }, { "epoch": 0.4, "grad_norm": 1.7855650186538696, "learning_rate": 2.9892781263896443e-05, "loss": 5.558, "step": 8140 }, { "epoch": 0.4, "grad_norm": 1.6109340190887451, "learning_rate": 2.988289935273482e-05, "loss": 5.5173, "step": 8144 }, { "epoch": 0.4, "grad_norm": 1.7251406908035278, "learning_rate": 2.9873017441573203e-05, "loss": 5.5703, "step": 8148 }, { "epoch": 0.4, "grad_norm": 1.8107340335845947, "learning_rate": 2.9863135530411586e-05, "loss": 5.6168, "step": 8152 }, { "epoch": 0.4, "grad_norm": 1.5449600219726562, "learning_rate": 2.9853253619249964e-05, "loss": 5.4385, "step": 8156 }, { "epoch": 0.4, "grad_norm": 1.950967788696289, "learning_rate": 2.9843371708088346e-05, "loss": 5.6621, "step": 8160 }, { "epoch": 0.4, "grad_norm": 2.0183985233306885, "learning_rate": 2.983348979692673e-05, "loss": 5.5622, "step": 8164 }, { "epoch": 0.4, "grad_norm": 2.118438243865967, "learning_rate": 2.982360788576511e-05, "loss": 5.7116, "step": 8168 }, { "epoch": 0.4, "grad_norm": 2.065899610519409, "learning_rate": 2.981372597460349e-05, "loss": 5.7335, "step": 8172 }, { "epoch": 0.4, "grad_norm": 2.075225830078125, "learning_rate": 2.980384406344187e-05, "loss": 5.4978, "step": 8176 }, { "epoch": 0.4, "grad_norm": 1.7782163619995117, "learning_rate": 2.9793962152280253e-05, "loss": 5.6495, "step": 8180 }, { "epoch": 0.4, "grad_norm": 1.8186616897583008, "learning_rate": 2.9784080241118635e-05, "loss": 5.5176, "step": 8184 }, { 
"epoch": 0.4, "grad_norm": 1.926069736480713, "learning_rate": 2.9774198329957014e-05, "loss": 5.7383, "step": 8188 }, { "epoch": 0.4, "grad_norm": 1.8002229928970337, "learning_rate": 2.9764316418795396e-05, "loss": 5.6145, "step": 8192 }, { "epoch": 0.4, "grad_norm": 1.5681378841400146, "learning_rate": 2.9754434507633778e-05, "loss": 5.634, "step": 8196 }, { "epoch": 0.41, "grad_norm": 2.045126438140869, "learning_rate": 2.9744552596472157e-05, "loss": 5.5229, "step": 8200 }, { "epoch": 0.41, "grad_norm": 2.116232395172119, "learning_rate": 2.973467068531054e-05, "loss": 5.6309, "step": 8204 }, { "epoch": 0.41, "grad_norm": 1.5766971111297607, "learning_rate": 2.972478877414892e-05, "loss": 5.5052, "step": 8208 }, { "epoch": 0.41, "grad_norm": 1.6305807828903198, "learning_rate": 2.9714906862987303e-05, "loss": 5.5024, "step": 8212 }, { "epoch": 0.41, "grad_norm": 1.7745639085769653, "learning_rate": 2.970502495182568e-05, "loss": 5.6619, "step": 8216 }, { "epoch": 0.41, "grad_norm": 1.708950400352478, "learning_rate": 2.9695143040664064e-05, "loss": 5.634, "step": 8220 }, { "epoch": 0.41, "grad_norm": 1.8508306741714478, "learning_rate": 2.9685261129502446e-05, "loss": 5.7091, "step": 8224 }, { "epoch": 0.41, "grad_norm": 1.9124394655227661, "learning_rate": 2.9675379218340828e-05, "loss": 5.6441, "step": 8228 }, { "epoch": 0.41, "grad_norm": 1.735961675643921, "learning_rate": 2.9665497307179206e-05, "loss": 5.5941, "step": 8232 }, { "epoch": 0.41, "grad_norm": 2.0475118160247803, "learning_rate": 2.965561539601759e-05, "loss": 5.5674, "step": 8236 }, { "epoch": 0.41, "grad_norm": 1.8113858699798584, "learning_rate": 2.964573348485597e-05, "loss": 5.6697, "step": 8240 }, { "epoch": 0.41, "grad_norm": 1.9009729623794556, "learning_rate": 2.9635851573694356e-05, "loss": 5.5668, "step": 8244 }, { "epoch": 0.41, "grad_norm": 1.8220778703689575, "learning_rate": 2.9625969662532738e-05, "loss": 5.6735, "step": 8248 }, { "epoch": 0.41, "grad_norm": 
1.7952344417572021, "learning_rate": 2.961608775137112e-05, "loss": 5.5436, "step": 8252 }, { "epoch": 0.41, "grad_norm": 1.8756263256072998, "learning_rate": 2.96062058402095e-05, "loss": 5.5825, "step": 8256 }, { "epoch": 0.41, "grad_norm": 1.8858979940414429, "learning_rate": 2.959632392904788e-05, "loss": 5.6039, "step": 8260 }, { "epoch": 0.41, "grad_norm": 1.780319333076477, "learning_rate": 2.9586442017886263e-05, "loss": 5.6614, "step": 8264 }, { "epoch": 0.41, "grad_norm": 2.561244487762451, "learning_rate": 2.9576560106724645e-05, "loss": 5.6054, "step": 8268 }, { "epoch": 0.41, "grad_norm": 1.9080662727355957, "learning_rate": 2.9566678195563024e-05, "loss": 5.5438, "step": 8272 }, { "epoch": 0.41, "grad_norm": 1.8043930530548096, "learning_rate": 2.9556796284401406e-05, "loss": 5.5615, "step": 8276 }, { "epoch": 0.41, "grad_norm": 1.8665918111801147, "learning_rate": 2.9546914373239788e-05, "loss": 5.6224, "step": 8280 }, { "epoch": 0.41, "grad_norm": 1.7628755569458008, "learning_rate": 2.9537032462078166e-05, "loss": 5.648, "step": 8284 }, { "epoch": 0.41, "grad_norm": 1.5806515216827393, "learning_rate": 2.952715055091655e-05, "loss": 5.6113, "step": 8288 }, { "epoch": 0.41, "grad_norm": 1.7797776460647583, "learning_rate": 2.951726863975493e-05, "loss": 5.6718, "step": 8292 }, { "epoch": 0.41, "grad_norm": 1.85147225856781, "learning_rate": 2.9507386728593313e-05, "loss": 5.571, "step": 8296 }, { "epoch": 0.41, "grad_norm": 1.8725066184997559, "learning_rate": 2.949750481743169e-05, "loss": 5.6677, "step": 8300 }, { "epoch": 0.41, "grad_norm": 1.6381113529205322, "learning_rate": 2.9487622906270073e-05, "loss": 5.5314, "step": 8304 }, { "epoch": 0.41, "grad_norm": 1.7837311029434204, "learning_rate": 2.9477740995108455e-05, "loss": 5.6066, "step": 8308 }, { "epoch": 0.41, "grad_norm": 1.7460732460021973, "learning_rate": 2.9467859083946837e-05, "loss": 5.5304, "step": 8312 }, { "epoch": 0.41, "grad_norm": 1.7592207193374634, "learning_rate": 
2.9457977172785216e-05, "loss": 5.5576, "step": 8316 }, { "epoch": 0.41, "grad_norm": 1.7295989990234375, "learning_rate": 2.9448095261623598e-05, "loss": 5.6019, "step": 8320 }, { "epoch": 0.41, "grad_norm": 2.4635212421417236, "learning_rate": 2.943821335046198e-05, "loss": 5.6384, "step": 8324 }, { "epoch": 0.41, "grad_norm": 1.829713225364685, "learning_rate": 2.9428331439300362e-05, "loss": 5.6704, "step": 8328 }, { "epoch": 0.41, "grad_norm": 2.121614694595337, "learning_rate": 2.941844952813874e-05, "loss": 5.6339, "step": 8332 }, { "epoch": 0.41, "grad_norm": 1.783610224723816, "learning_rate": 2.9408567616977123e-05, "loss": 5.6805, "step": 8336 }, { "epoch": 0.41, "grad_norm": 2.0255215167999268, "learning_rate": 2.9398685705815505e-05, "loss": 5.667, "step": 8340 }, { "epoch": 0.41, "grad_norm": 1.8871128559112549, "learning_rate": 2.9388803794653884e-05, "loss": 5.6883, "step": 8344 }, { "epoch": 0.41, "grad_norm": 1.9648714065551758, "learning_rate": 2.9378921883492266e-05, "loss": 5.6178, "step": 8348 }, { "epoch": 0.41, "grad_norm": 1.8247488737106323, "learning_rate": 2.9369039972330648e-05, "loss": 5.675, "step": 8352 }, { "epoch": 0.41, "grad_norm": 1.8563957214355469, "learning_rate": 2.9359158061169033e-05, "loss": 5.6308, "step": 8356 }, { "epoch": 0.41, "grad_norm": 1.8070034980773926, "learning_rate": 2.9349276150007415e-05, "loss": 5.6328, "step": 8360 }, { "epoch": 0.41, "grad_norm": 1.8198412656784058, "learning_rate": 2.9339394238845797e-05, "loss": 5.6216, "step": 8364 }, { "epoch": 0.41, "grad_norm": 1.7104226350784302, "learning_rate": 2.9329512327684176e-05, "loss": 5.6402, "step": 8368 }, { "epoch": 0.41, "grad_norm": 1.6858009099960327, "learning_rate": 2.9319630416522558e-05, "loss": 5.6666, "step": 8372 }, { "epoch": 0.41, "grad_norm": 1.978938102722168, "learning_rate": 2.930974850536094e-05, "loss": 5.4985, "step": 8376 }, { "epoch": 0.41, "grad_norm": 1.6124422550201416, "learning_rate": 2.9299866594199322e-05, "loss": 5.478, 
"step": 8380 }, { "epoch": 0.41, "grad_norm": 2.06709623336792, "learning_rate": 2.92899846830377e-05, "loss": 5.5993, "step": 8384 }, { "epoch": 0.41, "grad_norm": 2.138789415359497, "learning_rate": 2.9280102771876083e-05, "loss": 5.6381, "step": 8388 }, { "epoch": 0.41, "grad_norm": 1.7755143642425537, "learning_rate": 2.9270220860714465e-05, "loss": 5.6309, "step": 8392 }, { "epoch": 0.41, "grad_norm": 2.0836946964263916, "learning_rate": 2.9260338949552847e-05, "loss": 5.5439, "step": 8396 }, { "epoch": 0.42, "grad_norm": 1.8185824155807495, "learning_rate": 2.9250457038391226e-05, "loss": 5.5819, "step": 8400 }, { "epoch": 0.42, "grad_norm": 1.9818052053451538, "learning_rate": 2.9240575127229608e-05, "loss": 5.6349, "step": 8404 }, { "epoch": 0.42, "grad_norm": 2.246522903442383, "learning_rate": 2.923069321606799e-05, "loss": 5.6007, "step": 8408 }, { "epoch": 0.42, "grad_norm": 1.9953023195266724, "learning_rate": 2.922081130490637e-05, "loss": 5.643, "step": 8412 }, { "epoch": 0.42, "grad_norm": 1.9882394075393677, "learning_rate": 2.921092939374475e-05, "loss": 5.6082, "step": 8416 }, { "epoch": 0.42, "grad_norm": 1.7919508218765259, "learning_rate": 2.9201047482583133e-05, "loss": 5.6575, "step": 8420 }, { "epoch": 0.42, "grad_norm": 1.6816109418869019, "learning_rate": 2.9191165571421515e-05, "loss": 5.6128, "step": 8424 }, { "epoch": 0.42, "grad_norm": 1.9026116132736206, "learning_rate": 2.9181283660259893e-05, "loss": 5.5993, "step": 8428 }, { "epoch": 0.42, "grad_norm": 2.1336424350738525, "learning_rate": 2.9171401749098276e-05, "loss": 5.5573, "step": 8432 }, { "epoch": 0.42, "grad_norm": 1.6572264432907104, "learning_rate": 2.9161519837936658e-05, "loss": 5.7065, "step": 8436 }, { "epoch": 0.42, "grad_norm": 2.1240315437316895, "learning_rate": 2.915163792677504e-05, "loss": 5.5141, "step": 8440 }, { "epoch": 0.42, "grad_norm": 1.728661298751831, "learning_rate": 2.914175601561342e-05, "loss": 5.621, "step": 8444 }, { "epoch": 0.42, "grad_norm": 
2.108468770980835, "learning_rate": 2.91318741044518e-05, "loss": 5.5439, "step": 8448 }, { "epoch": 0.42, "grad_norm": 1.912477731704712, "learning_rate": 2.9121992193290182e-05, "loss": 5.6222, "step": 8452 }, { "epoch": 0.42, "grad_norm": 1.753421425819397, "learning_rate": 2.9112110282128565e-05, "loss": 5.6209, "step": 8456 }, { "epoch": 0.42, "grad_norm": 1.7683593034744263, "learning_rate": 2.9102228370966943e-05, "loss": 5.6178, "step": 8460 }, { "epoch": 0.42, "grad_norm": 1.8550388813018799, "learning_rate": 2.9092346459805325e-05, "loss": 5.6337, "step": 8464 }, { "epoch": 0.42, "grad_norm": 1.6444224119186401, "learning_rate": 2.9082464548643707e-05, "loss": 5.7698, "step": 8468 }, { "epoch": 0.42, "grad_norm": 1.868353247642517, "learning_rate": 2.9072582637482093e-05, "loss": 5.8062, "step": 8472 }, { "epoch": 0.42, "grad_norm": 1.8483880758285522, "learning_rate": 2.9062700726320475e-05, "loss": 5.5449, "step": 8476 }, { "epoch": 0.42, "grad_norm": 2.059861660003662, "learning_rate": 2.9052818815158857e-05, "loss": 5.5988, "step": 8480 }, { "epoch": 0.42, "grad_norm": 1.7102926969528198, "learning_rate": 2.9042936903997236e-05, "loss": 5.5771, "step": 8484 }, { "epoch": 0.42, "grad_norm": 1.8607144355773926, "learning_rate": 2.9033054992835618e-05, "loss": 5.4982, "step": 8488 }, { "epoch": 0.42, "grad_norm": 1.9258161783218384, "learning_rate": 2.9023173081674e-05, "loss": 5.6881, "step": 8492 }, { "epoch": 0.42, "grad_norm": 1.688467264175415, "learning_rate": 2.901329117051238e-05, "loss": 5.6507, "step": 8496 }, { "epoch": 0.42, "grad_norm": 1.7011233568191528, "learning_rate": 2.900340925935076e-05, "loss": 5.59, "step": 8500 }, { "epoch": 0.42, "grad_norm": 1.6969784498214722, "learning_rate": 2.8993527348189142e-05, "loss": 5.6226, "step": 8504 }, { "epoch": 0.42, "grad_norm": 1.669765830039978, "learning_rate": 2.8983645437027525e-05, "loss": 5.5824, "step": 8508 }, { "epoch": 0.42, "grad_norm": 1.9257421493530273, "learning_rate": 
2.8973763525865903e-05, "loss": 5.5598, "step": 8512 }, { "epoch": 0.42, "grad_norm": 2.1687793731689453, "learning_rate": 2.8963881614704285e-05, "loss": 5.7037, "step": 8516 }, { "epoch": 0.42, "grad_norm": 1.5989969968795776, "learning_rate": 2.8953999703542667e-05, "loss": 5.5294, "step": 8520 }, { "epoch": 0.42, "grad_norm": 1.8674973249435425, "learning_rate": 2.894411779238105e-05, "loss": 5.4714, "step": 8524 }, { "epoch": 0.42, "grad_norm": 1.83799409866333, "learning_rate": 2.8934235881219428e-05, "loss": 5.6442, "step": 8528 }, { "epoch": 0.42, "grad_norm": 2.0756680965423584, "learning_rate": 2.892435397005781e-05, "loss": 5.6252, "step": 8532 }, { "epoch": 0.42, "grad_norm": 2.1030049324035645, "learning_rate": 2.8914472058896192e-05, "loss": 5.5707, "step": 8536 }, { "epoch": 0.42, "grad_norm": 1.740062952041626, "learning_rate": 2.8904590147734574e-05, "loss": 5.6083, "step": 8540 }, { "epoch": 0.42, "grad_norm": 1.9821360111236572, "learning_rate": 2.8894708236572953e-05, "loss": 5.5575, "step": 8544 }, { "epoch": 0.42, "grad_norm": 2.0628159046173096, "learning_rate": 2.8884826325411335e-05, "loss": 5.5519, "step": 8548 }, { "epoch": 0.42, "grad_norm": 2.3252205848693848, "learning_rate": 2.8874944414249717e-05, "loss": 5.6204, "step": 8552 }, { "epoch": 0.42, "grad_norm": 1.7173527479171753, "learning_rate": 2.8865062503088096e-05, "loss": 5.5685, "step": 8556 }, { "epoch": 0.42, "grad_norm": 1.7327337265014648, "learning_rate": 2.8855180591926478e-05, "loss": 5.6208, "step": 8560 }, { "epoch": 0.42, "grad_norm": 1.7945717573165894, "learning_rate": 2.884529868076486e-05, "loss": 5.6034, "step": 8564 }, { "epoch": 0.42, "grad_norm": 1.8818703889846802, "learning_rate": 2.8835416769603242e-05, "loss": 5.5966, "step": 8568 }, { "epoch": 0.42, "grad_norm": 1.780110478401184, "learning_rate": 2.882553485844162e-05, "loss": 5.6438, "step": 8572 }, { "epoch": 0.42, "grad_norm": 1.9500248432159424, "learning_rate": 2.8815652947280003e-05, "loss": 5.6569, 
"step": 8576 }, { "epoch": 0.42, "grad_norm": 2.0697216987609863, "learning_rate": 2.8805771036118385e-05, "loss": 5.5631, "step": 8580 }, { "epoch": 0.42, "grad_norm": 1.8026626110076904, "learning_rate": 2.8795889124956767e-05, "loss": 5.6308, "step": 8584 }, { "epoch": 0.42, "grad_norm": 1.6089578866958618, "learning_rate": 2.8786007213795152e-05, "loss": 5.5297, "step": 8588 }, { "epoch": 0.42, "grad_norm": 1.8671869039535522, "learning_rate": 2.8776125302633534e-05, "loss": 5.5388, "step": 8592 }, { "epoch": 0.42, "grad_norm": 1.7763539552688599, "learning_rate": 2.8766243391471913e-05, "loss": 5.6698, "step": 8596 }, { "epoch": 0.42, "grad_norm": 1.6824060678482056, "learning_rate": 2.8756361480310295e-05, "loss": 5.6613, "step": 8600 }, { "epoch": 0.43, "grad_norm": 1.9802032709121704, "learning_rate": 2.8746479569148677e-05, "loss": 5.6713, "step": 8604 }, { "epoch": 0.43, "grad_norm": 1.8129801750183105, "learning_rate": 2.873659765798706e-05, "loss": 5.5339, "step": 8608 }, { "epoch": 0.43, "grad_norm": 2.0134878158569336, "learning_rate": 2.8726715746825438e-05, "loss": 5.5781, "step": 8612 }, { "epoch": 0.43, "grad_norm": 1.7483782768249512, "learning_rate": 2.871683383566382e-05, "loss": 5.5785, "step": 8616 }, { "epoch": 0.43, "grad_norm": 2.1585028171539307, "learning_rate": 2.8706951924502202e-05, "loss": 5.5063, "step": 8620 }, { "epoch": 0.43, "grad_norm": 2.0951032638549805, "learning_rate": 2.8697070013340584e-05, "loss": 5.5559, "step": 8624 }, { "epoch": 0.43, "grad_norm": 1.7235571146011353, "learning_rate": 2.8687188102178963e-05, "loss": 5.6181, "step": 8628 }, { "epoch": 0.43, "grad_norm": 2.0943381786346436, "learning_rate": 2.8677306191017345e-05, "loss": 5.5952, "step": 8632 }, { "epoch": 0.43, "grad_norm": 1.9712262153625488, "learning_rate": 2.8667424279855727e-05, "loss": 5.6672, "step": 8636 }, { "epoch": 0.43, "grad_norm": 1.837053894996643, "learning_rate": 2.8657542368694105e-05, "loss": 5.6023, "step": 8640 }, { "epoch": 0.43, 
"grad_norm": 1.9455965757369995, "learning_rate": 2.8647660457532487e-05, "loss": 5.6813, "step": 8644 }, { "epoch": 0.43, "grad_norm": 1.9085052013397217, "learning_rate": 2.863777854637087e-05, "loss": 5.5879, "step": 8648 }, { "epoch": 0.43, "grad_norm": 2.043121814727783, "learning_rate": 2.862789663520925e-05, "loss": 5.484, "step": 8652 }, { "epoch": 0.43, "grad_norm": 2.043996572494507, "learning_rate": 2.861801472404763e-05, "loss": 5.5774, "step": 8656 }, { "epoch": 0.43, "grad_norm": 2.140770673751831, "learning_rate": 2.8608132812886012e-05, "loss": 5.5304, "step": 8660 }, { "epoch": 0.43, "grad_norm": 2.25034499168396, "learning_rate": 2.8598250901724394e-05, "loss": 5.4963, "step": 8664 }, { "epoch": 0.43, "grad_norm": 1.623706340789795, "learning_rate": 2.8588368990562776e-05, "loss": 5.5669, "step": 8668 }, { "epoch": 0.43, "grad_norm": 1.8031169176101685, "learning_rate": 2.8578487079401155e-05, "loss": 5.5762, "step": 8672 }, { "epoch": 0.43, "grad_norm": 2.0662543773651123, "learning_rate": 2.8568605168239537e-05, "loss": 5.4806, "step": 8676 }, { "epoch": 0.43, "grad_norm": 2.1524598598480225, "learning_rate": 2.855872325707792e-05, "loss": 5.6503, "step": 8680 }, { "epoch": 0.43, "grad_norm": 1.9902993440628052, "learning_rate": 2.8548841345916298e-05, "loss": 5.4978, "step": 8684 }, { "epoch": 0.43, "grad_norm": 1.8020424842834473, "learning_rate": 2.853895943475468e-05, "loss": 5.6205, "step": 8688 }, { "epoch": 0.43, "grad_norm": 1.732035517692566, "learning_rate": 2.8529077523593062e-05, "loss": 5.4912, "step": 8692 }, { "epoch": 0.43, "grad_norm": 1.8738574981689453, "learning_rate": 2.8519195612431444e-05, "loss": 5.5621, "step": 8696 }, { "epoch": 0.43, "grad_norm": 1.9714980125427246, "learning_rate": 2.850931370126983e-05, "loss": 5.6087, "step": 8700 }, { "epoch": 0.43, "grad_norm": 2.0756115913391113, "learning_rate": 2.849943179010821e-05, "loss": 5.65, "step": 8704 }, { "epoch": 0.43, "grad_norm": 2.104881763458252, "learning_rate": 
2.8489549878946594e-05, "loss": 5.6053, "step": 8708 }, { "epoch": 0.43, "grad_norm": 1.7442312240600586, "learning_rate": 2.8479667967784972e-05, "loss": 5.6199, "step": 8712 }, { "epoch": 0.43, "grad_norm": 2.0308890342712402, "learning_rate": 2.8469786056623354e-05, "loss": 5.7298, "step": 8716 }, { "epoch": 0.43, "grad_norm": 1.7256563901901245, "learning_rate": 2.8459904145461736e-05, "loss": 5.4541, "step": 8720 }, { "epoch": 0.43, "grad_norm": 2.112795352935791, "learning_rate": 2.8450022234300115e-05, "loss": 5.5393, "step": 8724 }, { "epoch": 0.43, "grad_norm": 1.886513113975525, "learning_rate": 2.8440140323138497e-05, "loss": 5.6014, "step": 8728 }, { "epoch": 0.43, "grad_norm": 1.9806932210922241, "learning_rate": 2.843025841197688e-05, "loss": 5.6828, "step": 8732 }, { "epoch": 0.43, "grad_norm": 2.23341965675354, "learning_rate": 2.842037650081526e-05, "loss": 5.7338, "step": 8736 }, { "epoch": 0.43, "grad_norm": 1.7140839099884033, "learning_rate": 2.841049458965364e-05, "loss": 5.6041, "step": 8740 }, { "epoch": 0.43, "grad_norm": 1.7378863096237183, "learning_rate": 2.8400612678492022e-05, "loss": 5.5925, "step": 8744 }, { "epoch": 0.43, "grad_norm": 1.7445762157440186, "learning_rate": 2.8390730767330404e-05, "loss": 5.5118, "step": 8748 }, { "epoch": 0.43, "grad_norm": 1.9341727495193481, "learning_rate": 2.8380848856168786e-05, "loss": 5.6059, "step": 8752 }, { "epoch": 0.43, "grad_norm": 2.1164705753326416, "learning_rate": 2.8370966945007165e-05, "loss": 5.7005, "step": 8756 }, { "epoch": 0.43, "grad_norm": 1.7062251567840576, "learning_rate": 2.8361085033845547e-05, "loss": 5.5896, "step": 8760 }, { "epoch": 0.43, "grad_norm": 1.8553556203842163, "learning_rate": 2.835120312268393e-05, "loss": 5.6167, "step": 8764 }, { "epoch": 0.43, "grad_norm": 1.7515733242034912, "learning_rate": 2.8341321211522308e-05, "loss": 5.5941, "step": 8768 }, { "epoch": 0.43, "grad_norm": 1.8316551446914673, "learning_rate": 2.833143930036069e-05, "loss": 5.4873, 
"step": 8772 }, { "epoch": 0.43, "grad_norm": 1.9083267450332642, "learning_rate": 2.8321557389199072e-05, "loss": 5.6866, "step": 8776 }, { "epoch": 0.43, "grad_norm": 1.4909923076629639, "learning_rate": 2.8311675478037454e-05, "loss": 5.5345, "step": 8780 }, { "epoch": 0.43, "grad_norm": 1.7641175985336304, "learning_rate": 2.8301793566875832e-05, "loss": 5.6514, "step": 8784 }, { "epoch": 0.43, "grad_norm": 1.8059598207473755, "learning_rate": 2.8291911655714215e-05, "loss": 5.4931, "step": 8788 }, { "epoch": 0.43, "grad_norm": 1.8962539434432983, "learning_rate": 2.8282029744552597e-05, "loss": 5.5765, "step": 8792 }, { "epoch": 0.43, "grad_norm": 2.188370943069458, "learning_rate": 2.827214783339098e-05, "loss": 5.6586, "step": 8796 }, { "epoch": 0.43, "grad_norm": 1.9519965648651123, "learning_rate": 2.8262265922229357e-05, "loss": 5.6691, "step": 8800 }, { "epoch": 0.44, "grad_norm": 1.9207426309585571, "learning_rate": 2.825238401106774e-05, "loss": 5.7145, "step": 8804 }, { "epoch": 0.44, "grad_norm": 1.8392800092697144, "learning_rate": 2.824250209990612e-05, "loss": 5.5793, "step": 8808 }, { "epoch": 0.44, "grad_norm": 2.228625774383545, "learning_rate": 2.8232620188744503e-05, "loss": 5.6521, "step": 8812 }, { "epoch": 0.44, "grad_norm": 2.18316650390625, "learning_rate": 2.822273827758289e-05, "loss": 5.7501, "step": 8816 }, { "epoch": 0.44, "grad_norm": 2.011134147644043, "learning_rate": 2.821285636642127e-05, "loss": 5.6124, "step": 8820 }, { "epoch": 0.44, "grad_norm": 1.9527933597564697, "learning_rate": 2.820297445525965e-05, "loss": 5.4269, "step": 8824 }, { "epoch": 0.44, "grad_norm": 1.6352803707122803, "learning_rate": 2.8193092544098032e-05, "loss": 5.7361, "step": 8828 }, { "epoch": 0.44, "grad_norm": 1.889510154724121, "learning_rate": 2.8183210632936414e-05, "loss": 5.5663, "step": 8832 }, { "epoch": 0.44, "grad_norm": 1.8308645486831665, "learning_rate": 2.8173328721774796e-05, "loss": 5.6488, "step": 8836 }, { "epoch": 0.44, 
"grad_norm": 1.8273881673812866, "learning_rate": 2.8163446810613175e-05, "loss": 5.6201, "step": 8840 }, { "epoch": 0.44, "grad_norm": 1.8046854734420776, "learning_rate": 2.8153564899451557e-05, "loss": 5.6918, "step": 8844 }, { "epoch": 0.44, "grad_norm": 2.0153470039367676, "learning_rate": 2.814368298828994e-05, "loss": 5.567, "step": 8848 }, { "epoch": 0.44, "grad_norm": 1.6771526336669922, "learning_rate": 2.8133801077128317e-05, "loss": 5.5549, "step": 8852 }, { "epoch": 0.44, "grad_norm": 1.917395830154419, "learning_rate": 2.81239191659667e-05, "loss": 5.564, "step": 8856 }, { "epoch": 0.44, "grad_norm": 1.7779862880706787, "learning_rate": 2.811403725480508e-05, "loss": 5.6795, "step": 8860 }, { "epoch": 0.44, "grad_norm": 1.9387528896331787, "learning_rate": 2.8104155343643464e-05, "loss": 5.5145, "step": 8864 }, { "epoch": 0.44, "grad_norm": 2.0087499618530273, "learning_rate": 2.8094273432481842e-05, "loss": 5.548, "step": 8868 }, { "epoch": 0.44, "grad_norm": 2.05503249168396, "learning_rate": 2.8084391521320224e-05, "loss": 5.6076, "step": 8872 }, { "epoch": 0.44, "grad_norm": 1.6153981685638428, "learning_rate": 2.8074509610158606e-05, "loss": 5.6318, "step": 8876 }, { "epoch": 0.44, "grad_norm": 1.6110368967056274, "learning_rate": 2.806462769899699e-05, "loss": 5.596, "step": 8880 }, { "epoch": 0.44, "grad_norm": 1.7750228643417358, "learning_rate": 2.8054745787835367e-05, "loss": 5.528, "step": 8884 }, { "epoch": 0.44, "grad_norm": 2.0455288887023926, "learning_rate": 2.804486387667375e-05, "loss": 5.4797, "step": 8888 }, { "epoch": 0.44, "grad_norm": 1.8901207447052002, "learning_rate": 2.803498196551213e-05, "loss": 5.6263, "step": 8892 }, { "epoch": 0.44, "grad_norm": 1.78607177734375, "learning_rate": 2.8025100054350513e-05, "loss": 5.6771, "step": 8896 }, { "epoch": 0.44, "grad_norm": 1.7866969108581543, "learning_rate": 2.8015218143188892e-05, "loss": 5.6625, "step": 8900 }, { "epoch": 0.44, "grad_norm": 1.713319182395935, "learning_rate": 
2.8005336232027274e-05, "loss": 5.5529, "step": 8904 }, { "epoch": 0.44, "grad_norm": 1.7033851146697998, "learning_rate": 2.7995454320865656e-05, "loss": 5.6241, "step": 8908 }, { "epoch": 0.44, "grad_norm": 1.626865029335022, "learning_rate": 2.7985572409704035e-05, "loss": 5.623, "step": 8912 }, { "epoch": 0.44, "grad_norm": 1.8610364198684692, "learning_rate": 2.7975690498542417e-05, "loss": 5.5305, "step": 8916 }, { "epoch": 0.44, "grad_norm": 2.474336624145508, "learning_rate": 2.79658085873808e-05, "loss": 5.5765, "step": 8920 }, { "epoch": 0.44, "grad_norm": 2.0331220626831055, "learning_rate": 2.795592667621918e-05, "loss": 5.5884, "step": 8924 }, { "epoch": 0.44, "grad_norm": 1.7288097143173218, "learning_rate": 2.794604476505756e-05, "loss": 5.5913, "step": 8928 }, { "epoch": 0.44, "grad_norm": 1.5526161193847656, "learning_rate": 2.793616285389595e-05, "loss": 5.5849, "step": 8932 }, { "epoch": 0.44, "grad_norm": 1.636829137802124, "learning_rate": 2.7926280942734327e-05, "loss": 5.5373, "step": 8936 }, { "epoch": 0.44, "grad_norm": 1.8936233520507812, "learning_rate": 2.791639903157271e-05, "loss": 5.7408, "step": 8940 }, { "epoch": 0.44, "grad_norm": 1.8193585872650146, "learning_rate": 2.790651712041109e-05, "loss": 5.6536, "step": 8944 }, { "epoch": 0.44, "grad_norm": 2.0485775470733643, "learning_rate": 2.7896635209249473e-05, "loss": 5.6758, "step": 8948 }, { "epoch": 0.44, "grad_norm": 1.8670713901519775, "learning_rate": 2.7886753298087852e-05, "loss": 5.5534, "step": 8952 }, { "epoch": 0.44, "grad_norm": 1.8386154174804688, "learning_rate": 2.7876871386926234e-05, "loss": 5.6744, "step": 8956 }, { "epoch": 0.44, "grad_norm": 2.060434103012085, "learning_rate": 2.7866989475764616e-05, "loss": 5.6095, "step": 8960 }, { "epoch": 0.44, "grad_norm": 1.5825614929199219, "learning_rate": 2.7857107564602998e-05, "loss": 5.5813, "step": 8964 }, { "epoch": 0.44, "grad_norm": 1.6022027730941772, "learning_rate": 2.7847225653441377e-05, "loss": 5.5509, 
"step": 8968 }, { "epoch": 0.44, "grad_norm": 1.892376184463501, "learning_rate": 2.783734374227976e-05, "loss": 5.5502, "step": 8972 }, { "epoch": 0.44, "grad_norm": 1.8178150653839111, "learning_rate": 2.782746183111814e-05, "loss": 5.5913, "step": 8976 }, { "epoch": 0.44, "grad_norm": 1.899789571762085, "learning_rate": 2.7817579919956523e-05, "loss": 5.6232, "step": 8980 }, { "epoch": 0.44, "grad_norm": 1.5999733209609985, "learning_rate": 2.78076980087949e-05, "loss": 5.5976, "step": 8984 }, { "epoch": 0.44, "grad_norm": 1.885884404182434, "learning_rate": 2.7797816097633284e-05, "loss": 5.5364, "step": 8988 }, { "epoch": 0.44, "grad_norm": 1.7063078880310059, "learning_rate": 2.7787934186471666e-05, "loss": 5.5669, "step": 8992 }, { "epoch": 0.44, "grad_norm": 1.8398665189743042, "learning_rate": 2.7778052275310044e-05, "loss": 5.5482, "step": 8996 }, { "epoch": 0.44, "grad_norm": 1.7803176641464233, "learning_rate": 2.7768170364148426e-05, "loss": 5.6153, "step": 9000 }, { "epoch": 0.44, "grad_norm": 1.9626309871673584, "learning_rate": 2.775828845298681e-05, "loss": 5.5867, "step": 9004 }, { "epoch": 0.45, "grad_norm": 2.0748071670532227, "learning_rate": 2.774840654182519e-05, "loss": 5.5583, "step": 9008 }, { "epoch": 0.45, "grad_norm": 1.8239414691925049, "learning_rate": 2.773852463066357e-05, "loss": 5.5866, "step": 9012 }, { "epoch": 0.45, "grad_norm": 1.6811037063598633, "learning_rate": 2.772864271950195e-05, "loss": 5.4177, "step": 9016 }, { "epoch": 0.45, "grad_norm": 2.1083476543426514, "learning_rate": 2.7718760808340333e-05, "loss": 5.5789, "step": 9020 }, { "epoch": 0.45, "grad_norm": 1.621522068977356, "learning_rate": 2.7708878897178715e-05, "loss": 5.5616, "step": 9024 }, { "epoch": 0.45, "grad_norm": 2.0952842235565186, "learning_rate": 2.7698996986017094e-05, "loss": 5.5983, "step": 9028 }, { "epoch": 0.45, "grad_norm": 1.8348654508590698, "learning_rate": 2.7689115074855476e-05, "loss": 5.4559, "step": 9032 }, { "epoch": 0.45, 
"grad_norm": 2.1072843074798584, "learning_rate": 2.7679233163693858e-05, "loss": 5.6188, "step": 9036 }, { "epoch": 0.45, "grad_norm": 1.5936877727508545, "learning_rate": 2.7669351252532237e-05, "loss": 5.5372, "step": 9040 }, { "epoch": 0.45, "grad_norm": 1.8489831686019897, "learning_rate": 2.7659469341370626e-05, "loss": 5.6375, "step": 9044 }, { "epoch": 0.45, "grad_norm": 2.1420514583587646, "learning_rate": 2.7649587430209008e-05, "loss": 5.5546, "step": 9048 }, { "epoch": 0.45, "grad_norm": 1.9371378421783447, "learning_rate": 2.7639705519047386e-05, "loss": 5.4121, "step": 9052 }, { "epoch": 0.45, "grad_norm": 2.1076085567474365, "learning_rate": 2.762982360788577e-05, "loss": 5.6349, "step": 9056 }, { "epoch": 0.45, "grad_norm": 2.1002743244171143, "learning_rate": 2.761994169672415e-05, "loss": 5.7135, "step": 9060 }, { "epoch": 0.45, "grad_norm": 1.5791290998458862, "learning_rate": 2.761005978556253e-05, "loss": 5.5199, "step": 9064 }, { "epoch": 0.45, "grad_norm": 1.9685577154159546, "learning_rate": 2.760017787440091e-05, "loss": 5.6298, "step": 9068 }, { "epoch": 0.45, "grad_norm": 1.6860204935073853, "learning_rate": 2.7590295963239293e-05, "loss": 5.67, "step": 9072 }, { "epoch": 0.45, "grad_norm": 2.2700107097625732, "learning_rate": 2.7580414052077675e-05, "loss": 5.661, "step": 9076 }, { "epoch": 0.45, "grad_norm": 1.9949781894683838, "learning_rate": 2.7570532140916054e-05, "loss": 5.6058, "step": 9080 }, { "epoch": 0.45, "grad_norm": 1.8357362747192383, "learning_rate": 2.7560650229754436e-05, "loss": 5.6563, "step": 9084 }, { "epoch": 0.45, "grad_norm": 2.157716751098633, "learning_rate": 2.7550768318592818e-05, "loss": 5.5362, "step": 9088 }, { "epoch": 0.45, "grad_norm": 2.0846590995788574, "learning_rate": 2.75408864074312e-05, "loss": 5.4562, "step": 9092 }, { "epoch": 0.45, "grad_norm": 1.8942608833312988, "learning_rate": 2.753100449626958e-05, "loss": 5.6858, "step": 9096 }, { "epoch": 0.45, "grad_norm": 1.882866621017456, 
"learning_rate": 2.752112258510796e-05, "loss": 5.5599, "step": 9100 }, { "epoch": 0.45, "grad_norm": 2.1065609455108643, "learning_rate": 2.7511240673946343e-05, "loss": 5.6257, "step": 9104 }, { "epoch": 0.45, "grad_norm": 1.8394254446029663, "learning_rate": 2.7501358762784725e-05, "loss": 5.548, "step": 9108 }, { "epoch": 0.45, "grad_norm": 1.759247899055481, "learning_rate": 2.7491476851623104e-05, "loss": 5.5753, "step": 9112 }, { "epoch": 0.45, "grad_norm": 1.773542881011963, "learning_rate": 2.7481594940461486e-05, "loss": 5.6327, "step": 9116 }, { "epoch": 0.45, "grad_norm": 2.0030620098114014, "learning_rate": 2.7471713029299868e-05, "loss": 5.5694, "step": 9120 }, { "epoch": 0.45, "grad_norm": 1.7507848739624023, "learning_rate": 2.7461831118138247e-05, "loss": 5.6386, "step": 9124 }, { "epoch": 0.45, "grad_norm": 1.7345106601715088, "learning_rate": 2.745194920697663e-05, "loss": 5.5398, "step": 9128 }, { "epoch": 0.45, "grad_norm": 2.1354246139526367, "learning_rate": 2.744206729581501e-05, "loss": 5.5165, "step": 9132 }, { "epoch": 0.45, "grad_norm": 1.6289561986923218, "learning_rate": 2.7432185384653393e-05, "loss": 5.6551, "step": 9136 }, { "epoch": 0.45, "grad_norm": 1.850968599319458, "learning_rate": 2.742230347349177e-05, "loss": 5.5557, "step": 9140 }, { "epoch": 0.45, "grad_norm": 1.6815087795257568, "learning_rate": 2.7412421562330154e-05, "loss": 5.5728, "step": 9144 }, { "epoch": 0.45, "grad_norm": 1.868223786354065, "learning_rate": 2.7402539651168536e-05, "loss": 5.5449, "step": 9148 }, { "epoch": 0.45, "grad_norm": 1.6773638725280762, "learning_rate": 2.7392657740006918e-05, "loss": 5.588, "step": 9152 }, { "epoch": 0.45, "grad_norm": 1.8860238790512085, "learning_rate": 2.7382775828845296e-05, "loss": 5.537, "step": 9156 }, { "epoch": 0.45, "grad_norm": 1.8454649448394775, "learning_rate": 2.7372893917683685e-05, "loss": 5.5351, "step": 9160 }, { "epoch": 0.45, "grad_norm": 1.8175948858261108, "learning_rate": 2.7363012006522064e-05, 
"loss": 5.5808, "step": 9164 }, { "epoch": 0.45, "grad_norm": 1.8350155353546143, "learning_rate": 2.7353130095360446e-05, "loss": 5.5217, "step": 9168 }, { "epoch": 0.45, "grad_norm": 1.6785465478897095, "learning_rate": 2.7343248184198828e-05, "loss": 5.5363, "step": 9172 }, { "epoch": 0.45, "grad_norm": 2.043757200241089, "learning_rate": 2.733336627303721e-05, "loss": 5.3924, "step": 9176 }, { "epoch": 0.45, "grad_norm": 1.7272677421569824, "learning_rate": 2.732348436187559e-05, "loss": 5.5548, "step": 9180 }, { "epoch": 0.45, "grad_norm": 1.7841123342514038, "learning_rate": 2.731360245071397e-05, "loss": 5.5787, "step": 9184 }, { "epoch": 0.45, "grad_norm": 2.2284867763519287, "learning_rate": 2.7303720539552353e-05, "loss": 5.6589, "step": 9188 }, { "epoch": 0.45, "grad_norm": 1.8492909669876099, "learning_rate": 2.7293838628390735e-05, "loss": 5.6947, "step": 9192 }, { "epoch": 0.45, "grad_norm": 1.870833396911621, "learning_rate": 2.7283956717229114e-05, "loss": 5.5846, "step": 9196 }, { "epoch": 0.45, "grad_norm": 2.007213592529297, "learning_rate": 2.7274074806067496e-05, "loss": 5.4946, "step": 9200 }, { "epoch": 0.45, "grad_norm": 2.178415536880493, "learning_rate": 2.7264192894905878e-05, "loss": 5.6511, "step": 9204 }, { "epoch": 0.45, "grad_norm": 1.777798056602478, "learning_rate": 2.7254310983744256e-05, "loss": 5.5872, "step": 9208 }, { "epoch": 0.46, "grad_norm": 2.0451252460479736, "learning_rate": 2.724442907258264e-05, "loss": 5.5361, "step": 9212 }, { "epoch": 0.46, "grad_norm": 1.672156572341919, "learning_rate": 2.723454716142102e-05, "loss": 5.5812, "step": 9216 }, { "epoch": 0.46, "grad_norm": 1.7018766403198242, "learning_rate": 2.7224665250259402e-05, "loss": 5.5854, "step": 9220 }, { "epoch": 0.46, "grad_norm": 1.8152602910995483, "learning_rate": 2.721478333909778e-05, "loss": 5.5784, "step": 9224 }, { "epoch": 0.46, "grad_norm": 1.722531795501709, "learning_rate": 2.7204901427936163e-05, "loss": 5.5214, "step": 9228 }, { "epoch": 
0.46, "grad_norm": 1.9717679023742676, "learning_rate": 2.7195019516774545e-05, "loss": 5.5436, "step": 9232 }, { "epoch": 0.46, "grad_norm": 1.6937377452850342, "learning_rate": 2.7185137605612927e-05, "loss": 5.5609, "step": 9236 }, { "epoch": 0.46, "grad_norm": 1.6418997049331665, "learning_rate": 2.7175255694451306e-05, "loss": 5.5576, "step": 9240 }, { "epoch": 0.46, "grad_norm": 1.9024310111999512, "learning_rate": 2.7165373783289688e-05, "loss": 5.6251, "step": 9244 }, { "epoch": 0.46, "grad_norm": 1.9837607145309448, "learning_rate": 2.715549187212807e-05, "loss": 5.6149, "step": 9248 }, { "epoch": 0.46, "grad_norm": 1.5828512907028198, "learning_rate": 2.714560996096645e-05, "loss": 5.4367, "step": 9252 }, { "epoch": 0.46, "grad_norm": 2.0914130210876465, "learning_rate": 2.713572804980483e-05, "loss": 5.632, "step": 9256 }, { "epoch": 0.46, "grad_norm": 2.2636313438415527, "learning_rate": 2.7125846138643213e-05, "loss": 5.6487, "step": 9260 }, { "epoch": 0.46, "grad_norm": 2.0330276489257812, "learning_rate": 2.7115964227481595e-05, "loss": 5.5618, "step": 9264 }, { "epoch": 0.46, "grad_norm": 1.7501007318496704, "learning_rate": 2.7106082316319974e-05, "loss": 5.4962, "step": 9268 }, { "epoch": 0.46, "grad_norm": 2.0401079654693604, "learning_rate": 2.7096200405158356e-05, "loss": 5.5273, "step": 9272 }, { "epoch": 0.46, "grad_norm": 1.885672688484192, "learning_rate": 2.7086318493996745e-05, "loss": 5.5909, "step": 9276 }, { "epoch": 0.46, "grad_norm": 1.8399213552474976, "learning_rate": 2.7076436582835123e-05, "loss": 5.5688, "step": 9280 }, { "epoch": 0.46, "grad_norm": 1.6890549659729004, "learning_rate": 2.7066554671673505e-05, "loss": 5.5704, "step": 9284 }, { "epoch": 0.46, "grad_norm": 2.0120296478271484, "learning_rate": 2.7056672760511887e-05, "loss": 5.6876, "step": 9288 }, { "epoch": 0.46, "grad_norm": 1.8612735271453857, "learning_rate": 2.7046790849350266e-05, "loss": 5.5703, "step": 9292 }, { "epoch": 0.46, "grad_norm": 
1.8374227285385132, "learning_rate": 2.7036908938188648e-05, "loss": 5.4759, "step": 9296 }, { "epoch": 0.46, "grad_norm": 1.8448069095611572, "learning_rate": 2.702702702702703e-05, "loss": 5.5785, "step": 9300 }, { "epoch": 0.46, "grad_norm": 1.6031235456466675, "learning_rate": 2.7017145115865412e-05, "loss": 5.5566, "step": 9304 }, { "epoch": 0.46, "grad_norm": 1.8467074632644653, "learning_rate": 2.700726320470379e-05, "loss": 5.6011, "step": 9308 }, { "epoch": 0.46, "grad_norm": 2.152393102645874, "learning_rate": 2.6997381293542173e-05, "loss": 5.4838, "step": 9312 }, { "epoch": 0.46, "grad_norm": 1.6905425786972046, "learning_rate": 2.6987499382380555e-05, "loss": 5.6356, "step": 9316 }, { "epoch": 0.46, "grad_norm": 2.052292823791504, "learning_rate": 2.6977617471218937e-05, "loss": 5.5832, "step": 9320 }, { "epoch": 0.46, "grad_norm": 1.9417749643325806, "learning_rate": 2.6967735560057316e-05, "loss": 5.6308, "step": 9324 }, { "epoch": 0.46, "grad_norm": 1.7844290733337402, "learning_rate": 2.6957853648895698e-05, "loss": 5.5116, "step": 9328 }, { "epoch": 0.46, "grad_norm": 1.945826768875122, "learning_rate": 2.694797173773408e-05, "loss": 5.5016, "step": 9332 }, { "epoch": 0.46, "grad_norm": 2.079559087753296, "learning_rate": 2.693808982657246e-05, "loss": 5.631, "step": 9336 }, { "epoch": 0.46, "grad_norm": 1.7609906196594238, "learning_rate": 2.692820791541084e-05, "loss": 5.6115, "step": 9340 }, { "epoch": 0.46, "grad_norm": 2.221853733062744, "learning_rate": 2.6918326004249223e-05, "loss": 5.6196, "step": 9344 }, { "epoch": 0.46, "grad_norm": 1.8510643243789673, "learning_rate": 2.6908444093087605e-05, "loss": 5.4623, "step": 9348 }, { "epoch": 0.46, "grad_norm": 2.1202962398529053, "learning_rate": 2.6898562181925983e-05, "loss": 5.6011, "step": 9352 }, { "epoch": 0.46, "grad_norm": 1.9215221405029297, "learning_rate": 2.6888680270764365e-05, "loss": 5.6439, "step": 9356 }, { "epoch": 0.46, "grad_norm": 1.8922703266143799, "learning_rate": 
2.6878798359602748e-05, "loss": 5.6081, "step": 9360 }, { "epoch": 0.46, "grad_norm": 2.219496726989746, "learning_rate": 2.686891644844113e-05, "loss": 5.7311, "step": 9364 }, { "epoch": 0.46, "grad_norm": 1.926804542541504, "learning_rate": 2.6859034537279508e-05, "loss": 5.5689, "step": 9368 }, { "epoch": 0.46, "grad_norm": 2.2841477394104004, "learning_rate": 2.684915262611789e-05, "loss": 5.6086, "step": 9372 }, { "epoch": 0.46, "grad_norm": 1.9708247184753418, "learning_rate": 2.6839270714956272e-05, "loss": 5.6008, "step": 9376 }, { "epoch": 0.46, "grad_norm": 1.7928872108459473, "learning_rate": 2.6829388803794654e-05, "loss": 5.6374, "step": 9380 }, { "epoch": 0.46, "grad_norm": 1.7741127014160156, "learning_rate": 2.6819506892633033e-05, "loss": 5.611, "step": 9384 }, { "epoch": 0.46, "grad_norm": 1.63377845287323, "learning_rate": 2.6809624981471415e-05, "loss": 5.6611, "step": 9388 }, { "epoch": 0.46, "grad_norm": 1.8570857048034668, "learning_rate": 2.67997430703098e-05, "loss": 5.5389, "step": 9392 }, { "epoch": 0.46, "grad_norm": 1.8265759944915771, "learning_rate": 2.6789861159148183e-05, "loss": 5.6445, "step": 9396 }, { "epoch": 0.46, "grad_norm": 1.8934123516082764, "learning_rate": 2.6779979247986565e-05, "loss": 5.6222, "step": 9400 }, { "epoch": 0.46, "grad_norm": 1.8162897825241089, "learning_rate": 2.6770097336824947e-05, "loss": 5.474, "step": 9404 }, { "epoch": 0.46, "grad_norm": 2.1060986518859863, "learning_rate": 2.6760215425663325e-05, "loss": 5.6903, "step": 9408 }, { "epoch": 0.47, "grad_norm": 1.9316869974136353, "learning_rate": 2.6750333514501708e-05, "loss": 5.4954, "step": 9412 }, { "epoch": 0.47, "grad_norm": 1.7672759294509888, "learning_rate": 2.674045160334009e-05, "loss": 5.604, "step": 9416 }, { "epoch": 0.47, "grad_norm": 1.6622782945632935, "learning_rate": 2.6730569692178468e-05, "loss": 5.5769, "step": 9420 }, { "epoch": 0.47, "grad_norm": 1.6604558229446411, "learning_rate": 2.672068778101685e-05, "loss": 5.5286, 
"step": 9424 }, { "epoch": 0.47, "grad_norm": 1.779937744140625, "learning_rate": 2.6710805869855232e-05, "loss": 5.6161, "step": 9428 }, { "epoch": 0.47, "grad_norm": 1.9902960062026978, "learning_rate": 2.6700923958693614e-05, "loss": 5.6306, "step": 9432 }, { "epoch": 0.47, "grad_norm": 2.084911823272705, "learning_rate": 2.6691042047531993e-05, "loss": 5.5941, "step": 9436 }, { "epoch": 0.47, "grad_norm": 1.6512478590011597, "learning_rate": 2.6681160136370375e-05, "loss": 5.5051, "step": 9440 }, { "epoch": 0.47, "grad_norm": 2.100574016571045, "learning_rate": 2.6671278225208757e-05, "loss": 5.5, "step": 9444 }, { "epoch": 0.47, "grad_norm": 1.922301173210144, "learning_rate": 2.666139631404714e-05, "loss": 5.7458, "step": 9448 }, { "epoch": 0.47, "grad_norm": 2.0800986289978027, "learning_rate": 2.6651514402885518e-05, "loss": 5.4861, "step": 9452 }, { "epoch": 0.47, "grad_norm": 1.9604395627975464, "learning_rate": 2.66416324917239e-05, "loss": 5.6116, "step": 9456 }, { "epoch": 0.47, "grad_norm": 1.9513726234436035, "learning_rate": 2.6631750580562282e-05, "loss": 5.6, "step": 9460 }, { "epoch": 0.47, "grad_norm": 1.6587291955947876, "learning_rate": 2.6621868669400664e-05, "loss": 5.5058, "step": 9464 }, { "epoch": 0.47, "grad_norm": 1.724055290222168, "learning_rate": 2.6611986758239043e-05, "loss": 5.6968, "step": 9468 }, { "epoch": 0.47, "grad_norm": 1.8488603830337524, "learning_rate": 2.6602104847077425e-05, "loss": 5.4925, "step": 9472 }, { "epoch": 0.47, "grad_norm": 1.9515416622161865, "learning_rate": 2.6592222935915807e-05, "loss": 5.5739, "step": 9476 }, { "epoch": 0.47, "grad_norm": 1.8610438108444214, "learning_rate": 2.6582341024754186e-05, "loss": 5.6003, "step": 9480 }, { "epoch": 0.47, "grad_norm": 1.7559446096420288, "learning_rate": 2.6572459113592568e-05, "loss": 5.6684, "step": 9484 }, { "epoch": 0.47, "grad_norm": 1.95654296875, "learning_rate": 2.656257720243095e-05, "loss": 5.5024, "step": 9488 }, { "epoch": 0.47, "grad_norm": 
1.9304943084716797, "learning_rate": 2.6552695291269332e-05, "loss": 5.5273, "step": 9492 }, { "epoch": 0.47, "grad_norm": 1.9279637336730957, "learning_rate": 2.654281338010771e-05, "loss": 5.503, "step": 9496 }, { "epoch": 0.47, "grad_norm": 1.975297212600708, "learning_rate": 2.6532931468946093e-05, "loss": 5.4805, "step": 9500 }, { "epoch": 0.47, "grad_norm": 1.8157209157943726, "learning_rate": 2.6523049557784478e-05, "loss": 5.5161, "step": 9504 }, { "epoch": 0.47, "grad_norm": 1.9559147357940674, "learning_rate": 2.651316764662286e-05, "loss": 5.5609, "step": 9508 }, { "epoch": 0.47, "grad_norm": 2.0093133449554443, "learning_rate": 2.6503285735461242e-05, "loss": 5.5939, "step": 9512 }, { "epoch": 0.47, "grad_norm": 1.8078559637069702, "learning_rate": 2.6493403824299624e-05, "loss": 5.4802, "step": 9516 }, { "epoch": 0.47, "grad_norm": 1.8105264902114868, "learning_rate": 2.6483521913138003e-05, "loss": 5.6028, "step": 9520 }, { "epoch": 0.47, "grad_norm": 2.1344854831695557, "learning_rate": 2.6473640001976385e-05, "loss": 5.6429, "step": 9524 }, { "epoch": 0.47, "grad_norm": 1.7309706211090088, "learning_rate": 2.6463758090814767e-05, "loss": 5.3914, "step": 9528 }, { "epoch": 0.47, "grad_norm": 2.0924670696258545, "learning_rate": 2.645387617965315e-05, "loss": 5.5516, "step": 9532 }, { "epoch": 0.47, "grad_norm": 2.076195001602173, "learning_rate": 2.6443994268491528e-05, "loss": 5.5816, "step": 9536 }, { "epoch": 0.47, "grad_norm": 2.286782741546631, "learning_rate": 2.643411235732991e-05, "loss": 5.633, "step": 9540 }, { "epoch": 0.47, "grad_norm": 2.0521233081817627, "learning_rate": 2.6424230446168292e-05, "loss": 5.5289, "step": 9544 }, { "epoch": 0.47, "grad_norm": 1.7153042554855347, "learning_rate": 2.6414348535006674e-05, "loss": 5.5659, "step": 9548 }, { "epoch": 0.47, "grad_norm": 1.8992023468017578, "learning_rate": 2.6404466623845053e-05, "loss": 5.5774, "step": 9552 }, { "epoch": 0.47, "grad_norm": 2.049445390701294, "learning_rate": 
2.6394584712683435e-05, "loss": 5.5893, "step": 9556 }, { "epoch": 0.47, "grad_norm": 1.7689001560211182, "learning_rate": 2.6384702801521817e-05, "loss": 5.4599, "step": 9560 }, { "epoch": 0.47, "grad_norm": 1.7153732776641846, "learning_rate": 2.6374820890360195e-05, "loss": 5.4882, "step": 9564 }, { "epoch": 0.47, "grad_norm": 1.8958677053451538, "learning_rate": 2.6364938979198577e-05, "loss": 5.4936, "step": 9568 }, { "epoch": 0.47, "grad_norm": 1.7627476453781128, "learning_rate": 2.635505706803696e-05, "loss": 5.4682, "step": 9572 }, { "epoch": 0.47, "grad_norm": 1.768936276435852, "learning_rate": 2.634517515687534e-05, "loss": 5.5129, "step": 9576 }, { "epoch": 0.47, "grad_norm": 1.9146132469177246, "learning_rate": 2.633529324571372e-05, "loss": 5.7007, "step": 9580 }, { "epoch": 0.47, "grad_norm": 1.9375848770141602, "learning_rate": 2.6325411334552102e-05, "loss": 5.6438, "step": 9584 }, { "epoch": 0.47, "grad_norm": 1.8867487907409668, "learning_rate": 2.6315529423390484e-05, "loss": 5.5024, "step": 9588 }, { "epoch": 0.47, "grad_norm": 1.8224083185195923, "learning_rate": 2.6305647512228866e-05, "loss": 5.4406, "step": 9592 }, { "epoch": 0.47, "grad_norm": 1.8230053186416626, "learning_rate": 2.6295765601067245e-05, "loss": 5.6339, "step": 9596 }, { "epoch": 0.47, "grad_norm": 1.9611897468566895, "learning_rate": 2.6285883689905627e-05, "loss": 5.377, "step": 9600 }, { "epoch": 0.47, "grad_norm": 1.842227816581726, "learning_rate": 2.627600177874401e-05, "loss": 5.61, "step": 9604 }, { "epoch": 0.47, "grad_norm": 1.8939987421035767, "learning_rate": 2.6266119867582388e-05, "loss": 5.6035, "step": 9608 }, { "epoch": 0.47, "grad_norm": 1.800186276435852, "learning_rate": 2.625623795642077e-05, "loss": 5.6362, "step": 9612 }, { "epoch": 0.48, "grad_norm": 2.1012604236602783, "learning_rate": 2.6246356045259152e-05, "loss": 5.7597, "step": 9616 }, { "epoch": 0.48, "grad_norm": 1.9128572940826416, "learning_rate": 2.6236474134097537e-05, "loss": 5.5882, 
"step": 9620 }, { "epoch": 0.48, "grad_norm": 1.9831055402755737, "learning_rate": 2.622659222293592e-05, "loss": 5.6463, "step": 9624 }, { "epoch": 0.48, "grad_norm": 1.7044267654418945, "learning_rate": 2.62167103117743e-05, "loss": 5.4153, "step": 9628 }, { "epoch": 0.48, "grad_norm": 1.859174132347107, "learning_rate": 2.620682840061268e-05, "loss": 5.4704, "step": 9632 }, { "epoch": 0.48, "grad_norm": 1.8365576267242432, "learning_rate": 2.6196946489451062e-05, "loss": 5.6485, "step": 9636 }, { "epoch": 0.48, "grad_norm": 1.8253902196884155, "learning_rate": 2.6187064578289444e-05, "loss": 5.6777, "step": 9640 }, { "epoch": 0.48, "grad_norm": 1.8828972578048706, "learning_rate": 2.6177182667127826e-05, "loss": 5.4909, "step": 9644 }, { "epoch": 0.48, "grad_norm": 1.7956063747406006, "learning_rate": 2.6167300755966205e-05, "loss": 5.5966, "step": 9648 }, { "epoch": 0.48, "grad_norm": 2.1321167945861816, "learning_rate": 2.6157418844804587e-05, "loss": 5.6254, "step": 9652 }, { "epoch": 0.48, "grad_norm": 1.7192585468292236, "learning_rate": 2.614753693364297e-05, "loss": 5.4451, "step": 9656 }, { "epoch": 0.48, "grad_norm": 1.9337345361709595, "learning_rate": 2.613765502248135e-05, "loss": 5.6618, "step": 9660 }, { "epoch": 0.48, "grad_norm": 2.0022127628326416, "learning_rate": 2.612777311131973e-05, "loss": 5.5478, "step": 9664 }, { "epoch": 0.48, "grad_norm": 1.6713944673538208, "learning_rate": 2.6117891200158112e-05, "loss": 5.5078, "step": 9668 }, { "epoch": 0.48, "grad_norm": 1.902187466621399, "learning_rate": 2.6108009288996494e-05, "loss": 5.6425, "step": 9672 }, { "epoch": 0.48, "grad_norm": 2.1489810943603516, "learning_rate": 2.6098127377834876e-05, "loss": 5.6363, "step": 9676 }, { "epoch": 0.48, "grad_norm": 1.7200430631637573, "learning_rate": 2.6088245466673255e-05, "loss": 5.6002, "step": 9680 }, { "epoch": 0.48, "grad_norm": 1.9988877773284912, "learning_rate": 2.6078363555511637e-05, "loss": 5.6169, "step": 9684 }, { "epoch": 0.48, 
"grad_norm": 1.99684476852417, "learning_rate": 2.606848164435002e-05, "loss": 5.4686, "step": 9688 }, { "epoch": 0.48, "grad_norm": 1.6895607709884644, "learning_rate": 2.6058599733188398e-05, "loss": 5.566, "step": 9692 }, { "epoch": 0.48, "grad_norm": 1.6524453163146973, "learning_rate": 2.604871782202678e-05, "loss": 5.5477, "step": 9696 }, { "epoch": 0.48, "grad_norm": 1.7908339500427246, "learning_rate": 2.603883591086516e-05, "loss": 5.7, "step": 9700 }, { "epoch": 0.48, "grad_norm": 1.8683209419250488, "learning_rate": 2.6028953999703544e-05, "loss": 5.4894, "step": 9704 }, { "epoch": 0.48, "grad_norm": 1.883863925933838, "learning_rate": 2.6019072088541922e-05, "loss": 5.6895, "step": 9708 }, { "epoch": 0.48, "grad_norm": 1.8453763723373413, "learning_rate": 2.6009190177380304e-05, "loss": 5.4554, "step": 9712 }, { "epoch": 0.48, "grad_norm": 1.7885375022888184, "learning_rate": 2.5999308266218686e-05, "loss": 5.6365, "step": 9716 }, { "epoch": 0.48, "grad_norm": 1.9146047830581665, "learning_rate": 2.598942635505707e-05, "loss": 5.5714, "step": 9720 }, { "epoch": 0.48, "grad_norm": 1.9622520208358765, "learning_rate": 2.5979544443895447e-05, "loss": 5.6337, "step": 9724 }, { "epoch": 0.48, "grad_norm": 2.236665725708008, "learning_rate": 2.596966253273383e-05, "loss": 5.5048, "step": 9728 }, { "epoch": 0.48, "grad_norm": 1.8453449010849, "learning_rate": 2.595978062157221e-05, "loss": 5.6236, "step": 9732 }, { "epoch": 0.48, "grad_norm": 1.9595757722854614, "learning_rate": 2.5949898710410597e-05, "loss": 5.6084, "step": 9736 }, { "epoch": 0.48, "grad_norm": 2.1944825649261475, "learning_rate": 2.594001679924898e-05, "loss": 5.4383, "step": 9740 }, { "epoch": 0.48, "grad_norm": 1.821465015411377, "learning_rate": 2.593013488808736e-05, "loss": 5.4977, "step": 9744 }, { "epoch": 0.48, "grad_norm": 1.7616369724273682, "learning_rate": 2.592025297692574e-05, "loss": 5.5552, "step": 9748 }, { "epoch": 0.48, "grad_norm": 1.8618212938308716, "learning_rate": 
2.591037106576412e-05, "loss": 5.6036, "step": 9752 }, { "epoch": 0.48, "grad_norm": 2.120798110961914, "learning_rate": 2.5900489154602504e-05, "loss": 5.4679, "step": 9756 }, { "epoch": 0.48, "grad_norm": 2.2172162532806396, "learning_rate": 2.5890607243440886e-05, "loss": 5.583, "step": 9760 }, { "epoch": 0.48, "grad_norm": 1.9342153072357178, "learning_rate": 2.5880725332279264e-05, "loss": 5.5783, "step": 9764 }, { "epoch": 0.48, "grad_norm": 1.6747381687164307, "learning_rate": 2.5870843421117647e-05, "loss": 5.6047, "step": 9768 }, { "epoch": 0.48, "grad_norm": 2.0254557132720947, "learning_rate": 2.586096150995603e-05, "loss": 5.6368, "step": 9772 }, { "epoch": 0.48, "grad_norm": 1.9324589967727661, "learning_rate": 2.5851079598794407e-05, "loss": 5.6284, "step": 9776 }, { "epoch": 0.48, "grad_norm": 1.6605185270309448, "learning_rate": 2.584119768763279e-05, "loss": 5.5681, "step": 9780 }, { "epoch": 0.48, "grad_norm": 1.6846225261688232, "learning_rate": 2.583131577647117e-05, "loss": 5.5159, "step": 9784 }, { "epoch": 0.48, "grad_norm": 2.0729458332061768, "learning_rate": 2.5821433865309553e-05, "loss": 5.5701, "step": 9788 }, { "epoch": 0.48, "grad_norm": 1.831186056137085, "learning_rate": 2.5811551954147932e-05, "loss": 5.5996, "step": 9792 }, { "epoch": 0.48, "grad_norm": 1.744837999343872, "learning_rate": 2.5801670042986314e-05, "loss": 5.5798, "step": 9796 }, { "epoch": 0.48, "grad_norm": 1.7464189529418945, "learning_rate": 2.5791788131824696e-05, "loss": 5.4147, "step": 9800 }, { "epoch": 0.48, "grad_norm": 2.083294630050659, "learning_rate": 2.5781906220663078e-05, "loss": 5.5884, "step": 9804 }, { "epoch": 0.48, "grad_norm": 1.848488211631775, "learning_rate": 2.5772024309501457e-05, "loss": 5.4192, "step": 9808 }, { "epoch": 0.48, "grad_norm": 1.8457578420639038, "learning_rate": 2.576214239833984e-05, "loss": 5.6189, "step": 9812 }, { "epoch": 0.49, "grad_norm": 1.9450173377990723, "learning_rate": 2.575226048717822e-05, "loss": 5.5665, 
"step": 9816 }, { "epoch": 0.49, "grad_norm": 1.7913298606872559, "learning_rate": 2.57423785760166e-05, "loss": 5.5333, "step": 9820 }, { "epoch": 0.49, "grad_norm": 1.8808374404907227, "learning_rate": 2.5732496664854982e-05, "loss": 5.4615, "step": 9824 }, { "epoch": 0.49, "grad_norm": 2.1443545818328857, "learning_rate": 2.5722614753693364e-05, "loss": 5.599, "step": 9828 }, { "epoch": 0.49, "grad_norm": 1.867958903312683, "learning_rate": 2.5712732842531746e-05, "loss": 5.6383, "step": 9832 }, { "epoch": 0.49, "grad_norm": 1.9032071828842163, "learning_rate": 2.5702850931370125e-05, "loss": 5.6756, "step": 9836 }, { "epoch": 0.49, "grad_norm": 2.0405712127685547, "learning_rate": 2.5692969020208507e-05, "loss": 5.5741, "step": 9840 }, { "epoch": 0.49, "grad_norm": 1.9483815431594849, "learning_rate": 2.568308710904689e-05, "loss": 5.5603, "step": 9844 }, { "epoch": 0.49, "grad_norm": 1.6286249160766602, "learning_rate": 2.5673205197885274e-05, "loss": 5.5213, "step": 9848 }, { "epoch": 0.49, "grad_norm": 2.009946346282959, "learning_rate": 2.5663323286723656e-05, "loss": 5.6207, "step": 9852 }, { "epoch": 0.49, "grad_norm": 1.7582067251205444, "learning_rate": 2.5653441375562038e-05, "loss": 5.6453, "step": 9856 }, { "epoch": 0.49, "grad_norm": 1.7018730640411377, "learning_rate": 2.5643559464400417e-05, "loss": 5.6713, "step": 9860 }, { "epoch": 0.49, "grad_norm": 1.920355200767517, "learning_rate": 2.56336775532388e-05, "loss": 5.6109, "step": 9864 }, { "epoch": 0.49, "grad_norm": 1.79157555103302, "learning_rate": 2.562379564207718e-05, "loss": 5.5102, "step": 9868 }, { "epoch": 0.49, "grad_norm": 1.7163673639297485, "learning_rate": 2.5613913730915563e-05, "loss": 5.6472, "step": 9872 }, { "epoch": 0.49, "grad_norm": 1.8523849248886108, "learning_rate": 2.5604031819753942e-05, "loss": 5.6162, "step": 9876 }, { "epoch": 0.49, "grad_norm": 2.006044626235962, "learning_rate": 2.5594149908592324e-05, "loss": 5.6793, "step": 9880 }, { "epoch": 0.49, 
"grad_norm": 1.745415449142456, "learning_rate": 2.558673847522111e-05, "loss": 5.5417, "step": 9884 }, { "epoch": 0.49, "grad_norm": 1.9379210472106934, "learning_rate": 2.5576856564059492e-05, "loss": 5.6047, "step": 9888 }, { "epoch": 0.49, "grad_norm": 1.8513517379760742, "learning_rate": 2.5566974652897875e-05, "loss": 5.6515, "step": 9892 }, { "epoch": 0.49, "grad_norm": 1.8587697744369507, "learning_rate": 2.5557092741736253e-05, "loss": 5.5598, "step": 9896 }, { "epoch": 0.49, "grad_norm": 1.9905014038085938, "learning_rate": 2.5547210830574635e-05, "loss": 5.6137, "step": 9900 }, { "epoch": 0.49, "grad_norm": 1.83003568649292, "learning_rate": 2.5537328919413017e-05, "loss": 5.6503, "step": 9904 }, { "epoch": 0.49, "grad_norm": 1.71113121509552, "learning_rate": 2.55274470082514e-05, "loss": 5.561, "step": 9908 }, { "epoch": 0.49, "grad_norm": 1.6225950717926025, "learning_rate": 2.5517565097089778e-05, "loss": 5.4564, "step": 9912 }, { "epoch": 0.49, "grad_norm": 1.8159841299057007, "learning_rate": 2.550768318592816e-05, "loss": 5.5984, "step": 9916 }, { "epoch": 0.49, "grad_norm": 2.003652572631836, "learning_rate": 2.5497801274766542e-05, "loss": 5.4597, "step": 9920 }, { "epoch": 0.49, "grad_norm": 1.8188750743865967, "learning_rate": 2.5487919363604924e-05, "loss": 5.46, "step": 9924 }, { "epoch": 0.49, "grad_norm": 1.832154631614685, "learning_rate": 2.5478037452443303e-05, "loss": 5.5748, "step": 9928 }, { "epoch": 0.49, "grad_norm": 1.9353375434875488, "learning_rate": 2.5468155541281685e-05, "loss": 5.5672, "step": 9932 }, { "epoch": 0.49, "grad_norm": 1.9940686225891113, "learning_rate": 2.5458273630120067e-05, "loss": 5.5623, "step": 9936 }, { "epoch": 0.49, "grad_norm": 1.8885998725891113, "learning_rate": 2.544839171895845e-05, "loss": 5.6176, "step": 9940 }, { "epoch": 0.49, "grad_norm": 1.8526244163513184, "learning_rate": 2.5438509807796828e-05, "loss": 5.6358, "step": 9944 }, { "epoch": 0.49, "grad_norm": 1.8423300981521606, 
"learning_rate": 2.542862789663521e-05, "loss": 5.5085, "step": 9948 }, { "epoch": 0.49, "grad_norm": 1.6946587562561035, "learning_rate": 2.5418745985473592e-05, "loss": 5.452, "step": 9952 }, { "epoch": 0.49, "grad_norm": 1.8842428922653198, "learning_rate": 2.540886407431197e-05, "loss": 5.4547, "step": 9956 }, { "epoch": 0.49, "grad_norm": 1.8062175512313843, "learning_rate": 2.5398982163150353e-05, "loss": 5.5232, "step": 9960 }, { "epoch": 0.49, "grad_norm": 1.968763828277588, "learning_rate": 2.5389100251988735e-05, "loss": 5.6634, "step": 9964 }, { "epoch": 0.49, "grad_norm": 1.782151460647583, "learning_rate": 2.5379218340827117e-05, "loss": 5.4746, "step": 9968 }, { "epoch": 0.49, "grad_norm": 2.0645010471343994, "learning_rate": 2.5369336429665495e-05, "loss": 5.5755, "step": 9972 }, { "epoch": 0.49, "grad_norm": 2.081439733505249, "learning_rate": 2.5359454518503877e-05, "loss": 5.5264, "step": 9976 }, { "epoch": 0.49, "grad_norm": 1.9738670587539673, "learning_rate": 2.534957260734226e-05, "loss": 5.5369, "step": 9980 }, { "epoch": 0.49, "grad_norm": 2.08581805229187, "learning_rate": 2.533969069618064e-05, "loss": 5.6849, "step": 9984 }, { "epoch": 0.49, "grad_norm": 1.998647928237915, "learning_rate": 2.532980878501902e-05, "loss": 5.628, "step": 9988 }, { "epoch": 0.49, "grad_norm": 1.9414081573486328, "learning_rate": 2.5319926873857402e-05, "loss": 5.5865, "step": 9992 }, { "epoch": 0.49, "grad_norm": 1.7478708028793335, "learning_rate": 2.5310044962695788e-05, "loss": 5.4073, "step": 9996 }, { "epoch": 0.49, "grad_norm": 2.031409740447998, "learning_rate": 2.530016305153417e-05, "loss": 5.6781, "step": 10000 }, { "epoch": 0.49, "grad_norm": 1.8171534538269043, "learning_rate": 2.5290281140372552e-05, "loss": 5.6865, "step": 10004 }, { "epoch": 0.49, "grad_norm": 2.128012180328369, "learning_rate": 2.5280399229210934e-05, "loss": 5.6194, "step": 10008 }, { "epoch": 0.49, "grad_norm": 2.1675913333892822, "learning_rate": 2.5270517318049313e-05, 
"loss": 5.4726, "step": 10012 }, { "epoch": 0.49, "grad_norm": 2.229329824447632, "learning_rate": 2.5260635406887695e-05, "loss": 5.564, "step": 10016 }, { "epoch": 0.5, "grad_norm": 1.8510156869888306, "learning_rate": 2.5250753495726077e-05, "loss": 5.4116, "step": 10020 }, { "epoch": 0.5, "grad_norm": 1.9130499362945557, "learning_rate": 2.524087158456446e-05, "loss": 5.5758, "step": 10024 }, { "epoch": 0.5, "grad_norm": 2.073378562927246, "learning_rate": 2.5230989673402837e-05, "loss": 5.5724, "step": 10028 }, { "epoch": 0.5, "grad_norm": 2.248762845993042, "learning_rate": 2.522110776224122e-05, "loss": 5.6893, "step": 10032 }, { "epoch": 0.5, "grad_norm": 2.0180504322052, "learning_rate": 2.52112258510796e-05, "loss": 5.642, "step": 10036 }, { "epoch": 0.5, "grad_norm": 1.786154866218567, "learning_rate": 2.520134393991798e-05, "loss": 5.6359, "step": 10040 }, { "epoch": 0.5, "grad_norm": 1.8328882455825806, "learning_rate": 2.5191462028756362e-05, "loss": 5.5998, "step": 10044 }, { "epoch": 0.5, "grad_norm": 2.1407294273376465, "learning_rate": 2.5181580117594744e-05, "loss": 5.7289, "step": 10048 }, { "epoch": 0.5, "grad_norm": 1.6847882270812988, "learning_rate": 2.5171698206433126e-05, "loss": 5.6441, "step": 10052 }, { "epoch": 0.5, "grad_norm": 1.7295290231704712, "learning_rate": 2.5161816295271505e-05, "loss": 5.5917, "step": 10056 }, { "epoch": 0.5, "grad_norm": 2.069204807281494, "learning_rate": 2.5151934384109887e-05, "loss": 5.3987, "step": 10060 }, { "epoch": 0.5, "grad_norm": 1.8884251117706299, "learning_rate": 2.514205247294827e-05, "loss": 5.5554, "step": 10064 }, { "epoch": 0.5, "grad_norm": 1.767871618270874, "learning_rate": 2.513217056178665e-05, "loss": 5.6294, "step": 10068 }, { "epoch": 0.5, "grad_norm": 1.8906821012496948, "learning_rate": 2.512228865062503e-05, "loss": 5.5464, "step": 10072 }, { "epoch": 0.5, "grad_norm": 1.6515437364578247, "learning_rate": 2.5112406739463412e-05, "loss": 5.4848, "step": 10076 }, { "epoch": 0.5, 
"grad_norm": 2.0448694229125977, "learning_rate": 2.5102524828301794e-05, "loss": 5.5193, "step": 10080 }, { "epoch": 0.5, "grad_norm": 1.867287278175354, "learning_rate": 2.5092642917140173e-05, "loss": 5.5843, "step": 10084 }, { "epoch": 0.5, "grad_norm": 1.833117961883545, "learning_rate": 2.5082761005978555e-05, "loss": 5.4566, "step": 10088 }, { "epoch": 0.5, "grad_norm": 1.842322826385498, "learning_rate": 2.5072879094816937e-05, "loss": 5.5783, "step": 10092 }, { "epoch": 0.5, "grad_norm": 1.7041923999786377, "learning_rate": 2.506299718365532e-05, "loss": 5.6291, "step": 10096 }, { "epoch": 0.5, "grad_norm": 1.9583604335784912, "learning_rate": 2.5053115272493698e-05, "loss": 5.5434, "step": 10100 }, { "epoch": 0.5, "grad_norm": 1.8357032537460327, "learning_rate": 2.504323336133208e-05, "loss": 5.5751, "step": 10104 }, { "epoch": 0.5, "grad_norm": 1.9538915157318115, "learning_rate": 2.503335145017047e-05, "loss": 5.7138, "step": 10108 }, { "epoch": 0.5, "grad_norm": 2.0937674045562744, "learning_rate": 2.5023469539008847e-05, "loss": 5.5237, "step": 10112 }, { "epoch": 0.5, "grad_norm": 1.9279645681381226, "learning_rate": 2.501358762784723e-05, "loss": 5.6076, "step": 10116 }, { "epoch": 0.5, "grad_norm": 1.8550045490264893, "learning_rate": 2.500370571668561e-05, "loss": 5.5574, "step": 10120 }, { "epoch": 0.5, "grad_norm": 2.0821774005889893, "learning_rate": 2.499382380552399e-05, "loss": 5.532, "step": 10124 }, { "epoch": 0.5, "grad_norm": 2.088787078857422, "learning_rate": 2.498394189436237e-05, "loss": 5.6185, "step": 10128 }, { "epoch": 0.5, "grad_norm": 1.7520018815994263, "learning_rate": 2.497405998320075e-05, "loss": 5.5648, "step": 10132 }, { "epoch": 0.5, "grad_norm": 1.963576078414917, "learning_rate": 2.4964178072039136e-05, "loss": 5.6652, "step": 10136 }, { "epoch": 0.5, "grad_norm": 1.9875547885894775, "learning_rate": 2.4954296160877515e-05, "loss": 5.5179, "step": 10140 }, { "epoch": 0.5, "grad_norm": 1.9994404315948486, 
"learning_rate": 2.4944414249715897e-05, "loss": 5.6485, "step": 10144 }, { "epoch": 0.5, "grad_norm": 2.0803356170654297, "learning_rate": 2.493453233855428e-05, "loss": 5.4617, "step": 10148 }, { "epoch": 0.5, "grad_norm": 1.6625994443893433, "learning_rate": 2.492465042739266e-05, "loss": 5.5363, "step": 10152 }, { "epoch": 0.5, "grad_norm": 1.8753883838653564, "learning_rate": 2.491476851623104e-05, "loss": 5.3869, "step": 10156 }, { "epoch": 0.5, "grad_norm": 1.644220232963562, "learning_rate": 2.4904886605069422e-05, "loss": 5.5971, "step": 10160 }, { "epoch": 0.5, "grad_norm": 2.017249584197998, "learning_rate": 2.4895004693907804e-05, "loss": 5.506, "step": 10164 }, { "epoch": 0.5, "grad_norm": 2.240140914916992, "learning_rate": 2.4885122782746182e-05, "loss": 5.6691, "step": 10168 }, { "epoch": 0.5, "grad_norm": 1.842436671257019, "learning_rate": 2.4875240871584565e-05, "loss": 5.5673, "step": 10172 }, { "epoch": 0.5, "grad_norm": 1.8828648328781128, "learning_rate": 2.4865358960422947e-05, "loss": 5.5275, "step": 10176 }, { "epoch": 0.5, "grad_norm": 2.1830368041992188, "learning_rate": 2.485547704926133e-05, "loss": 5.6258, "step": 10180 }, { "epoch": 0.5, "grad_norm": 2.012813091278076, "learning_rate": 2.4845595138099707e-05, "loss": 5.5947, "step": 10184 }, { "epoch": 0.5, "grad_norm": 1.7969881296157837, "learning_rate": 2.483571322693809e-05, "loss": 5.564, "step": 10188 }, { "epoch": 0.5, "grad_norm": 1.8949358463287354, "learning_rate": 2.482583131577647e-05, "loss": 5.6213, "step": 10192 }, { "epoch": 0.5, "grad_norm": 1.8343627452850342, "learning_rate": 2.4815949404614854e-05, "loss": 5.5869, "step": 10196 }, { "epoch": 0.5, "grad_norm": 1.8885095119476318, "learning_rate": 2.4806067493453236e-05, "loss": 5.586, "step": 10200 }, { "epoch": 0.5, "grad_norm": 2.0102386474609375, "learning_rate": 2.4796185582291618e-05, "loss": 5.5219, "step": 10204 }, { "epoch": 0.5, "grad_norm": 2.127244472503662, "learning_rate": 2.478630367113e-05, "loss": 
5.6196, "step": 10208 }, { "epoch": 0.5, "grad_norm": 1.7955610752105713, "learning_rate": 2.477642175996838e-05, "loss": 5.5848, "step": 10212 }, { "epoch": 0.5, "grad_norm": 1.9604214429855347, "learning_rate": 2.476653984880676e-05, "loss": 5.6014, "step": 10216 }, { "epoch": 0.5, "grad_norm": 2.11737322807312, "learning_rate": 2.4756657937645142e-05, "loss": 5.5377, "step": 10220 }, { "epoch": 0.51, "grad_norm": 2.0900325775146484, "learning_rate": 2.4746776026483525e-05, "loss": 5.5901, "step": 10224 }, { "epoch": 0.51, "grad_norm": 1.7925187349319458, "learning_rate": 2.4736894115321903e-05, "loss": 5.4441, "step": 10228 }, { "epoch": 0.51, "grad_norm": 2.107675790786743, "learning_rate": 2.4727012204160285e-05, "loss": 5.5537, "step": 10232 }, { "epoch": 0.51, "grad_norm": 2.0693657398223877, "learning_rate": 2.4717130292998667e-05, "loss": 5.5913, "step": 10236 }, { "epoch": 0.51, "grad_norm": 2.158755302429199, "learning_rate": 2.4707248381837046e-05, "loss": 5.7313, "step": 10240 }, { "epoch": 0.51, "grad_norm": 1.9274017810821533, "learning_rate": 2.4697366470675428e-05, "loss": 5.5108, "step": 10244 }, { "epoch": 0.51, "grad_norm": 1.8750487565994263, "learning_rate": 2.468748455951381e-05, "loss": 5.5994, "step": 10248 }, { "epoch": 0.51, "grad_norm": 1.6671373844146729, "learning_rate": 2.4677602648352192e-05, "loss": 5.6319, "step": 10252 }, { "epoch": 0.51, "grad_norm": 1.721057415008545, "learning_rate": 2.4667720737190574e-05, "loss": 5.5567, "step": 10256 }, { "epoch": 0.51, "grad_norm": 1.7472023963928223, "learning_rate": 2.4657838826028956e-05, "loss": 5.5035, "step": 10260 }, { "epoch": 0.51, "grad_norm": 1.897161602973938, "learning_rate": 2.464795691486734e-05, "loss": 5.5944, "step": 10264 }, { "epoch": 0.51, "grad_norm": 1.9217979907989502, "learning_rate": 2.4638075003705717e-05, "loss": 5.5253, "step": 10268 }, { "epoch": 0.51, "grad_norm": 2.010005474090576, "learning_rate": 2.46281930925441e-05, "loss": 5.7227, "step": 10272 }, { 
"epoch": 0.51, "grad_norm": 1.7467890977859497, "learning_rate": 2.461831118138248e-05, "loss": 5.5426, "step": 10276 }, { "epoch": 0.51, "grad_norm": 1.9532781839370728, "learning_rate": 2.4608429270220863e-05, "loss": 5.5081, "step": 10280 }, { "epoch": 0.51, "grad_norm": 1.979059100151062, "learning_rate": 2.4598547359059242e-05, "loss": 5.5655, "step": 10284 }, { "epoch": 0.51, "grad_norm": 1.7593183517456055, "learning_rate": 2.4588665447897624e-05, "loss": 5.5235, "step": 10288 }, { "epoch": 0.51, "grad_norm": 1.8140133619308472, "learning_rate": 2.4578783536736006e-05, "loss": 5.5346, "step": 10292 }, { "epoch": 0.51, "grad_norm": 1.9852521419525146, "learning_rate": 2.4568901625574388e-05, "loss": 5.5365, "step": 10296 }, { "epoch": 0.51, "grad_norm": 2.00048828125, "learning_rate": 2.4559019714412767e-05, "loss": 5.5772, "step": 10300 }, { "epoch": 0.51, "grad_norm": 1.9229919910430908, "learning_rate": 2.454913780325115e-05, "loss": 5.4598, "step": 10304 }, { "epoch": 0.51, "grad_norm": 1.9021358489990234, "learning_rate": 2.453925589208953e-05, "loss": 5.4991, "step": 10308 }, { "epoch": 0.51, "grad_norm": 1.9008359909057617, "learning_rate": 2.4529373980927913e-05, "loss": 5.5531, "step": 10312 }, { "epoch": 0.51, "grad_norm": 1.8930054903030396, "learning_rate": 2.4519492069766295e-05, "loss": 5.5356, "step": 10316 }, { "epoch": 0.51, "grad_norm": 1.8766874074935913, "learning_rate": 2.4509610158604677e-05, "loss": 5.6213, "step": 10320 }, { "epoch": 0.51, "grad_norm": 1.8489048480987549, "learning_rate": 2.4499728247443056e-05, "loss": 5.5647, "step": 10324 }, { "epoch": 0.51, "grad_norm": 2.0344486236572266, "learning_rate": 2.4489846336281438e-05, "loss": 5.5734, "step": 10328 }, { "epoch": 0.51, "grad_norm": 1.83133864402771, "learning_rate": 2.447996442511982e-05, "loss": 5.4536, "step": 10332 }, { "epoch": 0.51, "grad_norm": 1.6086121797561646, "learning_rate": 2.4470082513958202e-05, "loss": 5.5275, "step": 10336 }, { "epoch": 0.51, "grad_norm": 
2.0024595260620117, "learning_rate": 2.446020060279658e-05, "loss": 5.5924, "step": 10340 }, { "epoch": 0.51, "grad_norm": 1.9737377166748047, "learning_rate": 2.4450318691634963e-05, "loss": 5.5466, "step": 10344 }, { "epoch": 0.51, "grad_norm": 1.9475603103637695, "learning_rate": 2.4440436780473345e-05, "loss": 5.5415, "step": 10348 }, { "epoch": 0.51, "grad_norm": 2.1475892066955566, "learning_rate": 2.4430554869311727e-05, "loss": 5.6749, "step": 10352 }, { "epoch": 0.51, "grad_norm": 1.7354161739349365, "learning_rate": 2.4420672958150105e-05, "loss": 5.4942, "step": 10356 }, { "epoch": 0.51, "grad_norm": 1.9726040363311768, "learning_rate": 2.4410791046988487e-05, "loss": 5.5258, "step": 10360 }, { "epoch": 0.51, "grad_norm": 1.7719279527664185, "learning_rate": 2.440090913582687e-05, "loss": 5.5169, "step": 10364 }, { "epoch": 0.51, "grad_norm": 1.8121774196624756, "learning_rate": 2.439102722466525e-05, "loss": 5.5811, "step": 10368 }, { "epoch": 0.51, "grad_norm": 1.656872034072876, "learning_rate": 2.4381145313503634e-05, "loss": 5.6121, "step": 10372 }, { "epoch": 0.51, "grad_norm": 1.9030474424362183, "learning_rate": 2.4371263402342016e-05, "loss": 5.5099, "step": 10376 }, { "epoch": 0.51, "grad_norm": 1.8425421714782715, "learning_rate": 2.4361381491180398e-05, "loss": 5.5167, "step": 10380 }, { "epoch": 0.51, "grad_norm": 2.0104122161865234, "learning_rate": 2.4351499580018776e-05, "loss": 5.629, "step": 10384 }, { "epoch": 0.51, "grad_norm": 1.7906516790390015, "learning_rate": 2.434161766885716e-05, "loss": 5.632, "step": 10388 }, { "epoch": 0.51, "grad_norm": 1.966325044631958, "learning_rate": 2.433173575769554e-05, "loss": 5.6271, "step": 10392 }, { "epoch": 0.51, "grad_norm": 1.9197744131088257, "learning_rate": 2.432185384653392e-05, "loss": 5.5729, "step": 10396 }, { "epoch": 0.51, "grad_norm": 1.9084253311157227, "learning_rate": 2.43119719353723e-05, "loss": 5.5213, "step": 10400 }, { "epoch": 0.51, "grad_norm": 1.743037223815918, 
"learning_rate": 2.4302090024210683e-05, "loss": 5.6202, "step": 10404 }, { "epoch": 0.51, "grad_norm": 1.6884185075759888, "learning_rate": 2.4292208113049065e-05, "loss": 5.52, "step": 10408 }, { "epoch": 0.51, "grad_norm": 1.9082211256027222, "learning_rate": 2.4282326201887444e-05, "loss": 5.6913, "step": 10412 }, { "epoch": 0.51, "grad_norm": 1.7120208740234375, "learning_rate": 2.4272444290725826e-05, "loss": 5.536, "step": 10416 }, { "epoch": 0.51, "grad_norm": 2.0013391971588135, "learning_rate": 2.4262562379564208e-05, "loss": 5.6444, "step": 10420 }, { "epoch": 0.52, "grad_norm": 1.751233458518982, "learning_rate": 2.425268046840259e-05, "loss": 5.4514, "step": 10424 }, { "epoch": 0.52, "grad_norm": 1.7701613903045654, "learning_rate": 2.4242798557240972e-05, "loss": 5.5492, "step": 10428 }, { "epoch": 0.52, "grad_norm": 1.672849416732788, "learning_rate": 2.4232916646079354e-05, "loss": 5.5202, "step": 10432 }, { "epoch": 0.52, "grad_norm": 1.9248921871185303, "learning_rate": 2.4223034734917736e-05, "loss": 5.5876, "step": 10436 }, { "epoch": 0.52, "grad_norm": 1.7727632522583008, "learning_rate": 2.4213152823756115e-05, "loss": 5.5529, "step": 10440 }, { "epoch": 0.52, "grad_norm": 1.9755526781082153, "learning_rate": 2.4203270912594497e-05, "loss": 5.6183, "step": 10444 }, { "epoch": 0.52, "grad_norm": 1.7922430038452148, "learning_rate": 2.419338900143288e-05, "loss": 5.5369, "step": 10448 }, { "epoch": 0.52, "grad_norm": 2.185535430908203, "learning_rate": 2.4183507090271258e-05, "loss": 5.6702, "step": 10452 }, { "epoch": 0.52, "grad_norm": 1.659045934677124, "learning_rate": 2.417362517910964e-05, "loss": 5.4956, "step": 10456 }, { "epoch": 0.52, "grad_norm": 1.8726792335510254, "learning_rate": 2.4163743267948022e-05, "loss": 5.5971, "step": 10460 }, { "epoch": 0.52, "grad_norm": 1.8656306266784668, "learning_rate": 2.4153861356786404e-05, "loss": 5.6505, "step": 10464 }, { "epoch": 0.52, "grad_norm": 2.1114795207977295, "learning_rate": 
2.4143979445624783e-05, "loss": 5.6579, "step": 10468 }, { "epoch": 0.52, "grad_norm": 1.7665212154388428, "learning_rate": 2.4134097534463165e-05, "loss": 5.5182, "step": 10472 }, { "epoch": 0.52, "grad_norm": 2.0020570755004883, "learning_rate": 2.4124215623301547e-05, "loss": 5.6146, "step": 10476 }, { "epoch": 0.52, "grad_norm": 1.81464684009552, "learning_rate": 2.411433371213993e-05, "loss": 5.6334, "step": 10480 }, { "epoch": 0.52, "grad_norm": 2.0478081703186035, "learning_rate": 2.410445180097831e-05, "loss": 5.6295, "step": 10484 }, { "epoch": 0.52, "grad_norm": 1.9547245502471924, "learning_rate": 2.4094569889816693e-05, "loss": 5.604, "step": 10488 }, { "epoch": 0.52, "grad_norm": 2.018515110015869, "learning_rate": 2.4084687978655075e-05, "loss": 5.498, "step": 10492 }, { "epoch": 0.52, "grad_norm": 1.7030364274978638, "learning_rate": 2.4074806067493454e-05, "loss": 5.654, "step": 10496 }, { "epoch": 0.52, "grad_norm": 2.0866682529449463, "learning_rate": 2.4064924156331836e-05, "loss": 5.5155, "step": 10500 }, { "epoch": 0.52, "grad_norm": 2.075389862060547, "learning_rate": 2.4055042245170218e-05, "loss": 5.5227, "step": 10504 }, { "epoch": 0.52, "grad_norm": 1.9765968322753906, "learning_rate": 2.40451603340086e-05, "loss": 5.6732, "step": 10508 }, { "epoch": 0.52, "grad_norm": 2.0013983249664307, "learning_rate": 2.403527842284698e-05, "loss": 5.6551, "step": 10512 }, { "epoch": 0.52, "grad_norm": 2.044642448425293, "learning_rate": 2.402539651168536e-05, "loss": 5.5395, "step": 10516 }, { "epoch": 0.52, "grad_norm": 1.6758370399475098, "learning_rate": 2.4015514600523743e-05, "loss": 5.5563, "step": 10520 }, { "epoch": 0.52, "grad_norm": 1.8717275857925415, "learning_rate": 2.400563268936212e-05, "loss": 5.5039, "step": 10524 }, { "epoch": 0.52, "grad_norm": 1.9705373048782349, "learning_rate": 2.3995750778200504e-05, "loss": 5.6172, "step": 10528 }, { "epoch": 0.52, "grad_norm": 1.7623977661132812, "learning_rate": 2.3985868867038886e-05, 
"loss": 5.4925, "step": 10532 }, { "epoch": 0.52, "grad_norm": 1.9180505275726318, "learning_rate": 2.3975986955877268e-05, "loss": 5.6497, "step": 10536 }, { "epoch": 0.52, "grad_norm": 1.940900206565857, "learning_rate": 2.396610504471565e-05, "loss": 5.5216, "step": 10540 }, { "epoch": 0.52, "grad_norm": 1.7929134368896484, "learning_rate": 2.3956223133554032e-05, "loss": 5.483, "step": 10544 }, { "epoch": 0.52, "grad_norm": 1.8497039079666138, "learning_rate": 2.3946341222392414e-05, "loss": 5.5895, "step": 10548 }, { "epoch": 0.52, "grad_norm": 1.8685697317123413, "learning_rate": 2.3936459311230793e-05, "loss": 5.5565, "step": 10552 }, { "epoch": 0.52, "grad_norm": 2.2542967796325684, "learning_rate": 2.3926577400069175e-05, "loss": 5.5895, "step": 10556 }, { "epoch": 0.52, "grad_norm": 2.043692111968994, "learning_rate": 2.3916695488907557e-05, "loss": 5.4708, "step": 10560 }, { "epoch": 0.52, "grad_norm": 2.1556544303894043, "learning_rate": 2.390681357774594e-05, "loss": 5.5257, "step": 10564 }, { "epoch": 0.52, "grad_norm": 2.0351712703704834, "learning_rate": 2.3896931666584317e-05, "loss": 5.5247, "step": 10568 }, { "epoch": 0.52, "grad_norm": 1.7244350910186768, "learning_rate": 2.38870497554227e-05, "loss": 5.4927, "step": 10572 }, { "epoch": 0.52, "grad_norm": 2.0928471088409424, "learning_rate": 2.387716784426108e-05, "loss": 5.4176, "step": 10576 }, { "epoch": 0.52, "grad_norm": 2.0530073642730713, "learning_rate": 2.3867285933099464e-05, "loss": 5.6525, "step": 10580 }, { "epoch": 0.52, "grad_norm": 1.8700250387191772, "learning_rate": 2.3857404021937842e-05, "loss": 5.5591, "step": 10584 }, { "epoch": 0.52, "grad_norm": 1.6774561405181885, "learning_rate": 2.3847522110776224e-05, "loss": 5.5925, "step": 10588 }, { "epoch": 0.52, "grad_norm": 2.1128244400024414, "learning_rate": 2.3837640199614606e-05, "loss": 5.5118, "step": 10592 }, { "epoch": 0.52, "grad_norm": 1.9600874185562134, "learning_rate": 2.382775828845299e-05, "loss": 5.563, "step": 
10596 }, { "epoch": 0.52, "grad_norm": 1.9983313083648682, "learning_rate": 2.381787637729137e-05, "loss": 5.4649, "step": 10600 }, { "epoch": 0.52, "grad_norm": 1.9052096605300903, "learning_rate": 2.3807994466129753e-05, "loss": 5.5366, "step": 10604 }, { "epoch": 0.52, "grad_norm": 1.9954668283462524, "learning_rate": 2.379811255496813e-05, "loss": 5.5479, "step": 10608 }, { "epoch": 0.52, "grad_norm": 1.9341228008270264, "learning_rate": 2.3788230643806513e-05, "loss": 5.4649, "step": 10612 }, { "epoch": 0.52, "grad_norm": 1.9938849210739136, "learning_rate": 2.3778348732644895e-05, "loss": 5.5791, "step": 10616 }, { "epoch": 0.52, "grad_norm": 1.6775513887405396, "learning_rate": 2.3768466821483277e-05, "loss": 5.61, "step": 10620 }, { "epoch": 0.52, "grad_norm": 1.9239006042480469, "learning_rate": 2.3758584910321656e-05, "loss": 5.5689, "step": 10624 }, { "epoch": 0.53, "grad_norm": 2.060796022415161, "learning_rate": 2.3748702999160038e-05, "loss": 5.5823, "step": 10628 }, { "epoch": 0.53, "grad_norm": 1.95215904712677, "learning_rate": 2.373882108799842e-05, "loss": 5.4581, "step": 10632 }, { "epoch": 0.53, "grad_norm": 1.9908874034881592, "learning_rate": 2.3728939176836802e-05, "loss": 5.5968, "step": 10636 }, { "epoch": 0.53, "grad_norm": 1.6556943655014038, "learning_rate": 2.371905726567518e-05, "loss": 5.5399, "step": 10640 }, { "epoch": 0.53, "grad_norm": 1.9152753353118896, "learning_rate": 2.3709175354513563e-05, "loss": 5.6018, "step": 10644 }, { "epoch": 0.53, "grad_norm": 2.3656210899353027, "learning_rate": 2.3699293443351945e-05, "loss": 5.4332, "step": 10648 }, { "epoch": 0.53, "grad_norm": 1.9801435470581055, "learning_rate": 2.3689411532190324e-05, "loss": 5.5684, "step": 10652 }, { "epoch": 0.53, "grad_norm": 2.1441659927368164, "learning_rate": 2.367952962102871e-05, "loss": 5.3962, "step": 10656 }, { "epoch": 0.53, "grad_norm": 2.2612667083740234, "learning_rate": 2.366964770986709e-05, "loss": 5.5301, "step": 10660 }, { "epoch": 0.53, 
"grad_norm": 2.1340534687042236, "learning_rate": 2.3659765798705473e-05, "loss": 5.6125, "step": 10664 }, { "epoch": 0.53, "grad_norm": 2.0850844383239746, "learning_rate": 2.3649883887543852e-05, "loss": 5.4433, "step": 10668 }, { "epoch": 0.53, "grad_norm": 1.7150609493255615, "learning_rate": 2.3640001976382234e-05, "loss": 5.5438, "step": 10672 }, { "epoch": 0.53, "grad_norm": 1.9754505157470703, "learning_rate": 2.3630120065220616e-05, "loss": 5.6475, "step": 10676 }, { "epoch": 0.53, "grad_norm": 1.9348641633987427, "learning_rate": 2.3620238154058995e-05, "loss": 5.6183, "step": 10680 }, { "epoch": 0.53, "grad_norm": 1.9712179899215698, "learning_rate": 2.3610356242897377e-05, "loss": 5.5499, "step": 10684 }, { "epoch": 0.53, "grad_norm": 1.9912450313568115, "learning_rate": 2.360047433173576e-05, "loss": 5.534, "step": 10688 }, { "epoch": 0.53, "grad_norm": 1.8730937242507935, "learning_rate": 2.359059242057414e-05, "loss": 5.6078, "step": 10692 }, { "epoch": 0.53, "grad_norm": 2.082500696182251, "learning_rate": 2.358071050941252e-05, "loss": 5.5638, "step": 10696 }, { "epoch": 0.53, "grad_norm": 1.990276575088501, "learning_rate": 2.35708285982509e-05, "loss": 5.4854, "step": 10700 }, { "epoch": 0.53, "grad_norm": 1.8696178197860718, "learning_rate": 2.3560946687089284e-05, "loss": 5.5041, "step": 10704 }, { "epoch": 0.53, "grad_norm": 2.1446194648742676, "learning_rate": 2.3551064775927666e-05, "loss": 5.6358, "step": 10708 }, { "epoch": 0.53, "grad_norm": 1.7713229656219482, "learning_rate": 2.3541182864766048e-05, "loss": 5.5635, "step": 10712 }, { "epoch": 0.53, "grad_norm": 1.876599907875061, "learning_rate": 2.353130095360443e-05, "loss": 5.4398, "step": 10716 }, { "epoch": 0.53, "grad_norm": 1.7540398836135864, "learning_rate": 2.3521419042442812e-05, "loss": 5.4983, "step": 10720 }, { "epoch": 0.53, "grad_norm": 2.0557737350463867, "learning_rate": 2.351153713128119e-05, "loss": 5.4517, "step": 10724 }, { "epoch": 0.53, "grad_norm": 
1.7068967819213867, "learning_rate": 2.3501655220119573e-05, "loss": 5.4742, "step": 10728 }, { "epoch": 0.53, "grad_norm": 1.9416307210922241, "learning_rate": 2.3491773308957955e-05, "loss": 5.6298, "step": 10732 }, { "epoch": 0.53, "grad_norm": 1.8731729984283447, "learning_rate": 2.3481891397796333e-05, "loss": 5.5531, "step": 10736 }, { "epoch": 0.53, "grad_norm": 2.060716390609741, "learning_rate": 2.3472009486634715e-05, "loss": 5.6034, "step": 10740 }, { "epoch": 0.53, "grad_norm": 2.003988742828369, "learning_rate": 2.3462127575473098e-05, "loss": 5.5876, "step": 10744 }, { "epoch": 0.53, "grad_norm": 2.0887482166290283, "learning_rate": 2.345224566431148e-05, "loss": 5.5417, "step": 10748 }, { "epoch": 0.53, "grad_norm": 2.017455577850342, "learning_rate": 2.3442363753149858e-05, "loss": 5.546, "step": 10752 }, { "epoch": 0.53, "grad_norm": 1.8705204725265503, "learning_rate": 2.343248184198824e-05, "loss": 5.6274, "step": 10756 }, { "epoch": 0.53, "grad_norm": 1.9942293167114258, "learning_rate": 2.3422599930826622e-05, "loss": 5.4287, "step": 10760 }, { "epoch": 0.53, "grad_norm": 1.894716739654541, "learning_rate": 2.3412718019665004e-05, "loss": 5.5343, "step": 10764 }, { "epoch": 0.53, "grad_norm": 2.020045042037964, "learning_rate": 2.3402836108503387e-05, "loss": 5.6098, "step": 10768 }, { "epoch": 0.53, "grad_norm": 2.01450252532959, "learning_rate": 2.339295419734177e-05, "loss": 5.5875, "step": 10772 }, { "epoch": 0.53, "grad_norm": 1.9330328702926636, "learning_rate": 2.338307228618015e-05, "loss": 5.5679, "step": 10776 }, { "epoch": 0.53, "grad_norm": 1.871109962463379, "learning_rate": 2.337319037501853e-05, "loss": 5.5312, "step": 10780 }, { "epoch": 0.53, "grad_norm": 2.02812123298645, "learning_rate": 2.336330846385691e-05, "loss": 5.3512, "step": 10784 }, { "epoch": 0.53, "grad_norm": 1.9817901849746704, "learning_rate": 2.3353426552695293e-05, "loss": 5.4152, "step": 10788 }, { "epoch": 0.53, "grad_norm": 1.9336769580841064, 
"learning_rate": 2.3343544641533675e-05, "loss": 5.5362, "step": 10792 }, { "epoch": 0.53, "grad_norm": 1.8532609939575195, "learning_rate": 2.3333662730372054e-05, "loss": 5.4603, "step": 10796 }, { "epoch": 0.53, "grad_norm": 1.8806451559066772, "learning_rate": 2.3323780819210436e-05, "loss": 5.3589, "step": 10800 }, { "epoch": 0.53, "grad_norm": 1.8947298526763916, "learning_rate": 2.3313898908048818e-05, "loss": 5.5674, "step": 10804 }, { "epoch": 0.53, "grad_norm": 1.9279778003692627, "learning_rate": 2.3304016996887197e-05, "loss": 5.5096, "step": 10808 }, { "epoch": 0.53, "grad_norm": 2.0926554203033447, "learning_rate": 2.329413508572558e-05, "loss": 5.6147, "step": 10812 }, { "epoch": 0.53, "grad_norm": 2.0202767848968506, "learning_rate": 2.328425317456396e-05, "loss": 5.6283, "step": 10816 }, { "epoch": 0.53, "grad_norm": 1.9681588411331177, "learning_rate": 2.3274371263402343e-05, "loss": 5.711, "step": 10820 }, { "epoch": 0.53, "grad_norm": 2.059591054916382, "learning_rate": 2.3264489352240722e-05, "loss": 5.5387, "step": 10824 }, { "epoch": 0.54, "grad_norm": 1.7772566080093384, "learning_rate": 2.3254607441079107e-05, "loss": 5.5673, "step": 10828 }, { "epoch": 0.54, "grad_norm": 2.169037103652954, "learning_rate": 2.324472552991749e-05, "loss": 5.4922, "step": 10832 }, { "epoch": 0.54, "grad_norm": 1.7867958545684814, "learning_rate": 2.3234843618755868e-05, "loss": 5.4829, "step": 10836 }, { "epoch": 0.54, "grad_norm": 2.0761024951934814, "learning_rate": 2.322496170759425e-05, "loss": 5.57, "step": 10840 }, { "epoch": 0.54, "grad_norm": 1.8901103734970093, "learning_rate": 2.3215079796432632e-05, "loss": 5.6031, "step": 10844 }, { "epoch": 0.54, "grad_norm": 1.8197063207626343, "learning_rate": 2.3205197885271014e-05, "loss": 5.6221, "step": 10848 }, { "epoch": 0.54, "grad_norm": 1.8209093809127808, "learning_rate": 2.3195315974109393e-05, "loss": 5.4747, "step": 10852 }, { "epoch": 0.54, "grad_norm": 2.1222522258758545, "learning_rate": 
2.3185434062947775e-05, "loss": 5.5274, "step": 10856 }, { "epoch": 0.54, "grad_norm": 1.7312264442443848, "learning_rate": 2.3175552151786157e-05, "loss": 5.5181, "step": 10860 }, { "epoch": 0.54, "grad_norm": 1.9499653577804565, "learning_rate": 2.316567024062454e-05, "loss": 5.5653, "step": 10864 }, { "epoch": 0.54, "grad_norm": 1.7755954265594482, "learning_rate": 2.3155788329462918e-05, "loss": 5.5213, "step": 10868 }, { "epoch": 0.54, "grad_norm": 1.6512964963912964, "learning_rate": 2.31459064183013e-05, "loss": 5.4163, "step": 10872 }, { "epoch": 0.54, "grad_norm": 1.9016536474227905, "learning_rate": 2.3136024507139682e-05, "loss": 5.4877, "step": 10876 }, { "epoch": 0.54, "grad_norm": 1.9362504482269287, "learning_rate": 2.312614259597806e-05, "loss": 5.5776, "step": 10880 }, { "epoch": 0.54, "grad_norm": 2.0821099281311035, "learning_rate": 2.3116260684816446e-05, "loss": 5.4961, "step": 10884 }, { "epoch": 0.54, "grad_norm": 1.6689753532409668, "learning_rate": 2.3106378773654828e-05, "loss": 5.5967, "step": 10888 }, { "epoch": 0.54, "grad_norm": 1.8034151792526245, "learning_rate": 2.3096496862493207e-05, "loss": 5.5646, "step": 10892 }, { "epoch": 0.54, "grad_norm": 1.8747068643569946, "learning_rate": 2.308661495133159e-05, "loss": 5.599, "step": 10896 }, { "epoch": 0.54, "grad_norm": 2.044126033782959, "learning_rate": 2.307673304016997e-05, "loss": 5.4739, "step": 10900 }, { "epoch": 0.54, "grad_norm": 2.0283472537994385, "learning_rate": 2.3066851129008353e-05, "loss": 5.5447, "step": 10904 }, { "epoch": 0.54, "grad_norm": 2.0088658332824707, "learning_rate": 2.305696921784673e-05, "loss": 5.6111, "step": 10908 }, { "epoch": 0.54, "grad_norm": 1.8219811916351318, "learning_rate": 2.3047087306685114e-05, "loss": 5.6571, "step": 10912 }, { "epoch": 0.54, "grad_norm": 2.010409116744995, "learning_rate": 2.3037205395523496e-05, "loss": 5.5405, "step": 10916 }, { "epoch": 0.54, "grad_norm": 2.03324031829834, "learning_rate": 2.3027323484361878e-05, 
"loss": 5.633, "step": 10920 }, { "epoch": 0.54, "grad_norm": 1.9077966213226318, "learning_rate": 2.3017441573200256e-05, "loss": 5.5292, "step": 10924 }, { "epoch": 0.54, "grad_norm": 1.7984883785247803, "learning_rate": 2.300755966203864e-05, "loss": 5.5423, "step": 10928 }, { "epoch": 0.54, "grad_norm": 2.038520336151123, "learning_rate": 2.299767775087702e-05, "loss": 5.5662, "step": 10932 }, { "epoch": 0.54, "grad_norm": 2.0444748401641846, "learning_rate": 2.2987795839715403e-05, "loss": 5.5273, "step": 10936 }, { "epoch": 0.54, "grad_norm": 2.0163471698760986, "learning_rate": 2.2977913928553785e-05, "loss": 5.5836, "step": 10940 }, { "epoch": 0.54, "grad_norm": 2.246277093887329, "learning_rate": 2.2968032017392167e-05, "loss": 5.717, "step": 10944 }, { "epoch": 0.54, "grad_norm": 2.040771484375, "learning_rate": 2.295815010623055e-05, "loss": 5.5738, "step": 10948 }, { "epoch": 0.54, "grad_norm": 1.9808095693588257, "learning_rate": 2.2948268195068927e-05, "loss": 5.6387, "step": 10952 }, { "epoch": 0.54, "grad_norm": 2.1175899505615234, "learning_rate": 2.293838628390731e-05, "loss": 5.5871, "step": 10956 }, { "epoch": 0.54, "grad_norm": 1.99501633644104, "learning_rate": 2.292850437274569e-05, "loss": 5.5695, "step": 10960 }, { "epoch": 0.54, "grad_norm": 1.9630918502807617, "learning_rate": 2.291862246158407e-05, "loss": 5.5771, "step": 10964 }, { "epoch": 0.54, "grad_norm": 1.638632893562317, "learning_rate": 2.2908740550422452e-05, "loss": 5.551, "step": 10968 }, { "epoch": 0.54, "grad_norm": 1.9305760860443115, "learning_rate": 2.2898858639260834e-05, "loss": 5.5803, "step": 10972 }, { "epoch": 0.54, "grad_norm": 1.7646735906600952, "learning_rate": 2.2888976728099216e-05, "loss": 5.5042, "step": 10976 }, { "epoch": 0.54, "grad_norm": 2.2676877975463867, "learning_rate": 2.2879094816937595e-05, "loss": 5.5913, "step": 10980 }, { "epoch": 0.54, "grad_norm": 1.974547028541565, "learning_rate": 2.2869212905775977e-05, "loss": 5.5148, "step": 10984 }, { 
"epoch": 0.54, "grad_norm": 1.830438494682312, "learning_rate": 2.285933099461436e-05, "loss": 5.5472, "step": 10988 }, { "epoch": 0.54, "grad_norm": 2.166577100753784, "learning_rate": 2.284944908345274e-05, "loss": 5.4449, "step": 10992 }, { "epoch": 0.54, "grad_norm": 2.0472869873046875, "learning_rate": 2.283956717229112e-05, "loss": 5.6287, "step": 10996 }, { "epoch": 0.54, "grad_norm": 1.8216519355773926, "learning_rate": 2.2829685261129505e-05, "loss": 5.5427, "step": 11000 }, { "epoch": 0.54, "grad_norm": 2.0221810340881348, "learning_rate": 2.2819803349967887e-05, "loss": 5.6037, "step": 11004 }, { "epoch": 0.54, "grad_norm": 1.8711715936660767, "learning_rate": 2.2809921438806266e-05, "loss": 5.5891, "step": 11008 }, { "epoch": 0.54, "grad_norm": 1.8653993606567383, "learning_rate": 2.2800039527644648e-05, "loss": 5.4277, "step": 11012 }, { "epoch": 0.54, "grad_norm": 1.9809545278549194, "learning_rate": 2.279015761648303e-05, "loss": 5.4548, "step": 11016 }, { "epoch": 0.54, "grad_norm": 1.9581102132797241, "learning_rate": 2.278027570532141e-05, "loss": 5.5037, "step": 11020 }, { "epoch": 0.54, "grad_norm": 2.251185417175293, "learning_rate": 2.277039379415979e-05, "loss": 5.5988, "step": 11024 }, { "epoch": 0.54, "grad_norm": 1.7871466875076294, "learning_rate": 2.2760511882998173e-05, "loss": 5.3987, "step": 11028 }, { "epoch": 0.55, "grad_norm": 1.8406583070755005, "learning_rate": 2.2750629971836555e-05, "loss": 5.6034, "step": 11032 }, { "epoch": 0.55, "grad_norm": 2.1242032051086426, "learning_rate": 2.2740748060674934e-05, "loss": 5.512, "step": 11036 }, { "epoch": 0.55, "grad_norm": 2.062807083129883, "learning_rate": 2.2730866149513316e-05, "loss": 5.5253, "step": 11040 }, { "epoch": 0.55, "grad_norm": 2.1405367851257324, "learning_rate": 2.2720984238351698e-05, "loss": 5.579, "step": 11044 }, { "epoch": 0.55, "grad_norm": 1.989423155784607, "learning_rate": 2.271110232719008e-05, "loss": 5.5023, "step": 11048 }, { "epoch": 0.55, "grad_norm": 
1.7983574867248535, "learning_rate": 2.270122041602846e-05, "loss": 5.6174, "step": 11052 }, { "epoch": 0.55, "grad_norm": 1.9673739671707153, "learning_rate": 2.2691338504866844e-05, "loss": 5.4921, "step": 11056 }, { "epoch": 0.55, "grad_norm": 2.0545785427093506, "learning_rate": 2.2681456593705226e-05, "loss": 5.5915, "step": 11060 }, { "epoch": 0.55, "grad_norm": 1.947089433670044, "learning_rate": 2.2671574682543605e-05, "loss": 5.6147, "step": 11064 }, { "epoch": 0.55, "grad_norm": 1.6919525861740112, "learning_rate": 2.2661692771381987e-05, "loss": 5.5268, "step": 11068 }, { "epoch": 0.55, "grad_norm": 1.9140796661376953, "learning_rate": 2.265181086022037e-05, "loss": 5.6551, "step": 11072 }, { "epoch": 0.55, "grad_norm": 2.034980297088623, "learning_rate": 2.264192894905875e-05, "loss": 5.5495, "step": 11076 }, { "epoch": 0.55, "grad_norm": 2.2609176635742188, "learning_rate": 2.263204703789713e-05, "loss": 5.52, "step": 11080 }, { "epoch": 0.55, "grad_norm": 2.154770612716675, "learning_rate": 2.262216512673551e-05, "loss": 5.5406, "step": 11084 }, { "epoch": 0.55, "grad_norm": 1.8771696090698242, "learning_rate": 2.2612283215573894e-05, "loss": 5.4876, "step": 11088 }, { "epoch": 0.55, "grad_norm": 1.8587368726730347, "learning_rate": 2.2602401304412272e-05, "loss": 5.6025, "step": 11092 }, { "epoch": 0.55, "grad_norm": 1.8150922060012817, "learning_rate": 2.2592519393250654e-05, "loss": 5.4589, "step": 11096 }, { "epoch": 0.55, "grad_norm": 1.94694185256958, "learning_rate": 2.2582637482089037e-05, "loss": 5.4608, "step": 11100 }, { "epoch": 0.55, "grad_norm": 1.919480800628662, "learning_rate": 2.257275557092742e-05, "loss": 5.4808, "step": 11104 }, { "epoch": 0.55, "grad_norm": 1.9377970695495605, "learning_rate": 2.2562873659765797e-05, "loss": 5.5321, "step": 11108 }, { "epoch": 0.55, "grad_norm": 1.8229963779449463, "learning_rate": 2.2552991748604183e-05, "loss": 5.5011, "step": 11112 }, { "epoch": 0.55, "grad_norm": 1.7934753894805908, 
"learning_rate": 2.2543109837442565e-05, "loss": 5.4912, "step": 11116 }, { "epoch": 0.55, "grad_norm": 2.294386386871338, "learning_rate": 2.2533227926280943e-05, "loss": 5.5593, "step": 11120 }, { "epoch": 0.55, "grad_norm": 1.9534809589385986, "learning_rate": 2.2523346015119325e-05, "loss": 5.5784, "step": 11124 }, { "epoch": 0.55, "grad_norm": 2.121579885482788, "learning_rate": 2.2513464103957708e-05, "loss": 5.6106, "step": 11128 }, { "epoch": 0.55, "grad_norm": 1.8341236114501953, "learning_rate": 2.250358219279609e-05, "loss": 5.5102, "step": 11132 }, { "epoch": 0.55, "grad_norm": 1.8656764030456543, "learning_rate": 2.2493700281634468e-05, "loss": 5.5725, "step": 11136 }, { "epoch": 0.55, "grad_norm": 2.026597738265991, "learning_rate": 2.248381837047285e-05, "loss": 5.5864, "step": 11140 }, { "epoch": 0.55, "grad_norm": 1.7924002408981323, "learning_rate": 2.2473936459311232e-05, "loss": 5.5194, "step": 11144 }, { "epoch": 0.55, "grad_norm": 1.9699978828430176, "learning_rate": 2.2464054548149614e-05, "loss": 5.5491, "step": 11148 }, { "epoch": 0.55, "grad_norm": 2.0057380199432373, "learning_rate": 2.2454172636987993e-05, "loss": 5.5209, "step": 11152 }, { "epoch": 0.55, "grad_norm": 1.9208364486694336, "learning_rate": 2.2444290725826375e-05, "loss": 5.6376, "step": 11156 }, { "epoch": 0.55, "grad_norm": 2.1530115604400635, "learning_rate": 2.2434408814664757e-05, "loss": 5.5286, "step": 11160 }, { "epoch": 0.55, "grad_norm": 2.0784969329833984, "learning_rate": 2.2424526903503136e-05, "loss": 5.629, "step": 11164 }, { "epoch": 0.55, "grad_norm": 1.7239807844161987, "learning_rate": 2.2414644992341518e-05, "loss": 5.4354, "step": 11168 }, { "epoch": 0.55, "grad_norm": 2.1020421981811523, "learning_rate": 2.2404763081179903e-05, "loss": 5.4923, "step": 11172 }, { "epoch": 0.55, "grad_norm": 1.8407636880874634, "learning_rate": 2.2394881170018282e-05, "loss": 5.5506, "step": 11176 }, { "epoch": 0.55, "grad_norm": 1.8488260507583618, "learning_rate": 
2.2384999258856664e-05, "loss": 5.6145, "step": 11180 }, { "epoch": 0.55, "grad_norm": 2.0274198055267334, "learning_rate": 2.2375117347695046e-05, "loss": 5.5115, "step": 11184 }, { "epoch": 0.55, "grad_norm": 2.122840642929077, "learning_rate": 2.2365235436533428e-05, "loss": 5.4538, "step": 11188 }, { "epoch": 0.55, "grad_norm": 2.121507406234741, "learning_rate": 2.2355353525371807e-05, "loss": 5.4959, "step": 11192 }, { "epoch": 0.55, "grad_norm": 2.1240291595458984, "learning_rate": 2.234547161421019e-05, "loss": 5.3912, "step": 11196 }, { "epoch": 0.55, "grad_norm": 1.7324938774108887, "learning_rate": 2.233558970304857e-05, "loss": 5.5702, "step": 11200 }, { "epoch": 0.55, "grad_norm": 1.7895362377166748, "learning_rate": 2.2325707791886953e-05, "loss": 5.5114, "step": 11204 }, { "epoch": 0.55, "grad_norm": 1.8272205591201782, "learning_rate": 2.2315825880725332e-05, "loss": 5.5404, "step": 11208 }, { "epoch": 0.55, "grad_norm": 2.2934823036193848, "learning_rate": 2.2305943969563714e-05, "loss": 5.6429, "step": 11212 }, { "epoch": 0.55, "grad_norm": 2.1152985095977783, "learning_rate": 2.2296062058402096e-05, "loss": 5.5438, "step": 11216 }, { "epoch": 0.55, "grad_norm": 1.9162452220916748, "learning_rate": 2.2286180147240478e-05, "loss": 5.574, "step": 11220 }, { "epoch": 0.55, "grad_norm": 1.9226047992706299, "learning_rate": 2.2276298236078857e-05, "loss": 5.5964, "step": 11224 }, { "epoch": 0.55, "grad_norm": 2.2039763927459717, "learning_rate": 2.2266416324917242e-05, "loss": 5.5645, "step": 11228 }, { "epoch": 0.55, "grad_norm": 1.8182283639907837, "learning_rate": 2.2256534413755624e-05, "loss": 5.5464, "step": 11232 }, { "epoch": 0.56, "grad_norm": 1.8267743587493896, "learning_rate": 2.2246652502594003e-05, "loss": 5.5826, "step": 11236 }, { "epoch": 0.56, "grad_norm": 1.933274269104004, "learning_rate": 2.2236770591432385e-05, "loss": 5.5138, "step": 11240 }, { "epoch": 0.56, "grad_norm": 2.160489320755005, "learning_rate": 
2.2226888680270767e-05, "loss": 5.5879, "step": 11244 }, { "epoch": 0.56, "grad_norm": 1.8666636943817139, "learning_rate": 2.2217006769109146e-05, "loss": 5.5751, "step": 11248 }, { "epoch": 0.56, "grad_norm": 2.0974009037017822, "learning_rate": 2.2207124857947528e-05, "loss": 5.4429, "step": 11252 }, { "epoch": 0.56, "grad_norm": 1.6826304197311401, "learning_rate": 2.219724294678591e-05, "loss": 5.5061, "step": 11256 }, { "epoch": 0.56, "grad_norm": 1.9517406225204468, "learning_rate": 2.2187361035624292e-05, "loss": 5.5793, "step": 11260 }, { "epoch": 0.56, "grad_norm": 1.9135072231292725, "learning_rate": 2.217747912446267e-05, "loss": 5.4732, "step": 11264 }, { "epoch": 0.56, "grad_norm": 1.7441056966781616, "learning_rate": 2.2167597213301053e-05, "loss": 5.4808, "step": 11268 }, { "epoch": 0.56, "grad_norm": 1.851120114326477, "learning_rate": 2.2157715302139435e-05, "loss": 5.6432, "step": 11272 }, { "epoch": 0.56, "grad_norm": 1.8004862070083618, "learning_rate": 2.2147833390977817e-05, "loss": 5.5154, "step": 11276 }, { "epoch": 0.56, "grad_norm": 1.6254485845565796, "learning_rate": 2.2137951479816195e-05, "loss": 5.6375, "step": 11280 }, { "epoch": 0.56, "grad_norm": 1.9577946662902832, "learning_rate": 2.2128069568654577e-05, "loss": 5.559, "step": 11284 }, { "epoch": 0.56, "grad_norm": 2.018347978591919, "learning_rate": 2.2118187657492963e-05, "loss": 5.484, "step": 11288 }, { "epoch": 0.56, "grad_norm": 1.8874802589416504, "learning_rate": 2.210830574633134e-05, "loss": 5.577, "step": 11292 }, { "epoch": 0.56, "grad_norm": 1.764642357826233, "learning_rate": 2.2098423835169724e-05, "loss": 5.45, "step": 11296 }, { "epoch": 0.56, "grad_norm": 2.156693458557129, "learning_rate": 2.2088541924008106e-05, "loss": 5.5669, "step": 11300 }, { "epoch": 0.56, "grad_norm": 1.8049863576889038, "learning_rate": 2.2078660012846484e-05, "loss": 5.5686, "step": 11304 }, { "epoch": 0.56, "grad_norm": 1.7862818241119385, "learning_rate": 2.2068778101684866e-05, 
"loss": 5.4679, "step": 11308 }, { "epoch": 0.56, "grad_norm": 1.8136225938796997, "learning_rate": 2.205889619052325e-05, "loss": 5.5579, "step": 11312 }, { "epoch": 0.56, "grad_norm": 1.744735598564148, "learning_rate": 2.204901427936163e-05, "loss": 5.5756, "step": 11316 }, { "epoch": 0.56, "grad_norm": 2.0922114849090576, "learning_rate": 2.203913236820001e-05, "loss": 5.3664, "step": 11320 }, { "epoch": 0.56, "grad_norm": 1.8184114694595337, "learning_rate": 2.202925045703839e-05, "loss": 5.3773, "step": 11324 }, { "epoch": 0.56, "grad_norm": 1.75265371799469, "learning_rate": 2.2019368545876773e-05, "loss": 5.5344, "step": 11328 }, { "epoch": 0.56, "grad_norm": 2.045360803604126, "learning_rate": 2.2009486634715155e-05, "loss": 5.5896, "step": 11332 }, { "epoch": 0.56, "grad_norm": 1.7742873430252075, "learning_rate": 2.1999604723553534e-05, "loss": 5.5624, "step": 11336 }, { "epoch": 0.56, "grad_norm": 2.0210537910461426, "learning_rate": 2.1989722812391916e-05, "loss": 5.6381, "step": 11340 }, { "epoch": 0.56, "grad_norm": 1.8388046026229858, "learning_rate": 2.19798409012303e-05, "loss": 5.5318, "step": 11344 }, { "epoch": 0.56, "grad_norm": 1.9268219470977783, "learning_rate": 2.196995899006868e-05, "loss": 5.5328, "step": 11348 }, { "epoch": 0.56, "grad_norm": 2.0054502487182617, "learning_rate": 2.1960077078907062e-05, "loss": 5.5936, "step": 11352 }, { "epoch": 0.56, "grad_norm": 1.741058349609375, "learning_rate": 2.1950195167745444e-05, "loss": 5.5389, "step": 11356 }, { "epoch": 0.56, "grad_norm": 1.9942952394485474, "learning_rate": 2.1940313256583826e-05, "loss": 5.5587, "step": 11360 }, { "epoch": 0.56, "grad_norm": 1.9131739139556885, "learning_rate": 2.1930431345422205e-05, "loss": 5.417, "step": 11364 }, { "epoch": 0.56, "grad_norm": 1.7850251197814941, "learning_rate": 2.1920549434260587e-05, "loss": 5.5102, "step": 11368 }, { "epoch": 0.56, "grad_norm": 2.221616744995117, "learning_rate": 2.191066752309897e-05, "loss": 5.5262, "step": 11372 
}, { "epoch": 0.56, "grad_norm": 1.910980463027954, "learning_rate": 2.1900785611937348e-05, "loss": 5.7343, "step": 11376 }, { "epoch": 0.56, "grad_norm": 1.8210395574569702, "learning_rate": 2.189090370077573e-05, "loss": 5.5456, "step": 11380 }, { "epoch": 0.56, "grad_norm": 1.9524825811386108, "learning_rate": 2.1881021789614112e-05, "loss": 5.5558, "step": 11384 }, { "epoch": 0.56, "grad_norm": 1.5711989402770996, "learning_rate": 2.1871139878452494e-05, "loss": 5.4329, "step": 11388 }, { "epoch": 0.56, "grad_norm": 2.0921053886413574, "learning_rate": 2.1861257967290873e-05, "loss": 5.6219, "step": 11392 }, { "epoch": 0.56, "grad_norm": 2.04020357131958, "learning_rate": 2.1851376056129255e-05, "loss": 5.4476, "step": 11396 }, { "epoch": 0.56, "grad_norm": 1.7985339164733887, "learning_rate": 2.184149414496764e-05, "loss": 5.5278, "step": 11400 }, { "epoch": 0.56, "grad_norm": 1.8650219440460205, "learning_rate": 2.183161223380602e-05, "loss": 5.5657, "step": 11404 }, { "epoch": 0.56, "grad_norm": 1.8160661458969116, "learning_rate": 2.18217303226444e-05, "loss": 5.525, "step": 11408 }, { "epoch": 0.56, "grad_norm": 2.0409529209136963, "learning_rate": 2.1811848411482783e-05, "loss": 5.4098, "step": 11412 }, { "epoch": 0.56, "grad_norm": 1.731351375579834, "learning_rate": 2.1801966500321165e-05, "loss": 5.588, "step": 11416 }, { "epoch": 0.56, "grad_norm": 1.9625564813613892, "learning_rate": 2.1792084589159544e-05, "loss": 5.4872, "step": 11420 }, { "epoch": 0.56, "grad_norm": 2.0399959087371826, "learning_rate": 2.1782202677997926e-05, "loss": 5.5627, "step": 11424 }, { "epoch": 0.56, "grad_norm": 1.684001088142395, "learning_rate": 2.1772320766836308e-05, "loss": 5.6156, "step": 11428 }, { "epoch": 0.56, "grad_norm": 1.7688792943954468, "learning_rate": 2.176243885567469e-05, "loss": 5.4949, "step": 11432 }, { "epoch": 0.57, "grad_norm": 2.069566011428833, "learning_rate": 2.175255694451307e-05, "loss": 5.6452, "step": 11436 }, { "epoch": 0.57, 
"grad_norm": 2.030831813812256, "learning_rate": 2.174267503335145e-05, "loss": 5.3957, "step": 11440 }, { "epoch": 0.57, "grad_norm": 1.9303979873657227, "learning_rate": 2.1732793122189833e-05, "loss": 5.4431, "step": 11444 }, { "epoch": 0.57, "grad_norm": 1.6101690530776978, "learning_rate": 2.172291121102821e-05, "loss": 5.5681, "step": 11448 }, { "epoch": 0.57, "grad_norm": 1.9189858436584473, "learning_rate": 2.1713029299866593e-05, "loss": 5.4847, "step": 11452 }, { "epoch": 0.57, "grad_norm": 2.09655499458313, "learning_rate": 2.1703147388704976e-05, "loss": 5.5774, "step": 11456 }, { "epoch": 0.57, "grad_norm": 2.018876791000366, "learning_rate": 2.1693265477543358e-05, "loss": 5.614, "step": 11460 }, { "epoch": 0.57, "grad_norm": 1.754034161567688, "learning_rate": 2.168338356638174e-05, "loss": 5.443, "step": 11464 }, { "epoch": 0.57, "grad_norm": 1.950484275817871, "learning_rate": 2.1673501655220122e-05, "loss": 5.4982, "step": 11468 }, { "epoch": 0.57, "grad_norm": 1.8960100412368774, "learning_rate": 2.1663619744058504e-05, "loss": 5.478, "step": 11472 }, { "epoch": 0.57, "grad_norm": 1.7579559087753296, "learning_rate": 2.1653737832896882e-05, "loss": 5.5021, "step": 11476 }, { "epoch": 0.57, "grad_norm": 2.0521066188812256, "learning_rate": 2.1643855921735264e-05, "loss": 5.5329, "step": 11480 }, { "epoch": 0.57, "grad_norm": 1.999937891960144, "learning_rate": 2.1633974010573647e-05, "loss": 5.4377, "step": 11484 }, { "epoch": 0.57, "grad_norm": 2.099349021911621, "learning_rate": 2.162409209941203e-05, "loss": 5.562, "step": 11488 }, { "epoch": 0.57, "grad_norm": 2.0938682556152344, "learning_rate": 2.1614210188250407e-05, "loss": 5.577, "step": 11492 }, { "epoch": 0.57, "grad_norm": 1.6756892204284668, "learning_rate": 2.160432827708879e-05, "loss": 5.6301, "step": 11496 }, { "epoch": 0.57, "grad_norm": 1.7941629886627197, "learning_rate": 2.159444636592717e-05, "loss": 5.5144, "step": 11500 }, { "epoch": 0.57, "grad_norm": 1.9828251600265503, 
"learning_rate": 2.1584564454765553e-05, "loss": 5.674, "step": 11504 }, { "epoch": 0.57, "grad_norm": 1.8105112314224243, "learning_rate": 2.1574682543603932e-05, "loss": 5.5155, "step": 11508 }, { "epoch": 0.57, "grad_norm": 1.7346547842025757, "learning_rate": 2.1564800632442314e-05, "loss": 5.6869, "step": 11512 }, { "epoch": 0.57, "grad_norm": 1.7930268049240112, "learning_rate": 2.15549187212807e-05, "loss": 5.4631, "step": 11516 }, { "epoch": 0.57, "grad_norm": 1.799404263496399, "learning_rate": 2.154503681011908e-05, "loss": 5.5324, "step": 11520 }, { "epoch": 0.57, "grad_norm": 1.948190450668335, "learning_rate": 2.153515489895746e-05, "loss": 5.6236, "step": 11524 }, { "epoch": 0.57, "grad_norm": 2.0511763095855713, "learning_rate": 2.1525272987795842e-05, "loss": 5.6085, "step": 11528 }, { "epoch": 0.57, "grad_norm": 2.1785106658935547, "learning_rate": 2.151539107663422e-05, "loss": 5.4754, "step": 11532 }, { "epoch": 0.57, "grad_norm": 1.9326204061508179, "learning_rate": 2.1505509165472603e-05, "loss": 5.5052, "step": 11536 }, { "epoch": 0.57, "grad_norm": 1.9747314453125, "learning_rate": 2.1495627254310985e-05, "loss": 5.4398, "step": 11540 }, { "epoch": 0.57, "grad_norm": 1.743662714958191, "learning_rate": 2.1485745343149367e-05, "loss": 5.4521, "step": 11544 }, { "epoch": 0.57, "grad_norm": 1.823280930519104, "learning_rate": 2.1475863431987746e-05, "loss": 5.4957, "step": 11548 }, { "epoch": 0.57, "grad_norm": 2.047112464904785, "learning_rate": 2.1465981520826128e-05, "loss": 5.584, "step": 11552 }, { "epoch": 0.57, "grad_norm": 1.7637749910354614, "learning_rate": 2.145609960966451e-05, "loss": 5.6439, "step": 11556 }, { "epoch": 0.57, "grad_norm": 1.7925045490264893, "learning_rate": 2.1446217698502892e-05, "loss": 5.4745, "step": 11560 }, { "epoch": 0.57, "grad_norm": 1.8895896673202515, "learning_rate": 2.143633578734127e-05, "loss": 5.4082, "step": 11564 }, { "epoch": 0.57, "grad_norm": 1.956284761428833, "learning_rate": 
2.1426453876179653e-05, "loss": 5.3854, "step": 11568 }, { "epoch": 0.57, "grad_norm": 1.9955167770385742, "learning_rate": 2.141657196501804e-05, "loss": 5.4486, "step": 11572 }, { "epoch": 0.57, "grad_norm": 2.0636212825775146, "learning_rate": 2.1406690053856417e-05, "loss": 5.6201, "step": 11576 }, { "epoch": 0.57, "grad_norm": 2.0052077770233154, "learning_rate": 2.13968081426948e-05, "loss": 5.5525, "step": 11580 }, { "epoch": 0.57, "grad_norm": 1.9207104444503784, "learning_rate": 2.138692623153318e-05, "loss": 5.6011, "step": 11584 }, { "epoch": 0.57, "grad_norm": 1.9215471744537354, "learning_rate": 2.137704432037156e-05, "loss": 5.556, "step": 11588 }, { "epoch": 0.57, "grad_norm": 1.6822532415390015, "learning_rate": 2.1367162409209942e-05, "loss": 5.521, "step": 11592 }, { "epoch": 0.57, "grad_norm": 2.1839683055877686, "learning_rate": 2.1357280498048324e-05, "loss": 5.6367, "step": 11596 }, { "epoch": 0.57, "grad_norm": 1.855774998664856, "learning_rate": 2.1347398586886706e-05, "loss": 5.518, "step": 11600 }, { "epoch": 0.57, "grad_norm": 2.0112996101379395, "learning_rate": 2.1337516675725085e-05, "loss": 5.6125, "step": 11604 }, { "epoch": 0.57, "grad_norm": 2.0093090534210205, "learning_rate": 2.1327634764563467e-05, "loss": 5.6358, "step": 11608 }, { "epoch": 0.57, "grad_norm": 1.6822510957717896, "learning_rate": 2.131775285340185e-05, "loss": 5.5177, "step": 11612 }, { "epoch": 0.57, "grad_norm": 1.9259730577468872, "learning_rate": 2.130787094224023e-05, "loss": 5.5029, "step": 11616 }, { "epoch": 0.57, "grad_norm": 1.9150243997573853, "learning_rate": 2.129798903107861e-05, "loss": 5.5636, "step": 11620 }, { "epoch": 0.57, "grad_norm": 1.8998494148254395, "learning_rate": 2.128810711991699e-05, "loss": 5.5628, "step": 11624 }, { "epoch": 0.57, "grad_norm": 1.9951376914978027, "learning_rate": 2.1278225208755374e-05, "loss": 5.5671, "step": 11628 }, { "epoch": 0.57, "grad_norm": 1.7849557399749756, "learning_rate": 2.1268343297593756e-05, 
"loss": 5.5127, "step": 11632 }, { "epoch": 0.57, "grad_norm": 2.0039825439453125, "learning_rate": 2.1258461386432138e-05, "loss": 5.5114, "step": 11636 }, { "epoch": 0.58, "grad_norm": 2.007443904876709, "learning_rate": 2.124857947527052e-05, "loss": 5.5814, "step": 11640 }, { "epoch": 0.58, "grad_norm": 1.7764946222305298, "learning_rate": 2.1238697564108902e-05, "loss": 5.5585, "step": 11644 }, { "epoch": 0.58, "grad_norm": 2.218045473098755, "learning_rate": 2.122881565294728e-05, "loss": 5.4461, "step": 11648 }, { "epoch": 0.58, "grad_norm": 2.088454484939575, "learning_rate": 2.1218933741785663e-05, "loss": 5.492, "step": 11652 }, { "epoch": 0.58, "grad_norm": 1.8053348064422607, "learning_rate": 2.1209051830624045e-05, "loss": 5.5221, "step": 11656 }, { "epoch": 0.58, "grad_norm": 2.035414218902588, "learning_rate": 2.1199169919462423e-05, "loss": 5.6862, "step": 11660 }, { "epoch": 0.58, "grad_norm": 2.0980472564697266, "learning_rate": 2.1189288008300805e-05, "loss": 5.6149, "step": 11664 }, { "epoch": 0.58, "grad_norm": 2.033268690109253, "learning_rate": 2.1179406097139187e-05, "loss": 5.4371, "step": 11668 }, { "epoch": 0.58, "grad_norm": 1.8143877983093262, "learning_rate": 2.116952418597757e-05, "loss": 5.5271, "step": 11672 }, { "epoch": 0.58, "grad_norm": 1.7776703834533691, "learning_rate": 2.1159642274815948e-05, "loss": 5.6312, "step": 11676 }, { "epoch": 0.58, "grad_norm": 1.8278911113739014, "learning_rate": 2.114976036365433e-05, "loss": 5.5128, "step": 11680 }, { "epoch": 0.58, "grad_norm": 1.8950855731964111, "learning_rate": 2.1139878452492712e-05, "loss": 5.4134, "step": 11684 }, { "epoch": 0.58, "grad_norm": 1.828503966331482, "learning_rate": 2.1129996541331094e-05, "loss": 5.5917, "step": 11688 }, { "epoch": 0.58, "grad_norm": 1.9912033081054688, "learning_rate": 2.1120114630169476e-05, "loss": 5.4005, "step": 11692 }, { "epoch": 0.58, "grad_norm": 1.871800422668457, "learning_rate": 2.111023271900786e-05, "loss": 5.5202, "step": 
11696 }, { "epoch": 0.58, "grad_norm": 1.8771202564239502, "learning_rate": 2.110035080784624e-05, "loss": 5.5294, "step": 11700 }, { "epoch": 0.58, "grad_norm": 1.8930245637893677, "learning_rate": 2.109046889668462e-05, "loss": 5.482, "step": 11704 }, { "epoch": 0.58, "grad_norm": 1.910056471824646, "learning_rate": 2.1080586985523e-05, "loss": 5.625, "step": 11708 }, { "epoch": 0.58, "grad_norm": 1.7149245738983154, "learning_rate": 2.1070705074361383e-05, "loss": 5.494, "step": 11712 }, { "epoch": 0.58, "grad_norm": 1.8306865692138672, "learning_rate": 2.1060823163199765e-05, "loss": 5.5143, "step": 11716 }, { "epoch": 0.58, "grad_norm": 1.9364176988601685, "learning_rate": 2.1050941252038144e-05, "loss": 5.5469, "step": 11720 }, { "epoch": 0.58, "grad_norm": 1.9254311323165894, "learning_rate": 2.1041059340876526e-05, "loss": 5.6064, "step": 11724 }, { "epoch": 0.58, "grad_norm": 1.8167616128921509, "learning_rate": 2.1031177429714908e-05, "loss": 5.6055, "step": 11728 }, { "epoch": 0.58, "grad_norm": 1.9327070713043213, "learning_rate": 2.1021295518553287e-05, "loss": 5.6255, "step": 11732 }, { "epoch": 0.58, "grad_norm": 2.0709402561187744, "learning_rate": 2.101141360739167e-05, "loss": 5.5823, "step": 11736 }, { "epoch": 0.58, "grad_norm": 1.6243914365768433, "learning_rate": 2.100153169623005e-05, "loss": 5.5357, "step": 11740 }, { "epoch": 0.58, "grad_norm": 2.0544323921203613, "learning_rate": 2.0991649785068433e-05, "loss": 5.4751, "step": 11744 }, { "epoch": 0.58, "grad_norm": 1.8204375505447388, "learning_rate": 2.0981767873906815e-05, "loss": 5.4421, "step": 11748 }, { "epoch": 0.58, "grad_norm": 1.8007164001464844, "learning_rate": 2.0971885962745197e-05, "loss": 5.5578, "step": 11752 }, { "epoch": 0.58, "grad_norm": 2.0843026638031006, "learning_rate": 2.096200405158358e-05, "loss": 5.5082, "step": 11756 }, { "epoch": 0.58, "grad_norm": 1.7756987810134888, "learning_rate": 2.0952122140421958e-05, "loss": 5.4814, "step": 11760 }, { "epoch": 0.58, 
"grad_norm": 1.9955602884292603, "learning_rate": 2.094224022926034e-05, "loss": 5.6497, "step": 11764 }, { "epoch": 0.58, "grad_norm": 2.000737190246582, "learning_rate": 2.0932358318098722e-05, "loss": 5.5241, "step": 11768 }, { "epoch": 0.58, "grad_norm": 1.7721540927886963, "learning_rate": 2.0922476406937104e-05, "loss": 5.4961, "step": 11772 }, { "epoch": 0.58, "grad_norm": 1.9845120906829834, "learning_rate": 2.0912594495775483e-05, "loss": 5.6158, "step": 11776 }, { "epoch": 0.58, "grad_norm": 1.7041538953781128, "learning_rate": 2.0902712584613865e-05, "loss": 5.4892, "step": 11780 }, { "epoch": 0.58, "grad_norm": 1.8320084810256958, "learning_rate": 2.0892830673452247e-05, "loss": 5.6435, "step": 11784 }, { "epoch": 0.58, "grad_norm": 1.7904802560806274, "learning_rate": 2.088294876229063e-05, "loss": 5.4774, "step": 11788 }, { "epoch": 0.58, "grad_norm": 1.894283413887024, "learning_rate": 2.0873066851129008e-05, "loss": 5.4308, "step": 11792 }, { "epoch": 0.58, "grad_norm": 1.8176664113998413, "learning_rate": 2.086318493996739e-05, "loss": 5.4719, "step": 11796 }, { "epoch": 0.58, "grad_norm": 1.9829813241958618, "learning_rate": 2.0853303028805772e-05, "loss": 5.4217, "step": 11800 }, { "epoch": 0.58, "grad_norm": 1.8619272708892822, "learning_rate": 2.0843421117644154e-05, "loss": 5.5205, "step": 11804 }, { "epoch": 0.58, "grad_norm": 1.8735342025756836, "learning_rate": 2.0833539206482536e-05, "loss": 5.5359, "step": 11808 }, { "epoch": 0.58, "grad_norm": 1.860768437385559, "learning_rate": 2.0823657295320918e-05, "loss": 5.5111, "step": 11812 }, { "epoch": 0.58, "grad_norm": 2.0333621501922607, "learning_rate": 2.0813775384159297e-05, "loss": 5.5294, "step": 11816 }, { "epoch": 0.58, "grad_norm": 1.9842480421066284, "learning_rate": 2.080389347299768e-05, "loss": 5.521, "step": 11820 }, { "epoch": 0.58, "grad_norm": 1.7381541728973389, "learning_rate": 2.079401156183606e-05, "loss": 5.5587, "step": 11824 }, { "epoch": 0.58, "grad_norm": 
1.6994056701660156, "learning_rate": 2.0784129650674443e-05, "loss": 5.5389, "step": 11828 }, { "epoch": 0.58, "grad_norm": 1.7459512948989868, "learning_rate": 2.077424773951282e-05, "loss": 5.6567, "step": 11832 }, { "epoch": 0.58, "grad_norm": 1.9870339632034302, "learning_rate": 2.0764365828351203e-05, "loss": 5.5713, "step": 11836 }, { "epoch": 0.59, "grad_norm": 2.1633095741271973, "learning_rate": 2.0754483917189586e-05, "loss": 5.4663, "step": 11840 }, { "epoch": 0.59, "grad_norm": 1.8806403875350952, "learning_rate": 2.0744602006027968e-05, "loss": 5.489, "step": 11844 }, { "epoch": 0.59, "grad_norm": 2.007995367050171, "learning_rate": 2.0734720094866346e-05, "loss": 5.669, "step": 11848 }, { "epoch": 0.59, "grad_norm": 2.024402141571045, "learning_rate": 2.072483818370473e-05, "loss": 5.5401, "step": 11852 }, { "epoch": 0.59, "grad_norm": 1.9132201671600342, "learning_rate": 2.071495627254311e-05, "loss": 5.6524, "step": 11856 }, { "epoch": 0.59, "grad_norm": 2.0311427116394043, "learning_rate": 2.0705074361381492e-05, "loss": 5.5418, "step": 11860 }, { "epoch": 0.59, "grad_norm": 2.0080606937408447, "learning_rate": 2.0695192450219875e-05, "loss": 5.5101, "step": 11864 }, { "epoch": 0.59, "grad_norm": 1.698283076286316, "learning_rate": 2.0685310539058257e-05, "loss": 5.6118, "step": 11868 }, { "epoch": 0.59, "grad_norm": 1.9465456008911133, "learning_rate": 2.0675428627896635e-05, "loss": 5.545, "step": 11872 }, { "epoch": 0.59, "grad_norm": 1.7435317039489746, "learning_rate": 2.0665546716735017e-05, "loss": 5.4387, "step": 11876 }, { "epoch": 0.59, "grad_norm": 2.106904983520508, "learning_rate": 2.06556648055734e-05, "loss": 5.6137, "step": 11880 }, { "epoch": 0.59, "grad_norm": 2.1040077209472656, "learning_rate": 2.064578289441178e-05, "loss": 5.5537, "step": 11884 }, { "epoch": 0.59, "grad_norm": 1.9074550867080688, "learning_rate": 2.063590098325016e-05, "loss": 5.5837, "step": 11888 }, { "epoch": 0.59, "grad_norm": 2.0350706577301025, 
"learning_rate": 2.0626019072088542e-05, "loss": 5.6268, "step": 11892 }, { "epoch": 0.59, "grad_norm": 2.2680063247680664, "learning_rate": 2.0616137160926924e-05, "loss": 5.4717, "step": 11896 }, { "epoch": 0.59, "grad_norm": 1.8204529285430908, "learning_rate": 2.0606255249765306e-05, "loss": 5.5205, "step": 11900 }, { "epoch": 0.59, "grad_norm": 2.01955246925354, "learning_rate": 2.0596373338603685e-05, "loss": 5.4586, "step": 11904 }, { "epoch": 0.59, "grad_norm": 2.047470808029175, "learning_rate": 2.0586491427442067e-05, "loss": 5.5496, "step": 11908 }, { "epoch": 0.59, "grad_norm": 1.9742119312286377, "learning_rate": 2.057660951628045e-05, "loss": 5.5521, "step": 11912 }, { "epoch": 0.59, "grad_norm": 1.9781845808029175, "learning_rate": 2.056672760511883e-05, "loss": 5.4699, "step": 11916 }, { "epoch": 0.59, "grad_norm": 1.7147916555404663, "learning_rate": 2.0556845693957213e-05, "loss": 5.5772, "step": 11920 }, { "epoch": 0.59, "grad_norm": 1.8772165775299072, "learning_rate": 2.0546963782795595e-05, "loss": 5.4976, "step": 11924 }, { "epoch": 0.59, "grad_norm": 1.9535537958145142, "learning_rate": 2.0537081871633977e-05, "loss": 5.4462, "step": 11928 }, { "epoch": 0.59, "grad_norm": 1.8837189674377441, "learning_rate": 2.0527199960472356e-05, "loss": 5.5724, "step": 11932 }, { "epoch": 0.59, "grad_norm": 1.9887135028839111, "learning_rate": 2.0517318049310738e-05, "loss": 5.6435, "step": 11936 }, { "epoch": 0.59, "grad_norm": 1.9106159210205078, "learning_rate": 2.050743613814912e-05, "loss": 5.6322, "step": 11940 }, { "epoch": 0.59, "grad_norm": 1.9000262022018433, "learning_rate": 2.04975542269875e-05, "loss": 5.5926, "step": 11944 }, { "epoch": 0.59, "grad_norm": 1.8226341009140015, "learning_rate": 2.048767231582588e-05, "loss": 5.5135, "step": 11948 }, { "epoch": 0.59, "grad_norm": 1.8902435302734375, "learning_rate": 2.0477790404664263e-05, "loss": 5.4756, "step": 11952 }, { "epoch": 0.59, "grad_norm": 2.058504819869995, "learning_rate": 
2.0467908493502645e-05, "loss": 5.6203, "step": 11956 }, { "epoch": 0.59, "grad_norm": 1.977247714996338, "learning_rate": 2.0458026582341024e-05, "loss": 5.5297, "step": 11960 }, { "epoch": 0.59, "grad_norm": 1.6700700521469116, "learning_rate": 2.0448144671179406e-05, "loss": 5.5585, "step": 11964 }, { "epoch": 0.59, "grad_norm": 1.7290418148040771, "learning_rate": 2.0438262760017788e-05, "loss": 5.498, "step": 11968 }, { "epoch": 0.59, "grad_norm": 1.7892743349075317, "learning_rate": 2.042838084885617e-05, "loss": 5.4366, "step": 11972 }, { "epoch": 0.59, "grad_norm": 2.1159372329711914, "learning_rate": 2.0418498937694552e-05, "loss": 5.5055, "step": 11976 }, { "epoch": 0.59, "grad_norm": 2.0965609550476074, "learning_rate": 2.0408617026532934e-05, "loss": 5.6062, "step": 11980 }, { "epoch": 0.59, "grad_norm": 1.8796151876449585, "learning_rate": 2.0398735115371316e-05, "loss": 5.5831, "step": 11984 }, { "epoch": 0.59, "grad_norm": 1.8930494785308838, "learning_rate": 2.0388853204209695e-05, "loss": 5.5128, "step": 11988 }, { "epoch": 0.59, "grad_norm": 2.1627285480499268, "learning_rate": 2.0378971293048077e-05, "loss": 5.5642, "step": 11992 }, { "epoch": 0.59, "grad_norm": 1.7648162841796875, "learning_rate": 2.036908938188646e-05, "loss": 5.4877, "step": 11996 }, { "epoch": 0.59, "grad_norm": 2.2223527431488037, "learning_rate": 2.035920747072484e-05, "loss": 5.5078, "step": 12000 }, { "epoch": 0.59, "grad_norm": 2.301929473876953, "learning_rate": 2.034932555956322e-05, "loss": 5.5219, "step": 12004 }, { "epoch": 0.59, "grad_norm": 1.9763400554656982, "learning_rate": 2.03394436484016e-05, "loss": 5.5175, "step": 12008 }, { "epoch": 0.59, "grad_norm": 2.0059380531311035, "learning_rate": 2.0329561737239984e-05, "loss": 5.5533, "step": 12012 }, { "epoch": 0.59, "grad_norm": 2.3033435344696045, "learning_rate": 2.0319679826078362e-05, "loss": 5.5641, "step": 12016 }, { "epoch": 0.59, "grad_norm": 1.9601203203201294, "learning_rate": 2.0309797914916744e-05, 
"loss": 5.4841, "step": 12020 }, { "epoch": 0.59, "grad_norm": 1.893579363822937, "learning_rate": 2.0299916003755126e-05, "loss": 5.418, "step": 12024 }, { "epoch": 0.59, "grad_norm": 1.857035517692566, "learning_rate": 2.029003409259351e-05, "loss": 5.5092, "step": 12028 }, { "epoch": 0.59, "grad_norm": 2.068701982498169, "learning_rate": 2.028015218143189e-05, "loss": 5.4992, "step": 12032 }, { "epoch": 0.59, "grad_norm": 1.8823144435882568, "learning_rate": 2.0270270270270273e-05, "loss": 5.4709, "step": 12036 }, { "epoch": 0.59, "grad_norm": 1.873325228691101, "learning_rate": 2.0260388359108655e-05, "loss": 5.4815, "step": 12040 }, { "epoch": 0.6, "grad_norm": 1.88863205909729, "learning_rate": 2.0250506447947033e-05, "loss": 5.6217, "step": 12044 }, { "epoch": 0.6, "grad_norm": 1.813723087310791, "learning_rate": 2.0240624536785415e-05, "loss": 5.5736, "step": 12048 }, { "epoch": 0.6, "grad_norm": 1.8674362897872925, "learning_rate": 2.0230742625623797e-05, "loss": 5.4345, "step": 12052 }, { "epoch": 0.6, "grad_norm": 1.840463399887085, "learning_rate": 2.022086071446218e-05, "loss": 5.4454, "step": 12056 }, { "epoch": 0.6, "grad_norm": 1.8874510526657104, "learning_rate": 2.0210978803300558e-05, "loss": 5.4834, "step": 12060 }, { "epoch": 0.6, "grad_norm": 1.8396440744400024, "learning_rate": 2.020109689213894e-05, "loss": 5.5029, "step": 12064 }, { "epoch": 0.6, "grad_norm": 1.950766921043396, "learning_rate": 2.0191214980977322e-05, "loss": 5.4637, "step": 12068 }, { "epoch": 0.6, "grad_norm": 2.1061501502990723, "learning_rate": 2.0181333069815704e-05, "loss": 5.4081, "step": 12072 }, { "epoch": 0.6, "grad_norm": 1.9160481691360474, "learning_rate": 2.0171451158654083e-05, "loss": 5.484, "step": 12076 }, { "epoch": 0.6, "grad_norm": 2.151904344558716, "learning_rate": 2.0161569247492465e-05, "loss": 5.5189, "step": 12080 }, { "epoch": 0.6, "grad_norm": 1.8847110271453857, "learning_rate": 2.0151687336330847e-05, "loss": 5.4264, "step": 12084 }, { 
"epoch": 0.6, "grad_norm": 1.914305329322815, "learning_rate": 2.014180542516923e-05, "loss": 5.551, "step": 12088 }, { "epoch": 0.6, "grad_norm": 2.034773111343384, "learning_rate": 2.013192351400761e-05, "loss": 5.5728, "step": 12092 }, { "epoch": 0.6, "grad_norm": 1.7983940839767456, "learning_rate": 2.0122041602845993e-05, "loss": 5.4991, "step": 12096 }, { "epoch": 0.6, "grad_norm": 2.019416570663452, "learning_rate": 2.0112159691684372e-05, "loss": 5.6503, "step": 12100 }, { "epoch": 0.6, "grad_norm": 1.9403501749038696, "learning_rate": 2.0102277780522754e-05, "loss": 5.5295, "step": 12104 }, { "epoch": 0.6, "grad_norm": 2.006972074508667, "learning_rate": 2.0092395869361136e-05, "loss": 5.4612, "step": 12108 }, { "epoch": 0.6, "grad_norm": 2.0223989486694336, "learning_rate": 2.0082513958199518e-05, "loss": 5.4774, "step": 12112 }, { "epoch": 0.6, "grad_norm": 1.7617640495300293, "learning_rate": 2.0072632047037897e-05, "loss": 5.5604, "step": 12116 }, { "epoch": 0.6, "grad_norm": 1.961089849472046, "learning_rate": 2.006275013587628e-05, "loss": 5.4945, "step": 12120 }, { "epoch": 0.6, "grad_norm": 2.054691791534424, "learning_rate": 2.005286822471466e-05, "loss": 5.4596, "step": 12124 }, { "epoch": 0.6, "grad_norm": 1.8954715728759766, "learning_rate": 2.0042986313553043e-05, "loss": 5.5597, "step": 12128 }, { "epoch": 0.6, "grad_norm": 1.9412333965301514, "learning_rate": 2.0033104402391422e-05, "loss": 5.5437, "step": 12132 }, { "epoch": 0.6, "grad_norm": 1.7679616212844849, "learning_rate": 2.0023222491229804e-05, "loss": 5.3381, "step": 12136 }, { "epoch": 0.6, "grad_norm": 1.884602665901184, "learning_rate": 2.0013340580068186e-05, "loss": 5.5533, "step": 12140 }, { "epoch": 0.6, "grad_norm": 2.0963213443756104, "learning_rate": 2.0003458668906565e-05, "loss": 5.5015, "step": 12144 }, { "epoch": 0.6, "grad_norm": 1.796938180923462, "learning_rate": 1.999357675774495e-05, "loss": 5.4519, "step": 12148 }, { "epoch": 0.6, "grad_norm": 
2.2093305587768555, "learning_rate": 1.9983694846583332e-05, "loss": 5.6164, "step": 12152 }, { "epoch": 0.6, "grad_norm": 1.8350774049758911, "learning_rate": 1.997381293542171e-05, "loss": 5.583, "step": 12156 }, { "epoch": 0.6, "grad_norm": 1.8653216361999512, "learning_rate": 1.9963931024260093e-05, "loss": 5.656, "step": 12160 }, { "epoch": 0.6, "grad_norm": 1.7206474542617798, "learning_rate": 1.9954049113098475e-05, "loss": 5.5226, "step": 12164 }, { "epoch": 0.6, "grad_norm": 1.8878382444381714, "learning_rate": 1.9944167201936857e-05, "loss": 5.6363, "step": 12168 }, { "epoch": 0.6, "grad_norm": 2.0668506622314453, "learning_rate": 1.9934285290775236e-05, "loss": 5.5106, "step": 12172 }, { "epoch": 0.6, "grad_norm": 2.0027477741241455, "learning_rate": 1.9924403379613618e-05, "loss": 5.5217, "step": 12176 }, { "epoch": 0.6, "grad_norm": 2.1123950481414795, "learning_rate": 1.9914521468452e-05, "loss": 5.6275, "step": 12180 }, { "epoch": 0.6, "grad_norm": 2.1055514812469482, "learning_rate": 1.9904639557290382e-05, "loss": 5.6186, "step": 12184 }, { "epoch": 0.6, "grad_norm": 2.0140175819396973, "learning_rate": 1.989475764612876e-05, "loss": 5.5965, "step": 12188 }, { "epoch": 0.6, "grad_norm": 1.902254343032837, "learning_rate": 1.9884875734967142e-05, "loss": 5.5557, "step": 12192 }, { "epoch": 0.6, "grad_norm": 1.9510375261306763, "learning_rate": 1.9874993823805525e-05, "loss": 5.4804, "step": 12196 }, { "epoch": 0.6, "grad_norm": 1.950716495513916, "learning_rate": 1.9865111912643907e-05, "loss": 5.5887, "step": 12200 }, { "epoch": 0.6, "grad_norm": 2.0936696529388428, "learning_rate": 1.985523000148229e-05, "loss": 5.5463, "step": 12204 }, { "epoch": 0.6, "grad_norm": 1.6361807584762573, "learning_rate": 1.984534809032067e-05, "loss": 5.5516, "step": 12208 }, { "epoch": 0.6, "grad_norm": 2.1499600410461426, "learning_rate": 1.9835466179159053e-05, "loss": 5.5467, "step": 12212 }, { "epoch": 0.6, "grad_norm": 1.9551507234573364, "learning_rate": 
1.982558426799743e-05, "loss": 5.549, "step": 12216 }, { "epoch": 0.6, "grad_norm": 1.9426565170288086, "learning_rate": 1.9815702356835814e-05, "loss": 5.5726, "step": 12220 }, { "epoch": 0.6, "grad_norm": 1.9303926229476929, "learning_rate": 1.9805820445674196e-05, "loss": 5.5571, "step": 12224 }, { "epoch": 0.6, "grad_norm": 1.859390139579773, "learning_rate": 1.9795938534512574e-05, "loss": 5.499, "step": 12228 }, { "epoch": 0.6, "grad_norm": 1.8791084289550781, "learning_rate": 1.9786056623350956e-05, "loss": 5.5617, "step": 12232 }, { "epoch": 0.6, "grad_norm": 2.1466195583343506, "learning_rate": 1.977617471218934e-05, "loss": 5.3845, "step": 12236 }, { "epoch": 0.6, "grad_norm": 1.7159942388534546, "learning_rate": 1.976629280102772e-05, "loss": 5.551, "step": 12240 }, { "epoch": 0.6, "grad_norm": 1.8822176456451416, "learning_rate": 1.97564108898661e-05, "loss": 5.4277, "step": 12244 }, { "epoch": 0.61, "grad_norm": 1.798642635345459, "learning_rate": 1.974652897870448e-05, "loss": 5.6463, "step": 12248 }, { "epoch": 0.61, "grad_norm": 1.9936386346817017, "learning_rate": 1.9736647067542863e-05, "loss": 5.6342, "step": 12252 }, { "epoch": 0.61, "grad_norm": 2.1285390853881836, "learning_rate": 1.9726765156381245e-05, "loss": 5.5005, "step": 12256 }, { "epoch": 0.61, "grad_norm": 1.88554048538208, "learning_rate": 1.9716883245219624e-05, "loss": 5.5671, "step": 12260 }, { "epoch": 0.61, "grad_norm": 2.1367435455322266, "learning_rate": 1.970700133405801e-05, "loss": 5.4724, "step": 12264 }, { "epoch": 0.61, "grad_norm": 1.9615224599838257, "learning_rate": 1.969711942289639e-05, "loss": 5.4797, "step": 12268 }, { "epoch": 0.61, "grad_norm": 2.0672008991241455, "learning_rate": 1.968723751173477e-05, "loss": 5.4903, "step": 12272 }, { "epoch": 0.61, "grad_norm": 1.8822599649429321, "learning_rate": 1.9677355600573152e-05, "loss": 5.4196, "step": 12276 }, { "epoch": 0.61, "grad_norm": 1.9987417459487915, "learning_rate": 1.9667473689411534e-05, "loss": 
5.5575, "step": 12280 }, { "epoch": 0.61, "grad_norm": 1.8995615243911743, "learning_rate": 1.9657591778249916e-05, "loss": 5.5343, "step": 12284 }, { "epoch": 0.61, "grad_norm": 1.9003994464874268, "learning_rate": 1.9647709867088295e-05, "loss": 5.5341, "step": 12288 }, { "epoch": 0.61, "grad_norm": 1.9870491027832031, "learning_rate": 1.9637827955926677e-05, "loss": 5.5928, "step": 12292 }, { "epoch": 0.61, "grad_norm": 2.072319269180298, "learning_rate": 1.962794604476506e-05, "loss": 5.4821, "step": 12296 }, { "epoch": 0.61, "grad_norm": 1.9005805253982544, "learning_rate": 1.9618064133603438e-05, "loss": 5.5296, "step": 12300 }, { "epoch": 0.61, "grad_norm": 1.8590092658996582, "learning_rate": 1.960818222244182e-05, "loss": 5.568, "step": 12304 }, { "epoch": 0.61, "grad_norm": 2.030334949493408, "learning_rate": 1.9598300311280202e-05, "loss": 5.4239, "step": 12308 }, { "epoch": 0.61, "grad_norm": 1.9241563081741333, "learning_rate": 1.9588418400118584e-05, "loss": 5.5618, "step": 12312 }, { "epoch": 0.61, "grad_norm": 1.9295763969421387, "learning_rate": 1.9578536488956963e-05, "loss": 5.6011, "step": 12316 }, { "epoch": 0.61, "grad_norm": 1.9351352453231812, "learning_rate": 1.9568654577795348e-05, "loss": 5.5549, "step": 12320 }, { "epoch": 0.61, "grad_norm": 2.0557003021240234, "learning_rate": 1.955877266663373e-05, "loss": 5.531, "step": 12324 }, { "epoch": 0.61, "grad_norm": 2.0774848461151123, "learning_rate": 1.954889075547211e-05, "loss": 5.5031, "step": 12328 }, { "epoch": 0.61, "grad_norm": 1.9636493921279907, "learning_rate": 1.953900884431049e-05, "loss": 5.5733, "step": 12332 }, { "epoch": 0.61, "grad_norm": 2.006387710571289, "learning_rate": 1.9529126933148873e-05, "loss": 5.5184, "step": 12336 }, { "epoch": 0.61, "grad_norm": 1.8912975788116455, "learning_rate": 1.9519245021987255e-05, "loss": 5.4529, "step": 12340 }, { "epoch": 0.61, "grad_norm": 2.028090715408325, "learning_rate": 1.9509363110825634e-05, "loss": 5.4242, "step": 12344 }, { 
"epoch": 0.61, "grad_norm": 2.042482376098633, "learning_rate": 1.9499481199664016e-05, "loss": 5.5455, "step": 12348 }, { "epoch": 0.61, "grad_norm": 2.1823337078094482, "learning_rate": 1.9489599288502398e-05, "loss": 5.4165, "step": 12352 }, { "epoch": 0.61, "grad_norm": 1.8578866720199585, "learning_rate": 1.947971737734078e-05, "loss": 5.4873, "step": 12356 }, { "epoch": 0.61, "grad_norm": 1.964311957359314, "learning_rate": 1.946983546617916e-05, "loss": 5.5457, "step": 12360 }, { "epoch": 0.61, "grad_norm": 2.030364513397217, "learning_rate": 1.945995355501754e-05, "loss": 5.474, "step": 12364 }, { "epoch": 0.61, "grad_norm": 2.0638349056243896, "learning_rate": 1.9450071643855923e-05, "loss": 5.5748, "step": 12368 }, { "epoch": 0.61, "grad_norm": 2.002610445022583, "learning_rate": 1.94401897326943e-05, "loss": 5.546, "step": 12372 }, { "epoch": 0.61, "grad_norm": 1.8753662109375, "learning_rate": 1.9430307821532687e-05, "loss": 5.5053, "step": 12376 }, { "epoch": 0.61, "grad_norm": 2.1950864791870117, "learning_rate": 1.942042591037107e-05, "loss": 5.4716, "step": 12380 }, { "epoch": 0.61, "grad_norm": 2.067065715789795, "learning_rate": 1.9410543999209447e-05, "loss": 5.5028, "step": 12384 }, { "epoch": 0.61, "grad_norm": 2.0129029750823975, "learning_rate": 1.940066208804783e-05, "loss": 5.522, "step": 12388 }, { "epoch": 0.61, "grad_norm": 1.5361533164978027, "learning_rate": 1.939078017688621e-05, "loss": 5.4896, "step": 12392 }, { "epoch": 0.61, "grad_norm": 1.945295810699463, "learning_rate": 1.9380898265724594e-05, "loss": 5.5306, "step": 12396 }, { "epoch": 0.61, "grad_norm": 1.7914199829101562, "learning_rate": 1.9371016354562972e-05, "loss": 5.5514, "step": 12400 }, { "epoch": 0.61, "grad_norm": 2.061509609222412, "learning_rate": 1.9361134443401354e-05, "loss": 5.5803, "step": 12404 }, { "epoch": 0.61, "grad_norm": 1.9697644710540771, "learning_rate": 1.9351252532239736e-05, "loss": 5.4989, "step": 12408 }, { "epoch": 0.61, "grad_norm": 
1.801199197769165, "learning_rate": 1.934137062107812e-05, "loss": 5.6458, "step": 12412 }, { "epoch": 0.61, "grad_norm": 1.985129475593567, "learning_rate": 1.9331488709916497e-05, "loss": 5.4291, "step": 12416 }, { "epoch": 0.61, "grad_norm": 1.8330814838409424, "learning_rate": 1.932160679875488e-05, "loss": 5.3954, "step": 12420 }, { "epoch": 0.61, "grad_norm": 2.2082693576812744, "learning_rate": 1.931172488759326e-05, "loss": 5.5882, "step": 12424 }, { "epoch": 0.61, "grad_norm": 1.6590445041656494, "learning_rate": 1.930184297643164e-05, "loss": 5.4728, "step": 12428 }, { "epoch": 0.61, "grad_norm": 2.0365848541259766, "learning_rate": 1.9291961065270022e-05, "loss": 5.5032, "step": 12432 }, { "epoch": 0.61, "grad_norm": 1.9523799419403076, "learning_rate": 1.9282079154108407e-05, "loss": 5.4974, "step": 12436 }, { "epoch": 0.61, "grad_norm": 1.9144923686981201, "learning_rate": 1.9272197242946786e-05, "loss": 5.5348, "step": 12440 }, { "epoch": 0.61, "grad_norm": 1.7671104669570923, "learning_rate": 1.9262315331785168e-05, "loss": 5.5292, "step": 12444 }, { "epoch": 0.62, "grad_norm": 2.0515549182891846, "learning_rate": 1.925243342062355e-05, "loss": 5.4845, "step": 12448 }, { "epoch": 0.62, "grad_norm": 1.9381266832351685, "learning_rate": 1.9242551509461932e-05, "loss": 5.5455, "step": 12452 }, { "epoch": 0.62, "grad_norm": 2.017817497253418, "learning_rate": 1.923266959830031e-05, "loss": 5.3844, "step": 12456 }, { "epoch": 0.62, "grad_norm": 1.8400537967681885, "learning_rate": 1.9222787687138693e-05, "loss": 5.4878, "step": 12460 }, { "epoch": 0.62, "grad_norm": 2.263641119003296, "learning_rate": 1.9212905775977075e-05, "loss": 5.5841, "step": 12464 }, { "epoch": 0.62, "grad_norm": 1.8722437620162964, "learning_rate": 1.9203023864815457e-05, "loss": 5.4782, "step": 12468 }, { "epoch": 0.62, "grad_norm": 2.020585060119629, "learning_rate": 1.9193141953653836e-05, "loss": 5.4668, "step": 12472 }, { "epoch": 0.62, "grad_norm": 1.7479088306427002, 
"learning_rate": 1.9183260042492218e-05, "loss": 5.5514, "step": 12476 }, { "epoch": 0.62, "grad_norm": 1.9156551361083984, "learning_rate": 1.91733781313306e-05, "loss": 5.4994, "step": 12480 }, { "epoch": 0.62, "grad_norm": 1.882408618927002, "learning_rate": 1.9163496220168982e-05, "loss": 5.4834, "step": 12484 }, { "epoch": 0.62, "grad_norm": 2.083282232284546, "learning_rate": 1.915361430900736e-05, "loss": 5.5116, "step": 12488 }, { "epoch": 0.62, "grad_norm": 1.9320555925369263, "learning_rate": 1.9143732397845746e-05, "loss": 5.5378, "step": 12492 }, { "epoch": 0.62, "grad_norm": 2.215940475463867, "learning_rate": 1.9133850486684128e-05, "loss": 5.4937, "step": 12496 }, { "epoch": 0.62, "grad_norm": 2.308119297027588, "learning_rate": 1.9123968575522507e-05, "loss": 5.555, "step": 12500 }, { "epoch": 0.62, "grad_norm": 2.178675413131714, "learning_rate": 1.911408666436089e-05, "loss": 5.487, "step": 12504 }, { "epoch": 0.62, "grad_norm": 2.004458427429199, "learning_rate": 1.910420475319927e-05, "loss": 5.5416, "step": 12508 }, { "epoch": 0.62, "grad_norm": 2.0435168743133545, "learning_rate": 1.909432284203765e-05, "loss": 5.4122, "step": 12512 }, { "epoch": 0.62, "grad_norm": 2.0281968116760254, "learning_rate": 1.9084440930876032e-05, "loss": 5.5032, "step": 12516 }, { "epoch": 0.62, "grad_norm": 1.9901241064071655, "learning_rate": 1.9074559019714414e-05, "loss": 5.5346, "step": 12520 }, { "epoch": 0.62, "grad_norm": 2.0608649253845215, "learning_rate": 1.9064677108552796e-05, "loss": 5.5318, "step": 12524 }, { "epoch": 0.62, "grad_norm": 2.1132655143737793, "learning_rate": 1.9054795197391175e-05, "loss": 5.5227, "step": 12528 }, { "epoch": 0.62, "grad_norm": 2.1006295680999756, "learning_rate": 1.9044913286229557e-05, "loss": 5.4704, "step": 12532 }, { "epoch": 0.62, "grad_norm": 1.8386894464492798, "learning_rate": 1.903503137506794e-05, "loss": 5.4159, "step": 12536 }, { "epoch": 0.62, "grad_norm": 1.9647696018218994, "learning_rate": 
1.902514946390632e-05, "loss": 5.6805, "step": 12540 }, { "epoch": 0.62, "grad_norm": 2.1188244819641113, "learning_rate": 1.90152675527447e-05, "loss": 5.4004, "step": 12544 }, { "epoch": 0.62, "grad_norm": 2.0802998542785645, "learning_rate": 1.9005385641583085e-05, "loss": 5.4416, "step": 12548 }, { "epoch": 0.62, "grad_norm": 1.834084153175354, "learning_rate": 1.8995503730421467e-05, "loss": 5.6306, "step": 12552 }, { "epoch": 0.62, "grad_norm": 1.8112331628799438, "learning_rate": 1.8985621819259846e-05, "loss": 5.5333, "step": 12556 }, { "epoch": 0.62, "grad_norm": 1.899707317352295, "learning_rate": 1.8975739908098228e-05, "loss": 5.4976, "step": 12560 }, { "epoch": 0.62, "grad_norm": 2.298161506652832, "learning_rate": 1.896585799693661e-05, "loss": 5.594, "step": 12564 }, { "epoch": 0.62, "grad_norm": 2.173597574234009, "learning_rate": 1.8955976085774992e-05, "loss": 5.572, "step": 12568 }, { "epoch": 0.62, "grad_norm": 1.8348718881607056, "learning_rate": 1.894609417461337e-05, "loss": 5.4743, "step": 12572 }, { "epoch": 0.62, "grad_norm": 1.7967536449432373, "learning_rate": 1.8936212263451753e-05, "loss": 5.5196, "step": 12576 }, { "epoch": 0.62, "grad_norm": 1.9703553915023804, "learning_rate": 1.8926330352290135e-05, "loss": 5.532, "step": 12580 }, { "epoch": 0.62, "grad_norm": 1.7581743001937866, "learning_rate": 1.8916448441128513e-05, "loss": 5.5114, "step": 12584 }, { "epoch": 0.62, "grad_norm": 2.0711758136749268, "learning_rate": 1.8906566529966895e-05, "loss": 5.4094, "step": 12588 }, { "epoch": 0.62, "grad_norm": 1.7996923923492432, "learning_rate": 1.8896684618805277e-05, "loss": 5.5836, "step": 12592 }, { "epoch": 0.62, "grad_norm": 1.97800612449646, "learning_rate": 1.888680270764366e-05, "loss": 5.4898, "step": 12596 }, { "epoch": 0.62, "grad_norm": 1.7940218448638916, "learning_rate": 1.8876920796482038e-05, "loss": 5.6005, "step": 12600 }, { "epoch": 0.62, "grad_norm": 1.8298521041870117, "learning_rate": 1.886703888532042e-05, "loss": 
5.55, "step": 12604 }, { "epoch": 0.62, "grad_norm": 2.040109872817993, "learning_rate": 1.8857156974158806e-05, "loss": 5.5685, "step": 12608 }, { "epoch": 0.62, "grad_norm": 1.8531662225723267, "learning_rate": 1.8847275062997184e-05, "loss": 5.5343, "step": 12612 }, { "epoch": 0.62, "grad_norm": 2.1842970848083496, "learning_rate": 1.8837393151835566e-05, "loss": 5.4704, "step": 12616 }, { "epoch": 0.62, "grad_norm": 1.876779556274414, "learning_rate": 1.882751124067395e-05, "loss": 5.5272, "step": 12620 }, { "epoch": 0.62, "grad_norm": 1.9100033044815063, "learning_rate": 1.881762932951233e-05, "loss": 5.4669, "step": 12624 }, { "epoch": 0.62, "grad_norm": 2.233772039413452, "learning_rate": 1.880774741835071e-05, "loss": 5.5786, "step": 12628 }, { "epoch": 0.62, "grad_norm": 2.021141767501831, "learning_rate": 1.879786550718909e-05, "loss": 5.5469, "step": 12632 }, { "epoch": 0.62, "grad_norm": 1.8748712539672852, "learning_rate": 1.8787983596027473e-05, "loss": 5.4186, "step": 12636 }, { "epoch": 0.62, "grad_norm": 2.0556745529174805, "learning_rate": 1.8778101684865855e-05, "loss": 5.6404, "step": 12640 }, { "epoch": 0.62, "grad_norm": 2.089085102081299, "learning_rate": 1.8768219773704234e-05, "loss": 5.6056, "step": 12644 }, { "epoch": 0.62, "grad_norm": 1.8434518575668335, "learning_rate": 1.8758337862542616e-05, "loss": 5.6488, "step": 12648 }, { "epoch": 0.63, "grad_norm": 2.003434658050537, "learning_rate": 1.8748455951380998e-05, "loss": 5.5057, "step": 12652 }, { "epoch": 0.63, "grad_norm": 2.292663335800171, "learning_rate": 1.8738574040219377e-05, "loss": 5.5022, "step": 12656 }, { "epoch": 0.63, "grad_norm": 1.9476063251495361, "learning_rate": 1.872869212905776e-05, "loss": 5.5844, "step": 12660 }, { "epoch": 0.63, "grad_norm": 2.138032913208008, "learning_rate": 1.8718810217896144e-05, "loss": 5.4383, "step": 12664 }, { "epoch": 0.63, "grad_norm": 1.8477308750152588, "learning_rate": 1.8708928306734523e-05, "loss": 5.5665, "step": 12668 }, { 
"epoch": 0.63, "grad_norm": 1.9181241989135742, "learning_rate": 1.8699046395572905e-05, "loss": 5.5661, "step": 12672 }, { "epoch": 0.63, "grad_norm": 1.928312063217163, "learning_rate": 1.8689164484411287e-05, "loss": 5.5366, "step": 12676 }, { "epoch": 0.63, "grad_norm": 2.210855484008789, "learning_rate": 1.867928257324967e-05, "loss": 5.5136, "step": 12680 }, { "epoch": 0.63, "grad_norm": 2.030755043029785, "learning_rate": 1.8669400662088048e-05, "loss": 5.4813, "step": 12684 }, { "epoch": 0.63, "grad_norm": 2.1717166900634766, "learning_rate": 1.865951875092643e-05, "loss": 5.579, "step": 12688 }, { "epoch": 0.63, "grad_norm": 2.068718671798706, "learning_rate": 1.8649636839764812e-05, "loss": 5.4381, "step": 12692 }, { "epoch": 0.63, "grad_norm": 2.0134997367858887, "learning_rate": 1.8639754928603194e-05, "loss": 5.6513, "step": 12696 }, { "epoch": 0.63, "grad_norm": 2.061288356781006, "learning_rate": 1.8629873017441573e-05, "loss": 5.4894, "step": 12700 }, { "epoch": 0.63, "grad_norm": 2.0297648906707764, "learning_rate": 1.8619991106279955e-05, "loss": 5.5485, "step": 12704 }, { "epoch": 0.63, "grad_norm": 2.0792784690856934, "learning_rate": 1.8610109195118337e-05, "loss": 5.5622, "step": 12708 }, { "epoch": 0.63, "grad_norm": 2.003371238708496, "learning_rate": 1.8600227283956715e-05, "loss": 5.5616, "step": 12712 }, { "epoch": 0.63, "grad_norm": 2.0913472175598145, "learning_rate": 1.8590345372795098e-05, "loss": 5.6751, "step": 12716 }, { "epoch": 0.63, "grad_norm": 1.9094600677490234, "learning_rate": 1.8580463461633483e-05, "loss": 5.5047, "step": 12720 }, { "epoch": 0.63, "grad_norm": 2.1928985118865967, "learning_rate": 1.857058155047186e-05, "loss": 5.6044, "step": 12724 }, { "epoch": 0.63, "grad_norm": 1.9102288484573364, "learning_rate": 1.8560699639310244e-05, "loss": 5.4843, "step": 12728 }, { "epoch": 0.63, "grad_norm": 1.908156156539917, "learning_rate": 1.8550817728148626e-05, "loss": 5.5217, "step": 12732 }, { "epoch": 0.63, 
"grad_norm": 1.8940017223358154, "learning_rate": 1.8540935816987008e-05, "loss": 5.5211, "step": 12736 }, { "epoch": 0.63, "grad_norm": 1.6733046770095825, "learning_rate": 1.8531053905825386e-05, "loss": 5.6507, "step": 12740 }, { "epoch": 0.63, "grad_norm": 1.9494881629943848, "learning_rate": 1.852117199466377e-05, "loss": 5.6028, "step": 12744 }, { "epoch": 0.63, "grad_norm": 2.361642360687256, "learning_rate": 1.851129008350215e-05, "loss": 5.5678, "step": 12748 }, { "epoch": 0.63, "grad_norm": 1.9810757637023926, "learning_rate": 1.8501408172340533e-05, "loss": 5.5055, "step": 12752 }, { "epoch": 0.63, "grad_norm": 2.196544885635376, "learning_rate": 1.849152626117891e-05, "loss": 5.4622, "step": 12756 }, { "epoch": 0.63, "grad_norm": 1.841874599456787, "learning_rate": 1.8481644350017293e-05, "loss": 5.525, "step": 12760 }, { "epoch": 0.63, "grad_norm": 1.982703685760498, "learning_rate": 1.8471762438855675e-05, "loss": 5.5797, "step": 12764 }, { "epoch": 0.63, "grad_norm": 2.193528413772583, "learning_rate": 1.8461880527694058e-05, "loss": 5.4744, "step": 12768 }, { "epoch": 0.63, "grad_norm": 1.6755714416503906, "learning_rate": 1.8451998616532436e-05, "loss": 5.4944, "step": 12772 }, { "epoch": 0.63, "grad_norm": 1.9214802980422974, "learning_rate": 1.8442116705370818e-05, "loss": 5.4525, "step": 12776 }, { "epoch": 0.63, "grad_norm": 2.1332216262817383, "learning_rate": 1.8432234794209204e-05, "loss": 5.4974, "step": 12780 }, { "epoch": 0.63, "grad_norm": 1.9983022212982178, "learning_rate": 1.8422352883047582e-05, "loss": 5.4737, "step": 12784 }, { "epoch": 0.63, "grad_norm": 2.090367555618286, "learning_rate": 1.8412470971885964e-05, "loss": 5.4413, "step": 12788 }, { "epoch": 0.63, "grad_norm": 1.903393030166626, "learning_rate": 1.8402589060724346e-05, "loss": 5.5338, "step": 12792 }, { "epoch": 0.63, "grad_norm": 1.8425928354263306, "learning_rate": 1.8392707149562725e-05, "loss": 5.5461, "step": 12796 }, { "epoch": 0.63, "grad_norm": 
1.7911487817764282, "learning_rate": 1.8382825238401107e-05, "loss": 5.5144, "step": 12800 }, { "epoch": 0.63, "grad_norm": 1.9388378858566284, "learning_rate": 1.837294332723949e-05, "loss": 5.4777, "step": 12804 }, { "epoch": 0.63, "grad_norm": 1.9651098251342773, "learning_rate": 1.836306141607787e-05, "loss": 5.583, "step": 12808 }, { "epoch": 0.63, "grad_norm": 2.097846269607544, "learning_rate": 1.835317950491625e-05, "loss": 5.4835, "step": 12812 }, { "epoch": 0.63, "grad_norm": 1.8683522939682007, "learning_rate": 1.8343297593754632e-05, "loss": 5.542, "step": 12816 }, { "epoch": 0.63, "grad_norm": 1.9476234912872314, "learning_rate": 1.8333415682593014e-05, "loss": 5.5376, "step": 12820 }, { "epoch": 0.63, "grad_norm": 2.049328088760376, "learning_rate": 1.8323533771431396e-05, "loss": 5.4911, "step": 12824 }, { "epoch": 0.63, "grad_norm": 1.8876453638076782, "learning_rate": 1.8313651860269775e-05, "loss": 5.5226, "step": 12828 }, { "epoch": 0.63, "grad_norm": 1.9597487449645996, "learning_rate": 1.8303769949108157e-05, "loss": 5.4785, "step": 12832 }, { "epoch": 0.63, "grad_norm": 2.0029759407043457, "learning_rate": 1.8293888037946542e-05, "loss": 5.5109, "step": 12836 }, { "epoch": 0.63, "grad_norm": 1.9219965934753418, "learning_rate": 1.828400612678492e-05, "loss": 5.508, "step": 12840 }, { "epoch": 0.63, "grad_norm": 1.9963774681091309, "learning_rate": 1.8274124215623303e-05, "loss": 5.664, "step": 12844 }, { "epoch": 0.63, "grad_norm": 2.181628942489624, "learning_rate": 1.8264242304461685e-05, "loss": 5.4549, "step": 12848 }, { "epoch": 0.64, "grad_norm": 1.9105952978134155, "learning_rate": 1.8254360393300067e-05, "loss": 5.6288, "step": 12852 }, { "epoch": 0.64, "grad_norm": 1.8461229801177979, "learning_rate": 1.8244478482138446e-05, "loss": 5.5195, "step": 12856 }, { "epoch": 0.64, "grad_norm": 1.9446773529052734, "learning_rate": 1.8234596570976828e-05, "loss": 5.4313, "step": 12860 }, { "epoch": 0.64, "grad_norm": 2.007297992706299, 
"learning_rate": 1.822471465981521e-05, "loss": 5.6664, "step": 12864 }, { "epoch": 0.64, "grad_norm": 2.0537166595458984, "learning_rate": 1.821483274865359e-05, "loss": 5.5447, "step": 12868 }, { "epoch": 0.64, "grad_norm": 2.0639407634735107, "learning_rate": 1.820495083749197e-05, "loss": 5.4532, "step": 12872 }, { "epoch": 0.64, "grad_norm": 1.9469093084335327, "learning_rate": 1.8195068926330353e-05, "loss": 5.5461, "step": 12876 }, { "epoch": 0.64, "grad_norm": 1.766298770904541, "learning_rate": 1.8185187015168735e-05, "loss": 5.4775, "step": 12880 }, { "epoch": 0.64, "grad_norm": 1.7954472303390503, "learning_rate": 1.8175305104007114e-05, "loss": 5.5326, "step": 12884 }, { "epoch": 0.64, "grad_norm": 1.8361804485321045, "learning_rate": 1.8165423192845496e-05, "loss": 5.5217, "step": 12888 }, { "epoch": 0.64, "grad_norm": 2.2192041873931885, "learning_rate": 1.815554128168388e-05, "loss": 5.4166, "step": 12892 }, { "epoch": 0.64, "grad_norm": 2.092569351196289, "learning_rate": 1.814565937052226e-05, "loss": 5.5973, "step": 12896 }, { "epoch": 0.64, "grad_norm": 1.8529601097106934, "learning_rate": 1.8135777459360642e-05, "loss": 5.4088, "step": 12900 }, { "epoch": 0.64, "grad_norm": 1.7156609296798706, "learning_rate": 1.8125895548199024e-05, "loss": 5.5506, "step": 12904 }, { "epoch": 0.64, "grad_norm": 2.3277995586395264, "learning_rate": 1.8116013637037406e-05, "loss": 5.4852, "step": 12908 }, { "epoch": 0.64, "grad_norm": 2.039177894592285, "learning_rate": 1.8106131725875785e-05, "loss": 5.509, "step": 12912 }, { "epoch": 0.64, "grad_norm": 2.2541489601135254, "learning_rate": 1.8096249814714167e-05, "loss": 5.5551, "step": 12916 }, { "epoch": 0.64, "grad_norm": 1.9068487882614136, "learning_rate": 1.808636790355255e-05, "loss": 5.5223, "step": 12920 }, { "epoch": 0.64, "grad_norm": 1.8464723825454712, "learning_rate": 1.807648599239093e-05, "loss": 5.4688, "step": 12924 }, { "epoch": 0.64, "grad_norm": 1.8630855083465576, "learning_rate": 
1.806660408122931e-05, "loss": 5.521, "step": 12928 }, { "epoch": 0.64, "grad_norm": 1.9354201555252075, "learning_rate": 1.805672217006769e-05, "loss": 5.5234, "step": 12932 }, { "epoch": 0.64, "grad_norm": 2.2278544902801514, "learning_rate": 1.8046840258906074e-05, "loss": 5.563, "step": 12936 }, { "epoch": 0.64, "grad_norm": 2.0102896690368652, "learning_rate": 1.8036958347744452e-05, "loss": 5.6071, "step": 12940 }, { "epoch": 0.64, "grad_norm": 2.12506103515625, "learning_rate": 1.8027076436582834e-05, "loss": 5.5518, "step": 12944 }, { "epoch": 0.64, "grad_norm": 2.134568929672241, "learning_rate": 1.8017194525421216e-05, "loss": 5.4289, "step": 12948 }, { "epoch": 0.64, "grad_norm": 1.866938829421997, "learning_rate": 1.80073126142596e-05, "loss": 5.4047, "step": 12952 }, { "epoch": 0.64, "grad_norm": 2.0489606857299805, "learning_rate": 1.799743070309798e-05, "loss": 5.4668, "step": 12956 }, { "epoch": 0.64, "grad_norm": 2.130350351333618, "learning_rate": 1.7987548791936363e-05, "loss": 5.599, "step": 12960 }, { "epoch": 0.64, "grad_norm": 2.1276466846466064, "learning_rate": 1.7977666880774745e-05, "loss": 5.3925, "step": 12964 }, { "epoch": 0.64, "grad_norm": 1.8620883226394653, "learning_rate": 1.7967784969613123e-05, "loss": 5.4918, "step": 12968 }, { "epoch": 0.64, "grad_norm": 1.7407227754592896, "learning_rate": 1.7957903058451505e-05, "loss": 5.5406, "step": 12972 }, { "epoch": 0.64, "grad_norm": 1.9221688508987427, "learning_rate": 1.7948021147289887e-05, "loss": 5.5111, "step": 12976 }, { "epoch": 0.64, "grad_norm": 2.0765016078948975, "learning_rate": 1.793813923612827e-05, "loss": 5.6063, "step": 12980 }, { "epoch": 0.64, "grad_norm": 2.040132999420166, "learning_rate": 1.7928257324966648e-05, "loss": 5.5904, "step": 12984 }, { "epoch": 0.64, "grad_norm": 1.8433319330215454, "learning_rate": 1.791837541380503e-05, "loss": 5.4806, "step": 12988 }, { "epoch": 0.64, "grad_norm": 2.065800428390503, "learning_rate": 1.7908493502643412e-05, "loss": 
5.563, "step": 12992 }, { "epoch": 0.64, "grad_norm": 2.199831485748291, "learning_rate": 1.789861159148179e-05, "loss": 5.5233, "step": 12996 }, { "epoch": 0.64, "grad_norm": 1.8309836387634277, "learning_rate": 1.7888729680320173e-05, "loss": 5.4911, "step": 13000 }, { "epoch": 0.64, "grad_norm": 1.900347113609314, "learning_rate": 1.7878847769158555e-05, "loss": 5.5083, "step": 13004 }, { "epoch": 0.64, "grad_norm": 1.8298702239990234, "learning_rate": 1.786896585799694e-05, "loss": 5.5197, "step": 13008 }, { "epoch": 0.64, "grad_norm": 1.8966771364212036, "learning_rate": 1.785908394683532e-05, "loss": 5.4034, "step": 13012 }, { "epoch": 0.64, "grad_norm": 1.9861708879470825, "learning_rate": 1.78492020356737e-05, "loss": 5.3773, "step": 13016 }, { "epoch": 0.64, "grad_norm": 2.1633896827697754, "learning_rate": 1.7839320124512083e-05, "loss": 5.5735, "step": 13020 }, { "epoch": 0.64, "grad_norm": 1.8944169282913208, "learning_rate": 1.7829438213350462e-05, "loss": 5.4982, "step": 13024 }, { "epoch": 0.64, "grad_norm": 2.358996868133545, "learning_rate": 1.7819556302188844e-05, "loss": 5.5851, "step": 13028 }, { "epoch": 0.64, "grad_norm": 1.8002705574035645, "learning_rate": 1.7809674391027226e-05, "loss": 5.5903, "step": 13032 }, { "epoch": 0.64, "grad_norm": 2.174081325531006, "learning_rate": 1.7799792479865608e-05, "loss": 5.5327, "step": 13036 }, { "epoch": 0.64, "grad_norm": 2.115267515182495, "learning_rate": 1.7789910568703987e-05, "loss": 5.6085, "step": 13040 }, { "epoch": 0.64, "grad_norm": 2.197908878326416, "learning_rate": 1.778002865754237e-05, "loss": 5.5499, "step": 13044 }, { "epoch": 0.64, "grad_norm": 2.2714781761169434, "learning_rate": 1.777014674638075e-05, "loss": 5.6221, "step": 13048 }, { "epoch": 0.64, "grad_norm": 2.1368441581726074, "learning_rate": 1.7760264835219133e-05, "loss": 5.5613, "step": 13052 }, { "epoch": 0.65, "grad_norm": 2.0283219814300537, "learning_rate": 1.775038292405751e-05, "loss": 5.4734, "step": 13056 }, { 
"epoch": 0.65, "grad_norm": 1.7801271677017212, "learning_rate": 1.7740501012895894e-05, "loss": 5.4928, "step": 13060 }, { "epoch": 0.65, "grad_norm": 1.8141615390777588, "learning_rate": 1.7730619101734276e-05, "loss": 5.4943, "step": 13064 }, { "epoch": 0.65, "grad_norm": 1.9424062967300415, "learning_rate": 1.7720737190572658e-05, "loss": 5.4888, "step": 13068 }, { "epoch": 0.65, "grad_norm": 1.7956730127334595, "learning_rate": 1.771085527941104e-05, "loss": 5.3367, "step": 13072 }, { "epoch": 0.65, "grad_norm": 1.8769028186798096, "learning_rate": 1.7700973368249422e-05, "loss": 5.5365, "step": 13076 }, { "epoch": 0.65, "grad_norm": 1.8470765352249146, "learning_rate": 1.76910914570878e-05, "loss": 5.6281, "step": 13080 }, { "epoch": 0.65, "grad_norm": 1.9143494367599487, "learning_rate": 1.7681209545926183e-05, "loss": 5.403, "step": 13084 }, { "epoch": 0.65, "grad_norm": 1.8906290531158447, "learning_rate": 1.7671327634764565e-05, "loss": 5.4201, "step": 13088 }, { "epoch": 0.65, "grad_norm": 1.9759694337844849, "learning_rate": 1.7661445723602947e-05, "loss": 5.5, "step": 13092 }, { "epoch": 0.65, "grad_norm": 2.16597580909729, "learning_rate": 1.7651563812441325e-05, "loss": 5.4831, "step": 13096 }, { "epoch": 0.65, "grad_norm": 2.142273187637329, "learning_rate": 1.7641681901279708e-05, "loss": 5.5608, "step": 13100 }, { "epoch": 0.65, "grad_norm": 2.0143542289733887, "learning_rate": 1.763179999011809e-05, "loss": 5.4914, "step": 13104 }, { "epoch": 0.65, "grad_norm": 2.0241010189056396, "learning_rate": 1.762191807895647e-05, "loss": 5.5402, "step": 13108 }, { "epoch": 0.65, "grad_norm": 2.111691951751709, "learning_rate": 1.761203616779485e-05, "loss": 5.5417, "step": 13112 }, { "epoch": 0.65, "grad_norm": 2.0966546535491943, "learning_rate": 1.7602154256633232e-05, "loss": 5.4544, "step": 13116 }, { "epoch": 0.65, "grad_norm": 2.2063889503479004, "learning_rate": 1.7592272345471614e-05, "loss": 5.5447, "step": 13120 }, { "epoch": 0.65, "grad_norm": 
2.0416488647460938, "learning_rate": 1.7582390434309997e-05, "loss": 5.56, "step": 13124 }, { "epoch": 0.65, "grad_norm": 2.1015825271606445, "learning_rate": 1.757250852314838e-05, "loss": 5.5632, "step": 13128 }, { "epoch": 0.65, "grad_norm": 2.154283046722412, "learning_rate": 1.756262661198676e-05, "loss": 5.4369, "step": 13132 }, { "epoch": 0.65, "grad_norm": 1.9613300561904907, "learning_rate": 1.7552744700825143e-05, "loss": 5.4493, "step": 13136 }, { "epoch": 0.65, "grad_norm": 1.936897873878479, "learning_rate": 1.754286278966352e-05, "loss": 5.6429, "step": 13140 }, { "epoch": 0.65, "grad_norm": 2.1174933910369873, "learning_rate": 1.7532980878501903e-05, "loss": 5.5222, "step": 13144 }, { "epoch": 0.65, "grad_norm": 1.8853952884674072, "learning_rate": 1.7523098967340285e-05, "loss": 5.5823, "step": 13148 }, { "epoch": 0.65, "grad_norm": 1.7702313661575317, "learning_rate": 1.7513217056178664e-05, "loss": 5.5205, "step": 13152 }, { "epoch": 0.65, "grad_norm": 1.8901150226593018, "learning_rate": 1.7503335145017046e-05, "loss": 5.5227, "step": 13156 }, { "epoch": 0.65, "grad_norm": 1.9898940324783325, "learning_rate": 1.7493453233855428e-05, "loss": 5.5064, "step": 13160 }, { "epoch": 0.65, "grad_norm": 1.8458938598632812, "learning_rate": 1.748357132269381e-05, "loss": 5.4032, "step": 13164 }, { "epoch": 0.65, "grad_norm": 2.1321139335632324, "learning_rate": 1.747368941153219e-05, "loss": 5.5491, "step": 13168 }, { "epoch": 0.65, "grad_norm": 2.055555820465088, "learning_rate": 1.746380750037057e-05, "loss": 5.4821, "step": 13172 }, { "epoch": 0.65, "grad_norm": 2.0164570808410645, "learning_rate": 1.7453925589208953e-05, "loss": 5.4772, "step": 13176 }, { "epoch": 0.65, "grad_norm": 1.8798964023590088, "learning_rate": 1.7444043678047335e-05, "loss": 5.6292, "step": 13180 }, { "epoch": 0.65, "grad_norm": 1.8820830583572388, "learning_rate": 1.7434161766885717e-05, "loss": 5.5503, "step": 13184 }, { "epoch": 0.65, "grad_norm": 1.768707513809204, 
"learning_rate": 1.74242798557241e-05, "loss": 5.5907, "step": 13188 }, { "epoch": 0.65, "grad_norm": 1.7819797992706299, "learning_rate": 1.741439794456248e-05, "loss": 5.3631, "step": 13192 }, { "epoch": 0.65, "grad_norm": 1.9888780117034912, "learning_rate": 1.740451603340086e-05, "loss": 5.5318, "step": 13196 }, { "epoch": 0.65, "grad_norm": 1.7733293771743774, "learning_rate": 1.7394634122239242e-05, "loss": 5.4794, "step": 13200 }, { "epoch": 0.65, "grad_norm": 2.08803653717041, "learning_rate": 1.7384752211077624e-05, "loss": 5.5392, "step": 13204 }, { "epoch": 0.65, "grad_norm": 1.7837083339691162, "learning_rate": 1.7374870299916006e-05, "loss": 5.5612, "step": 13208 }, { "epoch": 0.65, "grad_norm": 2.200601100921631, "learning_rate": 1.7364988388754385e-05, "loss": 5.4618, "step": 13212 }, { "epoch": 0.65, "grad_norm": 2.062946319580078, "learning_rate": 1.7355106477592767e-05, "loss": 5.4031, "step": 13216 }, { "epoch": 0.65, "grad_norm": 1.8929904699325562, "learning_rate": 1.734522456643115e-05, "loss": 5.6011, "step": 13220 }, { "epoch": 0.65, "grad_norm": 2.175036907196045, "learning_rate": 1.7335342655269528e-05, "loss": 5.5038, "step": 13224 }, { "epoch": 0.65, "grad_norm": 2.1107425689697266, "learning_rate": 1.732546074410791e-05, "loss": 5.4624, "step": 13228 }, { "epoch": 0.65, "grad_norm": 1.8881957530975342, "learning_rate": 1.7315578832946292e-05, "loss": 5.4579, "step": 13232 }, { "epoch": 0.65, "grad_norm": 1.9419997930526733, "learning_rate": 1.7305696921784674e-05, "loss": 5.5691, "step": 13236 }, { "epoch": 0.65, "grad_norm": 2.006504535675049, "learning_rate": 1.7295815010623056e-05, "loss": 5.604, "step": 13240 }, { "epoch": 0.65, "grad_norm": 1.9548697471618652, "learning_rate": 1.7285933099461438e-05, "loss": 5.5202, "step": 13244 }, { "epoch": 0.65, "grad_norm": 1.903361201286316, "learning_rate": 1.727605118829982e-05, "loss": 5.5459, "step": 13248 }, { "epoch": 0.65, "grad_norm": 1.9035142660140991, "learning_rate": 
1.72661692771382e-05, "loss": 5.5067, "step": 13252 }, { "epoch": 0.65, "grad_norm": 1.9801918268203735, "learning_rate": 1.725628736597658e-05, "loss": 5.502, "step": 13256 }, { "epoch": 0.66, "grad_norm": 1.8996813297271729, "learning_rate": 1.7246405454814963e-05, "loss": 5.4709, "step": 13260 }, { "epoch": 0.66, "grad_norm": 1.903206467628479, "learning_rate": 1.7236523543653345e-05, "loss": 5.4252, "step": 13264 }, { "epoch": 0.66, "grad_norm": 2.280048370361328, "learning_rate": 1.7226641632491724e-05, "loss": 5.6236, "step": 13268 }, { "epoch": 0.66, "grad_norm": 1.916698932647705, "learning_rate": 1.7216759721330106e-05, "loss": 5.5417, "step": 13272 }, { "epoch": 0.66, "grad_norm": 1.8944514989852905, "learning_rate": 1.7206877810168488e-05, "loss": 5.5273, "step": 13276 }, { "epoch": 0.66, "grad_norm": 2.160426139831543, "learning_rate": 1.7196995899006866e-05, "loss": 5.4925, "step": 13280 }, { "epoch": 0.66, "grad_norm": 1.9661693572998047, "learning_rate": 1.718711398784525e-05, "loss": 5.5566, "step": 13284 }, { "epoch": 0.66, "grad_norm": 2.1579537391662598, "learning_rate": 1.717723207668363e-05, "loss": 5.5, "step": 13288 }, { "epoch": 0.66, "grad_norm": 1.8311907052993774, "learning_rate": 1.7167350165522013e-05, "loss": 5.4402, "step": 13292 }, { "epoch": 0.66, "grad_norm": 2.067732810974121, "learning_rate": 1.7157468254360395e-05, "loss": 5.5876, "step": 13296 }, { "epoch": 0.66, "grad_norm": 1.9030920267105103, "learning_rate": 1.7147586343198777e-05, "loss": 5.5544, "step": 13300 }, { "epoch": 0.66, "grad_norm": 1.9689793586730957, "learning_rate": 1.713770443203716e-05, "loss": 5.5876, "step": 13304 }, { "epoch": 0.66, "grad_norm": 2.319972038269043, "learning_rate": 1.7127822520875537e-05, "loss": 5.6179, "step": 13308 }, { "epoch": 0.66, "grad_norm": 2.05124568939209, "learning_rate": 1.711794060971392e-05, "loss": 5.4808, "step": 13312 }, { "epoch": 0.66, "grad_norm": 2.054259777069092, "learning_rate": 1.71080586985523e-05, "loss": 
5.571, "step": 13316 }, { "epoch": 0.66, "grad_norm": 2.1697633266448975, "learning_rate": 1.7098176787390684e-05, "loss": 5.5687, "step": 13320 }, { "epoch": 0.66, "grad_norm": 2.158599853515625, "learning_rate": 1.7088294876229062e-05, "loss": 5.4535, "step": 13324 }, { "epoch": 0.66, "grad_norm": 2.263106107711792, "learning_rate": 1.7078412965067444e-05, "loss": 5.5662, "step": 13328 }, { "epoch": 0.66, "grad_norm": 1.9761734008789062, "learning_rate": 1.7068531053905826e-05, "loss": 5.4655, "step": 13332 }, { "epoch": 0.66, "grad_norm": 2.1491572856903076, "learning_rate": 1.705864914274421e-05, "loss": 5.5403, "step": 13336 }, { "epoch": 0.66, "grad_norm": 1.9614084959030151, "learning_rate": 1.7048767231582587e-05, "loss": 5.4883, "step": 13340 }, { "epoch": 0.66, "grad_norm": 2.1084208488464355, "learning_rate": 1.703888532042097e-05, "loss": 5.4915, "step": 13344 }, { "epoch": 0.66, "grad_norm": 1.9315608739852905, "learning_rate": 1.702900340925935e-05, "loss": 5.531, "step": 13348 }, { "epoch": 0.66, "grad_norm": 1.9300886392593384, "learning_rate": 1.7019121498097733e-05, "loss": 5.5294, "step": 13352 }, { "epoch": 0.66, "grad_norm": 1.9439579248428345, "learning_rate": 1.7009239586936115e-05, "loss": 5.5372, "step": 13356 }, { "epoch": 0.66, "grad_norm": 1.8347561359405518, "learning_rate": 1.6999357675774497e-05, "loss": 5.5304, "step": 13360 }, { "epoch": 0.66, "grad_norm": 1.8642383813858032, "learning_rate": 1.6989475764612876e-05, "loss": 5.3163, "step": 13364 }, { "epoch": 0.66, "grad_norm": 1.9651672840118408, "learning_rate": 1.6979593853451258e-05, "loss": 5.4271, "step": 13368 }, { "epoch": 0.66, "grad_norm": 2.146197557449341, "learning_rate": 1.696971194228964e-05, "loss": 5.6012, "step": 13372 }, { "epoch": 0.66, "grad_norm": 1.9717754125595093, "learning_rate": 1.6959830031128022e-05, "loss": 5.4703, "step": 13376 }, { "epoch": 0.66, "grad_norm": 1.8049124479293823, "learning_rate": 1.69499481199664e-05, "loss": 5.4398, "step": 13380 }, { 
"epoch": 0.66, "grad_norm": 1.8642454147338867, "learning_rate": 1.6940066208804783e-05, "loss": 5.5652, "step": 13384 }, { "epoch": 0.66, "grad_norm": 1.8307348489761353, "learning_rate": 1.6930184297643165e-05, "loss": 5.6079, "step": 13388 }, { "epoch": 0.66, "grad_norm": 1.8231866359710693, "learning_rate": 1.6920302386481547e-05, "loss": 5.4477, "step": 13392 }, { "epoch": 0.66, "grad_norm": 1.7777388095855713, "learning_rate": 1.6910420475319926e-05, "loss": 5.5014, "step": 13396 }, { "epoch": 0.66, "grad_norm": 2.1190943717956543, "learning_rate": 1.6900538564158308e-05, "loss": 5.512, "step": 13400 }, { "epoch": 0.66, "grad_norm": 1.7859139442443848, "learning_rate": 1.689065665299669e-05, "loss": 5.4176, "step": 13404 }, { "epoch": 0.66, "grad_norm": 1.937577486038208, "learning_rate": 1.6880774741835072e-05, "loss": 5.5242, "step": 13408 }, { "epoch": 0.66, "grad_norm": 1.9069881439208984, "learning_rate": 1.6870892830673454e-05, "loss": 5.5248, "step": 13412 }, { "epoch": 0.66, "grad_norm": 2.3434953689575195, "learning_rate": 1.6861010919511836e-05, "loss": 5.5887, "step": 13416 }, { "epoch": 0.66, "grad_norm": 2.0236103534698486, "learning_rate": 1.6851129008350218e-05, "loss": 5.5241, "step": 13420 }, { "epoch": 0.66, "grad_norm": 1.807458519935608, "learning_rate": 1.6841247097188597e-05, "loss": 5.5018, "step": 13424 }, { "epoch": 0.66, "grad_norm": 1.7654306888580322, "learning_rate": 1.683136518602698e-05, "loss": 5.5346, "step": 13428 }, { "epoch": 0.66, "grad_norm": 1.796311378479004, "learning_rate": 1.682148327486536e-05, "loss": 5.4943, "step": 13432 }, { "epoch": 0.66, "grad_norm": 2.1759448051452637, "learning_rate": 1.681160136370374e-05, "loss": 5.5745, "step": 13436 }, { "epoch": 0.66, "grad_norm": 1.9799151420593262, "learning_rate": 1.680171945254212e-05, "loss": 5.5721, "step": 13440 }, { "epoch": 0.66, "grad_norm": 1.8375133275985718, "learning_rate": 1.6791837541380504e-05, "loss": 5.5058, "step": 13444 }, { "epoch": 0.66, 
"grad_norm": 2.0697977542877197, "learning_rate": 1.6781955630218886e-05, "loss": 5.49, "step": 13448 }, { "epoch": 0.66, "grad_norm": 1.968781590461731, "learning_rate": 1.6772073719057264e-05, "loss": 5.414, "step": 13452 }, { "epoch": 0.66, "grad_norm": 2.182166814804077, "learning_rate": 1.6762191807895647e-05, "loss": 5.6226, "step": 13456 }, { "epoch": 0.67, "grad_norm": 2.110377073287964, "learning_rate": 1.675230989673403e-05, "loss": 5.5626, "step": 13460 }, { "epoch": 0.67, "grad_norm": 1.979601263999939, "learning_rate": 1.674242798557241e-05, "loss": 5.62, "step": 13464 }, { "epoch": 0.67, "grad_norm": 2.2129158973693848, "learning_rate": 1.6732546074410793e-05, "loss": 5.5565, "step": 13468 }, { "epoch": 0.67, "grad_norm": 2.0401570796966553, "learning_rate": 1.6722664163249175e-05, "loss": 5.4781, "step": 13472 }, { "epoch": 0.67, "grad_norm": 1.7625924348831177, "learning_rate": 1.6712782252087557e-05, "loss": 5.4104, "step": 13476 }, { "epoch": 0.67, "grad_norm": 1.8981072902679443, "learning_rate": 1.6702900340925936e-05, "loss": 5.5208, "step": 13480 }, { "epoch": 0.67, "grad_norm": 2.141097068786621, "learning_rate": 1.6693018429764318e-05, "loss": 5.505, "step": 13484 }, { "epoch": 0.67, "grad_norm": 2.0228660106658936, "learning_rate": 1.66831365186027e-05, "loss": 5.5653, "step": 13488 }, { "epoch": 0.67, "grad_norm": 1.9627779722213745, "learning_rate": 1.667325460744108e-05, "loss": 5.5383, "step": 13492 }, { "epoch": 0.67, "grad_norm": 1.80388605594635, "learning_rate": 1.666337269627946e-05, "loss": 5.4095, "step": 13496 }, { "epoch": 0.67, "grad_norm": 2.125562906265259, "learning_rate": 1.6653490785117842e-05, "loss": 5.5157, "step": 13500 }, { "epoch": 0.67, "grad_norm": 2.3643898963928223, "learning_rate": 1.6643608873956224e-05, "loss": 5.4778, "step": 13504 }, { "epoch": 0.67, "grad_norm": 1.9573434591293335, "learning_rate": 1.6633726962794603e-05, "loss": 5.3951, "step": 13508 }, { "epoch": 0.67, "grad_norm": 1.767749309539795, 
"learning_rate": 1.6623845051632985e-05, "loss": 5.5775, "step": 13512 }, { "epoch": 0.67, "grad_norm": 1.7242648601531982, "learning_rate": 1.6613963140471367e-05, "loss": 5.5681, "step": 13516 }, { "epoch": 0.67, "grad_norm": 2.098914384841919, "learning_rate": 1.660408122930975e-05, "loss": 5.4892, "step": 13520 }, { "epoch": 0.67, "grad_norm": 1.9471126794815063, "learning_rate": 1.659419931814813e-05, "loss": 5.5672, "step": 13524 }, { "epoch": 0.67, "grad_norm": 1.9518860578536987, "learning_rate": 1.6584317406986513e-05, "loss": 5.5318, "step": 13528 }, { "epoch": 0.67, "grad_norm": 1.9697927236557007, "learning_rate": 1.6574435495824896e-05, "loss": 5.5275, "step": 13532 }, { "epoch": 0.67, "grad_norm": 2.005502700805664, "learning_rate": 1.6564553584663274e-05, "loss": 5.6122, "step": 13536 }, { "epoch": 0.67, "grad_norm": 2.06354022026062, "learning_rate": 1.6554671673501656e-05, "loss": 5.3431, "step": 13540 }, { "epoch": 0.67, "grad_norm": 1.9697012901306152, "learning_rate": 1.6544789762340038e-05, "loss": 5.5376, "step": 13544 }, { "epoch": 0.67, "grad_norm": 1.9158987998962402, "learning_rate": 1.653490785117842e-05, "loss": 5.5176, "step": 13548 }, { "epoch": 0.67, "grad_norm": 2.0514771938323975, "learning_rate": 1.65250259400168e-05, "loss": 5.5724, "step": 13552 }, { "epoch": 0.67, "grad_norm": 1.8229410648345947, "learning_rate": 1.651514402885518e-05, "loss": 5.4933, "step": 13556 }, { "epoch": 0.67, "grad_norm": 1.6895649433135986, "learning_rate": 1.6505262117693563e-05, "loss": 5.5462, "step": 13560 }, { "epoch": 0.67, "grad_norm": 1.9636627435684204, "learning_rate": 1.6495380206531942e-05, "loss": 5.4918, "step": 13564 }, { "epoch": 0.67, "grad_norm": 1.728353500366211, "learning_rate": 1.6485498295370324e-05, "loss": 5.4126, "step": 13568 }, { "epoch": 0.67, "grad_norm": 1.8992080688476562, "learning_rate": 1.6475616384208706e-05, "loss": 5.4954, "step": 13572 }, { "epoch": 0.67, "grad_norm": 1.8971209526062012, "learning_rate": 
1.6465734473047088e-05, "loss": 5.5205, "step": 13576 }, { "epoch": 0.67, "grad_norm": 1.9099971055984497, "learning_rate": 1.6455852561885467e-05, "loss": 5.5401, "step": 13580 }, { "epoch": 0.67, "grad_norm": 1.7198883295059204, "learning_rate": 1.6445970650723852e-05, "loss": 5.5187, "step": 13584 }, { "epoch": 0.67, "grad_norm": 1.854477882385254, "learning_rate": 1.6436088739562234e-05, "loss": 5.5558, "step": 13588 }, { "epoch": 0.67, "grad_norm": 1.9261908531188965, "learning_rate": 1.6426206828400613e-05, "loss": 5.5156, "step": 13592 }, { "epoch": 0.67, "grad_norm": 2.166208505630493, "learning_rate": 1.6416324917238995e-05, "loss": 5.5002, "step": 13596 }, { "epoch": 0.67, "grad_norm": 1.8934109210968018, "learning_rate": 1.6406443006077377e-05, "loss": 5.5092, "step": 13600 }, { "epoch": 0.67, "grad_norm": 1.830947756767273, "learning_rate": 1.639656109491576e-05, "loss": 5.5542, "step": 13604 }, { "epoch": 0.67, "grad_norm": 1.86097252368927, "learning_rate": 1.6386679183754138e-05, "loss": 5.502, "step": 13608 }, { "epoch": 0.67, "grad_norm": 1.9578551054000854, "learning_rate": 1.637679727259252e-05, "loss": 5.5351, "step": 13612 }, { "epoch": 0.67, "grad_norm": 1.9985218048095703, "learning_rate": 1.6366915361430902e-05, "loss": 5.403, "step": 13616 }, { "epoch": 0.67, "grad_norm": 2.346946954727173, "learning_rate": 1.6357033450269284e-05, "loss": 5.4314, "step": 13620 }, { "epoch": 0.67, "grad_norm": 1.9613852500915527, "learning_rate": 1.6347151539107663e-05, "loss": 5.4851, "step": 13624 }, { "epoch": 0.67, "grad_norm": 2.2844598293304443, "learning_rate": 1.6337269627946045e-05, "loss": 5.4646, "step": 13628 }, { "epoch": 0.67, "grad_norm": 2.0212109088897705, "learning_rate": 1.6327387716784427e-05, "loss": 5.5153, "step": 13632 }, { "epoch": 0.67, "grad_norm": 2.0245635509490967, "learning_rate": 1.6317505805622805e-05, "loss": 5.5516, "step": 13636 }, { "epoch": 0.67, "grad_norm": 2.051738977432251, "learning_rate": 1.630762389446119e-05, 
"loss": 5.6327, "step": 13640 }, { "epoch": 0.67, "grad_norm": 2.0657126903533936, "learning_rate": 1.6297741983299573e-05, "loss": 5.5882, "step": 13644 }, { "epoch": 0.67, "grad_norm": 1.979304313659668, "learning_rate": 1.628786007213795e-05, "loss": 5.3553, "step": 13648 }, { "epoch": 0.67, "grad_norm": 2.10304856300354, "learning_rate": 1.6277978160976334e-05, "loss": 5.5474, "step": 13652 }, { "epoch": 0.67, "grad_norm": 2.0482354164123535, "learning_rate": 1.6268096249814716e-05, "loss": 5.5004, "step": 13656 }, { "epoch": 0.67, "grad_norm": 2.0741443634033203, "learning_rate": 1.6258214338653098e-05, "loss": 5.4887, "step": 13660 }, { "epoch": 0.68, "grad_norm": 1.9850010871887207, "learning_rate": 1.6248332427491476e-05, "loss": 5.4449, "step": 13664 }, { "epoch": 0.68, "grad_norm": 2.456059217453003, "learning_rate": 1.623845051632986e-05, "loss": 5.4471, "step": 13668 }, { "epoch": 0.68, "grad_norm": 1.8275492191314697, "learning_rate": 1.622856860516824e-05, "loss": 5.5139, "step": 13672 }, { "epoch": 0.68, "grad_norm": 2.166975736618042, "learning_rate": 1.6218686694006623e-05, "loss": 5.5475, "step": 13676 }, { "epoch": 0.68, "grad_norm": 2.0723702907562256, "learning_rate": 1.6208804782845e-05, "loss": 5.6062, "step": 13680 }, { "epoch": 0.68, "grad_norm": 2.227518320083618, "learning_rate": 1.6198922871683383e-05, "loss": 5.4955, "step": 13684 }, { "epoch": 0.68, "grad_norm": 1.9052515029907227, "learning_rate": 1.6189040960521765e-05, "loss": 5.5553, "step": 13688 }, { "epoch": 0.68, "grad_norm": 1.945573329925537, "learning_rate": 1.6179159049360147e-05, "loss": 5.3597, "step": 13692 }, { "epoch": 0.68, "grad_norm": 1.874603271484375, "learning_rate": 1.616927713819853e-05, "loss": 5.391, "step": 13696 }, { "epoch": 0.68, "grad_norm": 2.339505672454834, "learning_rate": 1.615939522703691e-05, "loss": 5.4194, "step": 13700 }, { "epoch": 0.68, "grad_norm": 1.7062286138534546, "learning_rate": 1.6149513315875294e-05, "loss": 5.4586, "step": 13704 }, 
{ "epoch": 0.68, "grad_norm": 1.8526628017425537, "learning_rate": 1.6139631404713672e-05, "loss": 5.3614, "step": 13708 }, { "epoch": 0.68, "grad_norm": 1.8179652690887451, "learning_rate": 1.6129749493552054e-05, "loss": 5.4977, "step": 13712 }, { "epoch": 0.68, "grad_norm": 1.889673113822937, "learning_rate": 1.6119867582390436e-05, "loss": 5.5591, "step": 13716 }, { "epoch": 0.68, "grad_norm": 2.039731740951538, "learning_rate": 1.6109985671228815e-05, "loss": 5.4809, "step": 13720 }, { "epoch": 0.68, "grad_norm": 2.3024516105651855, "learning_rate": 1.6100103760067197e-05, "loss": 5.4518, "step": 13724 }, { "epoch": 0.68, "grad_norm": 1.9836472272872925, "learning_rate": 1.609022184890558e-05, "loss": 5.4812, "step": 13728 }, { "epoch": 0.68, "grad_norm": 1.8883979320526123, "learning_rate": 1.608033993774396e-05, "loss": 5.4914, "step": 13732 }, { "epoch": 0.68, "grad_norm": 1.9850894212722778, "learning_rate": 1.607045802658234e-05, "loss": 5.6323, "step": 13736 }, { "epoch": 0.68, "grad_norm": 2.124187707901001, "learning_rate": 1.6060576115420722e-05, "loss": 5.5609, "step": 13740 }, { "epoch": 0.68, "grad_norm": 1.9509978294372559, "learning_rate": 1.6050694204259104e-05, "loss": 5.5234, "step": 13744 }, { "epoch": 0.68, "grad_norm": 1.9340142011642456, "learning_rate": 1.6040812293097486e-05, "loss": 5.491, "step": 13748 }, { "epoch": 0.68, "grad_norm": 2.0254647731781006, "learning_rate": 1.6030930381935865e-05, "loss": 5.4735, "step": 13752 }, { "epoch": 0.68, "grad_norm": 2.143260955810547, "learning_rate": 1.602104847077425e-05, "loss": 5.5477, "step": 13756 }, { "epoch": 0.68, "grad_norm": 1.6931935548782349, "learning_rate": 1.6011166559612632e-05, "loss": 5.5821, "step": 13760 }, { "epoch": 0.68, "grad_norm": 1.9342801570892334, "learning_rate": 1.600128464845101e-05, "loss": 5.4517, "step": 13764 }, { "epoch": 0.68, "grad_norm": 1.8290477991104126, "learning_rate": 1.5991402737289393e-05, "loss": 5.4992, "step": 13768 }, { "epoch": 0.68, 
"grad_norm": 2.020256757736206, "learning_rate": 1.5981520826127775e-05, "loss": 5.5175, "step": 13772 }, { "epoch": 0.68, "grad_norm": 2.106531858444214, "learning_rate": 1.5971638914966157e-05, "loss": 5.4482, "step": 13776 }, { "epoch": 0.68, "grad_norm": 1.9939652681350708, "learning_rate": 1.5961757003804536e-05, "loss": 5.4952, "step": 13780 }, { "epoch": 0.68, "grad_norm": 2.0970144271850586, "learning_rate": 1.5951875092642918e-05, "loss": 5.5438, "step": 13784 }, { "epoch": 0.68, "grad_norm": 1.9521361589431763, "learning_rate": 1.59419931814813e-05, "loss": 5.5225, "step": 13788 }, { "epoch": 0.68, "grad_norm": 2.0148961544036865, "learning_rate": 1.593211127031968e-05, "loss": 5.4785, "step": 13792 }, { "epoch": 0.68, "grad_norm": 2.1378774642944336, "learning_rate": 1.592222935915806e-05, "loss": 5.5553, "step": 13796 }, { "epoch": 0.68, "grad_norm": 2.312819719314575, "learning_rate": 1.5912347447996443e-05, "loss": 5.4033, "step": 13800 }, { "epoch": 0.68, "grad_norm": 2.2060418128967285, "learning_rate": 1.5902465536834825e-05, "loss": 5.4713, "step": 13804 }, { "epoch": 0.68, "grad_norm": 1.8953157663345337, "learning_rate": 1.5892583625673203e-05, "loss": 5.5035, "step": 13808 }, { "epoch": 0.68, "grad_norm": 1.7217421531677246, "learning_rate": 1.588270171451159e-05, "loss": 5.6139, "step": 13812 }, { "epoch": 0.68, "grad_norm": 1.9969755411148071, "learning_rate": 1.587281980334997e-05, "loss": 5.44, "step": 13816 }, { "epoch": 0.68, "grad_norm": 2.071129560470581, "learning_rate": 1.586293789218835e-05, "loss": 5.4202, "step": 13820 }, { "epoch": 0.68, "grad_norm": 1.8004354238510132, "learning_rate": 1.5853055981026732e-05, "loss": 5.5286, "step": 13824 }, { "epoch": 0.68, "grad_norm": 2.0194168090820312, "learning_rate": 1.5843174069865114e-05, "loss": 5.426, "step": 13828 }, { "epoch": 0.68, "grad_norm": 1.7910535335540771, "learning_rate": 1.5833292158703496e-05, "loss": 5.529, "step": 13832 }, { "epoch": 0.68, "grad_norm": 
2.0648398399353027, "learning_rate": 1.5823410247541874e-05, "loss": 5.5632, "step": 13836 }, { "epoch": 0.68, "grad_norm": 1.881971001625061, "learning_rate": 1.5813528336380257e-05, "loss": 5.6164, "step": 13840 }, { "epoch": 0.68, "grad_norm": 2.088383913040161, "learning_rate": 1.580364642521864e-05, "loss": 5.5221, "step": 13844 }, { "epoch": 0.68, "grad_norm": 1.9117428064346313, "learning_rate": 1.5793764514057017e-05, "loss": 5.6231, "step": 13848 }, { "epoch": 0.68, "grad_norm": 2.0459342002868652, "learning_rate": 1.57838826028954e-05, "loss": 5.3971, "step": 13852 }, { "epoch": 0.68, "grad_norm": 1.8177950382232666, "learning_rate": 1.577400069173378e-05, "loss": 5.4996, "step": 13856 }, { "epoch": 0.68, "grad_norm": 2.090265989303589, "learning_rate": 1.5764118780572163e-05, "loss": 5.5479, "step": 13860 }, { "epoch": 0.69, "grad_norm": 2.1461987495422363, "learning_rate": 1.5754236869410542e-05, "loss": 5.4959, "step": 13864 }, { "epoch": 0.69, "grad_norm": 2.008942127227783, "learning_rate": 1.5744354958248928e-05, "loss": 5.3582, "step": 13868 }, { "epoch": 0.69, "grad_norm": 1.7721225023269653, "learning_rate": 1.573447304708731e-05, "loss": 5.4618, "step": 13872 }, { "epoch": 0.69, "grad_norm": 2.161746025085449, "learning_rate": 1.572459113592569e-05, "loss": 5.5163, "step": 13876 }, { "epoch": 0.69, "grad_norm": 2.0746963024139404, "learning_rate": 1.571470922476407e-05, "loss": 5.4898, "step": 13880 }, { "epoch": 0.69, "grad_norm": null, "learning_rate": 1.5707297791392857e-05, "loss": 5.5354, "step": 13884 }, { "epoch": 0.69, "grad_norm": 2.0238466262817383, "learning_rate": 1.5697415880231236e-05, "loss": 5.5099, "step": 13888 }, { "epoch": 0.69, "grad_norm": 1.9300113916397095, "learning_rate": 1.5687533969069618e-05, "loss": 5.682, "step": 13892 }, { "epoch": 0.69, "grad_norm": 1.9683018922805786, "learning_rate": 1.5677652057908e-05, "loss": 5.5058, "step": 13896 }, { "epoch": 0.69, "grad_norm": 1.975710391998291, "learning_rate": 
1.5667770146746382e-05, "loss": 5.5139, "step": 13900 }, { "epoch": 0.69, "grad_norm": 1.9731028079986572, "learning_rate": 1.5657888235584764e-05, "loss": 5.565, "step": 13904 }, { "epoch": 0.69, "grad_norm": 2.0347113609313965, "learning_rate": 1.5648006324423146e-05, "loss": 5.6587, "step": 13908 }, { "epoch": 0.69, "grad_norm": 1.9666558504104614, "learning_rate": 1.5638124413261525e-05, "loss": 5.6072, "step": 13912 }, { "epoch": 0.69, "grad_norm": 1.808203935623169, "learning_rate": 1.5628242502099907e-05, "loss": 5.4888, "step": 13916 }, { "epoch": 0.69, "grad_norm": 2.0783326625823975, "learning_rate": 1.561836059093829e-05, "loss": 5.6407, "step": 13920 }, { "epoch": 0.69, "grad_norm": 2.006601572036743, "learning_rate": 1.560847867977667e-05, "loss": 5.5085, "step": 13924 }, { "epoch": 0.69, "grad_norm": 1.8633679151535034, "learning_rate": 1.559859676861505e-05, "loss": 5.4967, "step": 13928 }, { "epoch": 0.69, "grad_norm": 1.9505640268325806, "learning_rate": 1.558871485745343e-05, "loss": 5.4678, "step": 13932 }, { "epoch": 0.69, "grad_norm": 2.0986247062683105, "learning_rate": 1.5578832946291814e-05, "loss": 5.4932, "step": 13936 }, { "epoch": 0.69, "grad_norm": 2.256755828857422, "learning_rate": 1.5568951035130196e-05, "loss": 5.5958, "step": 13940 }, { "epoch": 0.69, "grad_norm": 2.2147109508514404, "learning_rate": 1.5559069123968574e-05, "loss": 5.5194, "step": 13944 }, { "epoch": 0.69, "grad_norm": 1.88095223903656, "learning_rate": 1.5549187212806956e-05, "loss": 5.4609, "step": 13948 }, { "epoch": 0.69, "grad_norm": 1.867007851600647, "learning_rate": 1.553930530164534e-05, "loss": 5.4802, "step": 13952 }, { "epoch": 0.69, "grad_norm": 2.029303789138794, "learning_rate": 1.552942339048372e-05, "loss": 5.5337, "step": 13956 }, { "epoch": 0.69, "grad_norm": 1.9142879247665405, "learning_rate": 1.5519541479322103e-05, "loss": 5.4932, "step": 13960 }, { "epoch": 0.69, "grad_norm": 1.8500617742538452, "learning_rate": 1.5509659568160485e-05, 
"loss": 5.4916, "step": 13964 }, { "epoch": 0.69, "grad_norm": 2.0694692134857178, "learning_rate": 1.5499777656998867e-05, "loss": 5.4766, "step": 13968 }, { "epoch": 0.69, "grad_norm": 2.122901201248169, "learning_rate": 1.5489895745837245e-05, "loss": 5.6467, "step": 13972 }, { "epoch": 0.69, "grad_norm": 2.108297109603882, "learning_rate": 1.5480013834675627e-05, "loss": 5.5902, "step": 13976 }, { "epoch": 0.69, "grad_norm": 1.9898920059204102, "learning_rate": 1.547013192351401e-05, "loss": 5.5772, "step": 13980 }, { "epoch": 0.69, "grad_norm": 2.2552645206451416, "learning_rate": 1.5460250012352388e-05, "loss": 5.507, "step": 13984 }, { "epoch": 0.69, "grad_norm": 1.9198163747787476, "learning_rate": 1.545036810119077e-05, "loss": 5.4749, "step": 13988 }, { "epoch": 0.69, "grad_norm": 2.101717233657837, "learning_rate": 1.5440486190029152e-05, "loss": 5.444, "step": 13992 }, { "epoch": 0.69, "grad_norm": 2.1184744834899902, "learning_rate": 1.5430604278867534e-05, "loss": 5.6017, "step": 13996 }, { "epoch": 0.69, "grad_norm": 1.7170560359954834, "learning_rate": 1.5420722367705913e-05, "loss": 5.4235, "step": 14000 }, { "epoch": 0.69, "grad_norm": 2.1068732738494873, "learning_rate": 1.5410840456544295e-05, "loss": 5.504, "step": 14004 }, { "epoch": 0.69, "grad_norm": 2.275245189666748, "learning_rate": 1.5400958545382677e-05, "loss": 5.5617, "step": 14008 }, { "epoch": 0.69, "grad_norm": 2.2603347301483154, "learning_rate": 1.539107663422106e-05, "loss": 5.4309, "step": 14012 }, { "epoch": 0.69, "grad_norm": 2.082984685897827, "learning_rate": 1.538119472305944e-05, "loss": 5.408, "step": 14016 }, { "epoch": 0.69, "grad_norm": 1.8457398414611816, "learning_rate": 1.5371312811897823e-05, "loss": 5.4611, "step": 14020 }, { "epoch": 0.69, "grad_norm": 2.0834155082702637, "learning_rate": 1.5361430900736205e-05, "loss": 5.4901, "step": 14024 }, { "epoch": 0.69, "grad_norm": 1.9145028591156006, "learning_rate": 1.5351548989574584e-05, "loss": 5.5115, "step": 
14028 }, { "epoch": 0.69, "grad_norm": 1.7850406169891357, "learning_rate": 1.5341667078412966e-05, "loss": 5.4384, "step": 14032 }, { "epoch": 0.69, "grad_norm": 1.964521884918213, "learning_rate": 1.5331785167251348e-05, "loss": 5.3728, "step": 14036 }, { "epoch": 0.69, "grad_norm": 2.125605821609497, "learning_rate": 1.532190325608973e-05, "loss": 5.4972, "step": 14040 }, { "epoch": 0.69, "grad_norm": 1.7855336666107178, "learning_rate": 1.531202134492811e-05, "loss": 5.5624, "step": 14044 }, { "epoch": 0.69, "grad_norm": 1.9751635789871216, "learning_rate": 1.530213943376649e-05, "loss": 5.4388, "step": 14048 }, { "epoch": 0.69, "grad_norm": 1.943023681640625, "learning_rate": 1.5292257522604873e-05, "loss": 5.4861, "step": 14052 }, { "epoch": 0.69, "grad_norm": 1.7670375108718872, "learning_rate": 1.528237561144325e-05, "loss": 5.5438, "step": 14056 }, { "epoch": 0.69, "grad_norm": 2.040696859359741, "learning_rate": 1.5272493700281634e-05, "loss": 5.4905, "step": 14060 }, { "epoch": 0.69, "grad_norm": 2.2104179859161377, "learning_rate": 1.5262611789120016e-05, "loss": 5.4197, "step": 14064 }, { "epoch": 0.7, "grad_norm": 2.070598840713501, "learning_rate": 1.5252729877958396e-05, "loss": 5.3342, "step": 14068 }, { "epoch": 0.7, "grad_norm": 2.1541106700897217, "learning_rate": 1.524284796679678e-05, "loss": 5.5594, "step": 14072 }, { "epoch": 0.7, "grad_norm": 1.8858451843261719, "learning_rate": 1.5232966055635162e-05, "loss": 5.5494, "step": 14076 }, { "epoch": 0.7, "grad_norm": 2.0127689838409424, "learning_rate": 1.5223084144473542e-05, "loss": 5.5005, "step": 14080 }, { "epoch": 0.7, "grad_norm": 1.70602548122406, "learning_rate": 1.5213202233311924e-05, "loss": 5.4694, "step": 14084 }, { "epoch": 0.7, "grad_norm": 2.1480917930603027, "learning_rate": 1.5203320322150305e-05, "loss": 5.5891, "step": 14088 }, { "epoch": 0.7, "grad_norm": 1.8127115964889526, "learning_rate": 1.5193438410988687e-05, "loss": 5.497, "step": 14092 }, { "epoch": 0.7, 
"grad_norm": 1.768389105796814, "learning_rate": 1.5183556499827067e-05, "loss": 5.5075, "step": 14096 }, { "epoch": 0.7, "grad_norm": 2.2110512256622314, "learning_rate": 1.517367458866545e-05, "loss": 5.4944, "step": 14100 }, { "epoch": 0.7, "grad_norm": 2.2322652339935303, "learning_rate": 1.516379267750383e-05, "loss": 5.5072, "step": 14104 }, { "epoch": 0.7, "grad_norm": 2.103665351867676, "learning_rate": 1.5153910766342212e-05, "loss": 5.5093, "step": 14108 }, { "epoch": 0.7, "grad_norm": 2.0604758262634277, "learning_rate": 1.5144028855180592e-05, "loss": 5.4539, "step": 14112 }, { "epoch": 0.7, "grad_norm": 1.8728346824645996, "learning_rate": 1.5134146944018972e-05, "loss": 5.401, "step": 14116 }, { "epoch": 0.7, "grad_norm": 2.031858444213867, "learning_rate": 1.5124265032857354e-05, "loss": 5.499, "step": 14120 }, { "epoch": 0.7, "grad_norm": 1.828974962234497, "learning_rate": 1.5114383121695735e-05, "loss": 5.4467, "step": 14124 }, { "epoch": 0.7, "grad_norm": 1.8760355710983276, "learning_rate": 1.5104501210534119e-05, "loss": 5.3958, "step": 14128 }, { "epoch": 0.7, "grad_norm": 1.8751137256622314, "learning_rate": 1.50946192993725e-05, "loss": 5.4515, "step": 14132 }, { "epoch": 0.7, "grad_norm": 1.9187356233596802, "learning_rate": 1.5084737388210881e-05, "loss": 5.5581, "step": 14136 }, { "epoch": 0.7, "grad_norm": 2.1077044010162354, "learning_rate": 1.5074855477049263e-05, "loss": 5.5018, "step": 14140 }, { "epoch": 0.7, "grad_norm": 1.9304084777832031, "learning_rate": 1.5064973565887643e-05, "loss": 5.6017, "step": 14144 }, { "epoch": 0.7, "grad_norm": 1.7740778923034668, "learning_rate": 1.5055091654726025e-05, "loss": 5.5123, "step": 14148 }, { "epoch": 0.7, "grad_norm": 1.9732328653335571, "learning_rate": 1.5045209743564406e-05, "loss": 5.4148, "step": 14152 }, { "epoch": 0.7, "grad_norm": 1.98763906955719, "learning_rate": 1.5035327832402788e-05, "loss": 5.5671, "step": 14156 }, { "epoch": 0.7, "grad_norm": 2.025677442550659, 
"learning_rate": 1.5025445921241168e-05, "loss": 5.5388, "step": 14160 }, { "epoch": 0.7, "grad_norm": 2.1304235458374023, "learning_rate": 1.501556401007955e-05, "loss": 5.549, "step": 14164 }, { "epoch": 0.7, "grad_norm": 1.8666431903839111, "learning_rate": 1.500568209891793e-05, "loss": 5.5133, "step": 14168 }, { "epoch": 0.7, "grad_norm": 2.001925468444824, "learning_rate": 1.4995800187756313e-05, "loss": 5.4878, "step": 14172 }, { "epoch": 0.7, "grad_norm": 2.0786337852478027, "learning_rate": 1.4985918276594693e-05, "loss": 5.5475, "step": 14176 }, { "epoch": 0.7, "grad_norm": 2.0157644748687744, "learning_rate": 1.4976036365433073e-05, "loss": 5.4686, "step": 14180 }, { "epoch": 0.7, "grad_norm": 2.1091387271881104, "learning_rate": 1.4966154454271456e-05, "loss": 5.5175, "step": 14184 }, { "epoch": 0.7, "grad_norm": 1.8892532587051392, "learning_rate": 1.495627254310984e-05, "loss": 5.57, "step": 14188 }, { "epoch": 0.7, "grad_norm": 2.0681941509246826, "learning_rate": 1.4946390631948221e-05, "loss": 5.4503, "step": 14192 }, { "epoch": 0.7, "grad_norm": 2.028324842453003, "learning_rate": 1.4936508720786602e-05, "loss": 5.5231, "step": 14196 }, { "epoch": 0.7, "grad_norm": 1.8590697050094604, "learning_rate": 1.4926626809624982e-05, "loss": 5.4046, "step": 14200 }, { "epoch": 0.7, "grad_norm": 2.1684091091156006, "learning_rate": 1.4916744898463364e-05, "loss": 5.5428, "step": 14204 }, { "epoch": 0.7, "grad_norm": 2.4600932598114014, "learning_rate": 1.4906862987301745e-05, "loss": 5.4973, "step": 14208 }, { "epoch": 0.7, "grad_norm": 2.182546615600586, "learning_rate": 1.4896981076140127e-05, "loss": 5.5168, "step": 14212 }, { "epoch": 0.7, "grad_norm": 2.0207765102386475, "learning_rate": 1.4887099164978507e-05, "loss": 5.4892, "step": 14216 }, { "epoch": 0.7, "grad_norm": 2.07978892326355, "learning_rate": 1.4877217253816889e-05, "loss": 5.5074, "step": 14220 }, { "epoch": 0.7, "grad_norm": 2.0122103691101074, "learning_rate": 1.486733534265527e-05, 
"loss": 5.5007, "step": 14224 }, { "epoch": 0.7, "grad_norm": 1.951704502105713, "learning_rate": 1.4857453431493651e-05, "loss": 5.5281, "step": 14228 }, { "epoch": 0.7, "grad_norm": 1.903686761856079, "learning_rate": 1.4847571520332032e-05, "loss": 5.483, "step": 14232 }, { "epoch": 0.7, "grad_norm": 1.7512837648391724, "learning_rate": 1.4837689609170414e-05, "loss": 5.5026, "step": 14236 }, { "epoch": 0.7, "grad_norm": 1.9716506004333496, "learning_rate": 1.4827807698008794e-05, "loss": 5.5547, "step": 14240 }, { "epoch": 0.7, "grad_norm": 2.1064813137054443, "learning_rate": 1.4817925786847178e-05, "loss": 5.6868, "step": 14244 }, { "epoch": 0.7, "grad_norm": 1.920245885848999, "learning_rate": 1.480804387568556e-05, "loss": 5.5195, "step": 14248 }, { "epoch": 0.7, "grad_norm": 1.8807624578475952, "learning_rate": 1.479816196452394e-05, "loss": 5.5605, "step": 14252 }, { "epoch": 0.7, "grad_norm": 2.1558022499084473, "learning_rate": 1.4788280053362322e-05, "loss": 5.54, "step": 14256 }, { "epoch": 0.7, "grad_norm": 1.9825403690338135, "learning_rate": 1.4778398142200703e-05, "loss": 5.5888, "step": 14260 }, { "epoch": 0.7, "grad_norm": 1.974697232246399, "learning_rate": 1.4768516231039083e-05, "loss": 5.5791, "step": 14264 }, { "epoch": 0.7, "grad_norm": 2.0667243003845215, "learning_rate": 1.4758634319877465e-05, "loss": 5.4354, "step": 14268 }, { "epoch": 0.71, "grad_norm": 1.8902255296707153, "learning_rate": 1.4748752408715846e-05, "loss": 5.5167, "step": 14272 }, { "epoch": 0.71, "grad_norm": 1.882251501083374, "learning_rate": 1.4738870497554228e-05, "loss": 5.4551, "step": 14276 }, { "epoch": 0.71, "grad_norm": 2.0265276432037354, "learning_rate": 1.4728988586392608e-05, "loss": 5.3989, "step": 14280 }, { "epoch": 0.71, "grad_norm": 2.1499834060668945, "learning_rate": 1.471910667523099e-05, "loss": 5.3913, "step": 14284 }, { "epoch": 0.71, "grad_norm": 1.9374068975448608, "learning_rate": 1.470922476406937e-05, "loss": 5.4951, "step": 14288 }, { 
"epoch": 0.71, "grad_norm": 2.04496693611145, "learning_rate": 1.4699342852907753e-05, "loss": 5.5568, "step": 14292 }, { "epoch": 0.71, "grad_norm": 2.1718828678131104, "learning_rate": 1.4689460941746133e-05, "loss": 5.4003, "step": 14296 }, { "epoch": 0.71, "grad_norm": 2.170172929763794, "learning_rate": 1.4679579030584517e-05, "loss": 5.4535, "step": 14300 }, { "epoch": 0.71, "grad_norm": 1.9548550844192505, "learning_rate": 1.4669697119422899e-05, "loss": 5.5077, "step": 14304 }, { "epoch": 0.71, "grad_norm": 1.7335338592529297, "learning_rate": 1.4659815208261279e-05, "loss": 5.4265, "step": 14308 }, { "epoch": 0.71, "grad_norm": 2.0178778171539307, "learning_rate": 1.4649933297099661e-05, "loss": 5.3215, "step": 14312 }, { "epoch": 0.71, "grad_norm": 2.198841094970703, "learning_rate": 1.4640051385938042e-05, "loss": 5.5392, "step": 14316 }, { "epoch": 0.71, "grad_norm": 2.093581438064575, "learning_rate": 1.4630169474776424e-05, "loss": 5.4572, "step": 14320 }, { "epoch": 0.71, "grad_norm": 1.7876406908035278, "learning_rate": 1.4620287563614804e-05, "loss": 5.3976, "step": 14324 }, { "epoch": 0.71, "grad_norm": 2.3379602432250977, "learning_rate": 1.4610405652453184e-05, "loss": 5.5972, "step": 14328 }, { "epoch": 0.71, "grad_norm": 2.0745928287506104, "learning_rate": 1.4600523741291566e-05, "loss": 5.4489, "step": 14332 }, { "epoch": 0.71, "grad_norm": 1.9868686199188232, "learning_rate": 1.4590641830129947e-05, "loss": 5.5921, "step": 14336 }, { "epoch": 0.71, "grad_norm": 2.052441358566284, "learning_rate": 1.4580759918968329e-05, "loss": 5.5621, "step": 14340 }, { "epoch": 0.71, "grad_norm": 2.250218629837036, "learning_rate": 1.457087800780671e-05, "loss": 5.3768, "step": 14344 }, { "epoch": 0.71, "grad_norm": 2.0471935272216797, "learning_rate": 1.4560996096645091e-05, "loss": 5.5863, "step": 14348 }, { "epoch": 0.71, "grad_norm": 2.0607481002807617, "learning_rate": 1.4551114185483472e-05, "loss": 5.5404, "step": 14352 }, { "epoch": 0.71, 
"grad_norm": 1.7724336385726929, "learning_rate": 1.4541232274321854e-05, "loss": 5.4719, "step": 14356 }, { "epoch": 0.71, "grad_norm": 2.1439015865325928, "learning_rate": 1.4531350363160237e-05, "loss": 5.5045, "step": 14360 }, { "epoch": 0.71, "grad_norm": 2.067152500152588, "learning_rate": 1.4521468451998618e-05, "loss": 5.5518, "step": 14364 }, { "epoch": 0.71, "grad_norm": 1.8666131496429443, "learning_rate": 1.4511586540837e-05, "loss": 5.4791, "step": 14368 }, { "epoch": 0.71, "grad_norm": 1.9369677305221558, "learning_rate": 1.450170462967538e-05, "loss": 5.4379, "step": 14372 }, { "epoch": 0.71, "grad_norm": 2.0378055572509766, "learning_rate": 1.4491822718513762e-05, "loss": 5.4874, "step": 14376 }, { "epoch": 0.71, "grad_norm": 1.8923200368881226, "learning_rate": 1.4481940807352143e-05, "loss": 5.5813, "step": 14380 }, { "epoch": 0.71, "grad_norm": 1.9572350978851318, "learning_rate": 1.4472058896190525e-05, "loss": 5.5161, "step": 14384 }, { "epoch": 0.71, "grad_norm": 1.9131194353103638, "learning_rate": 1.4462176985028905e-05, "loss": 5.6163, "step": 14388 }, { "epoch": 0.71, "grad_norm": 2.5276191234588623, "learning_rate": 1.4452295073867287e-05, "loss": 5.5442, "step": 14392 }, { "epoch": 0.71, "grad_norm": 2.0798611640930176, "learning_rate": 1.4442413162705667e-05, "loss": 5.6086, "step": 14396 }, { "epoch": 0.71, "grad_norm": 2.1294023990631104, "learning_rate": 1.4432531251544048e-05, "loss": 5.3953, "step": 14400 }, { "epoch": 0.71, "grad_norm": 2.01753568649292, "learning_rate": 1.442264934038243e-05, "loss": 5.537, "step": 14404 }, { "epoch": 0.71, "grad_norm": 2.0679104328155518, "learning_rate": 1.441276742922081e-05, "loss": 5.4632, "step": 14408 }, { "epoch": 0.71, "grad_norm": 1.921613097190857, "learning_rate": 1.4402885518059192e-05, "loss": 5.2852, "step": 14412 }, { "epoch": 0.71, "grad_norm": 1.9771466255187988, "learning_rate": 1.4393003606897576e-05, "loss": 5.5808, "step": 14416 }, { "epoch": 0.71, "grad_norm": 
1.8879215717315674, "learning_rate": 1.4383121695735956e-05, "loss": 5.4957, "step": 14420 }, { "epoch": 0.71, "grad_norm": 2.0186474323272705, "learning_rate": 1.4373239784574339e-05, "loss": 5.4864, "step": 14424 }, { "epoch": 0.71, "grad_norm": 2.147420883178711, "learning_rate": 1.4363357873412719e-05, "loss": 5.5098, "step": 14428 }, { "epoch": 0.71, "grad_norm": 1.9963911771774292, "learning_rate": 1.4353475962251101e-05, "loss": 5.3886, "step": 14432 }, { "epoch": 0.71, "grad_norm": 2.03368878364563, "learning_rate": 1.4343594051089481e-05, "loss": 5.4533, "step": 14436 }, { "epoch": 0.71, "grad_norm": 2.274022340774536, "learning_rate": 1.4333712139927863e-05, "loss": 5.5222, "step": 14440 }, { "epoch": 0.71, "grad_norm": 2.0925748348236084, "learning_rate": 1.4323830228766244e-05, "loss": 5.3865, "step": 14444 }, { "epoch": 0.71, "grad_norm": 2.25508451461792, "learning_rate": 1.4313948317604626e-05, "loss": 5.388, "step": 14448 }, { "epoch": 0.71, "grad_norm": 1.9655667543411255, "learning_rate": 1.4304066406443006e-05, "loss": 5.5274, "step": 14452 }, { "epoch": 0.71, "grad_norm": 1.7187186479568481, "learning_rate": 1.4294184495281388e-05, "loss": 5.5024, "step": 14456 }, { "epoch": 0.71, "grad_norm": 1.9573662281036377, "learning_rate": 1.4284302584119769e-05, "loss": 5.5709, "step": 14460 }, { "epoch": 0.71, "grad_norm": 1.9070782661437988, "learning_rate": 1.4274420672958149e-05, "loss": 5.5638, "step": 14464 }, { "epoch": 0.71, "grad_norm": 1.99477219581604, "learning_rate": 1.4264538761796531e-05, "loss": 5.554, "step": 14468 }, { "epoch": 0.72, "grad_norm": 2.1351230144500732, "learning_rate": 1.4254656850634915e-05, "loss": 5.4401, "step": 14472 }, { "epoch": 0.72, "grad_norm": 2.12768292427063, "learning_rate": 1.4244774939473297e-05, "loss": 5.5931, "step": 14476 }, { "epoch": 0.72, "grad_norm": 1.834502100944519, "learning_rate": 1.4234893028311677e-05, "loss": 5.3383, "step": 14480 }, { "epoch": 0.72, "grad_norm": 1.9401111602783203, 
"learning_rate": 1.4225011117150058e-05, "loss": 5.5206, "step": 14484 }, { "epoch": 0.72, "grad_norm": 2.1304268836975098, "learning_rate": 1.421512920598844e-05, "loss": 5.4791, "step": 14488 }, { "epoch": 0.72, "grad_norm": 1.8855656385421753, "learning_rate": 1.420524729482682e-05, "loss": 5.4404, "step": 14492 }, { "epoch": 0.72, "grad_norm": 2.1601414680480957, "learning_rate": 1.4195365383665202e-05, "loss": 5.509, "step": 14496 }, { "epoch": 0.72, "grad_norm": 2.054011821746826, "learning_rate": 1.4185483472503582e-05, "loss": 5.5097, "step": 14500 }, { "epoch": 0.72, "grad_norm": 2.2468581199645996, "learning_rate": 1.4175601561341964e-05, "loss": 5.5658, "step": 14504 }, { "epoch": 0.72, "grad_norm": 1.8028780221939087, "learning_rate": 1.4165719650180345e-05, "loss": 5.4528, "step": 14508 }, { "epoch": 0.72, "grad_norm": 1.7557624578475952, "learning_rate": 1.4155837739018727e-05, "loss": 5.4267, "step": 14512 }, { "epoch": 0.72, "grad_norm": 2.2908835411071777, "learning_rate": 1.4145955827857107e-05, "loss": 5.5676, "step": 14516 }, { "epoch": 0.72, "grad_norm": 1.8863370418548584, "learning_rate": 1.413607391669549e-05, "loss": 5.5873, "step": 14520 }, { "epoch": 0.72, "grad_norm": 1.9381186962127686, "learning_rate": 1.412619200553387e-05, "loss": 5.5573, "step": 14524 }, { "epoch": 0.72, "grad_norm": 1.9330207109451294, "learning_rate": 1.4116310094372252e-05, "loss": 5.3673, "step": 14528 }, { "epoch": 0.72, "grad_norm": 2.0471458435058594, "learning_rate": 1.4106428183210636e-05, "loss": 5.4435, "step": 14532 }, { "epoch": 0.72, "grad_norm": 1.922804594039917, "learning_rate": 1.4096546272049016e-05, "loss": 5.4511, "step": 14536 }, { "epoch": 0.72, "grad_norm": 1.946153998374939, "learning_rate": 1.4086664360887398e-05, "loss": 5.4641, "step": 14540 }, { "epoch": 0.72, "grad_norm": 2.0340843200683594, "learning_rate": 1.4076782449725778e-05, "loss": 5.5205, "step": 14544 }, { "epoch": 0.72, "grad_norm": 2.218997001647949, "learning_rate": 
1.4066900538564159e-05, "loss": 5.4572, "step": 14548 }, { "epoch": 0.72, "grad_norm": 1.8929450511932373, "learning_rate": 1.405701862740254e-05, "loss": 5.4428, "step": 14552 }, { "epoch": 0.72, "grad_norm": 1.828822374343872, "learning_rate": 1.4047136716240921e-05, "loss": 5.4737, "step": 14556 }, { "epoch": 0.72, "grad_norm": 2.1129636764526367, "learning_rate": 1.4037254805079303e-05, "loss": 5.4952, "step": 14560 }, { "epoch": 0.72, "grad_norm": 2.0485777854919434, "learning_rate": 1.4027372893917684e-05, "loss": 5.5007, "step": 14564 }, { "epoch": 0.72, "grad_norm": 2.2085585594177246, "learning_rate": 1.4017490982756066e-05, "loss": 5.5036, "step": 14568 }, { "epoch": 0.72, "grad_norm": 2.0381059646606445, "learning_rate": 1.4007609071594446e-05, "loss": 5.5335, "step": 14572 }, { "epoch": 0.72, "grad_norm": 1.994460940361023, "learning_rate": 1.3997727160432828e-05, "loss": 5.5262, "step": 14576 }, { "epoch": 0.72, "grad_norm": 1.9527006149291992, "learning_rate": 1.3987845249271208e-05, "loss": 5.4994, "step": 14580 }, { "epoch": 0.72, "grad_norm": 2.07161808013916, "learning_rate": 1.397796333810959e-05, "loss": 5.5022, "step": 14584 }, { "epoch": 0.72, "grad_norm": 2.083160638809204, "learning_rate": 1.3968081426947974e-05, "loss": 5.46, "step": 14588 }, { "epoch": 0.72, "grad_norm": 1.9139795303344727, "learning_rate": 1.3958199515786355e-05, "loss": 5.3918, "step": 14592 }, { "epoch": 0.72, "grad_norm": 2.016361951828003, "learning_rate": 1.3948317604624737e-05, "loss": 5.3577, "step": 14596 }, { "epoch": 0.72, "grad_norm": 1.9085208177566528, "learning_rate": 1.3938435693463117e-05, "loss": 5.5672, "step": 14600 }, { "epoch": 0.72, "grad_norm": 2.156883716583252, "learning_rate": 1.3928553782301499e-05, "loss": 5.4402, "step": 14604 }, { "epoch": 0.72, "grad_norm": 2.1130199432373047, "learning_rate": 1.391867187113988e-05, "loss": 5.5375, "step": 14608 }, { "epoch": 0.72, "grad_norm": 2.0779714584350586, "learning_rate": 1.3908789959978261e-05, 
"loss": 5.4568, "step": 14612 }, { "epoch": 0.72, "grad_norm": 2.0961239337921143, "learning_rate": 1.3898908048816642e-05, "loss": 5.5331, "step": 14616 }, { "epoch": 0.72, "grad_norm": 1.9321538209915161, "learning_rate": 1.3889026137655022e-05, "loss": 5.4607, "step": 14620 }, { "epoch": 0.72, "grad_norm": 2.171811103820801, "learning_rate": 1.3879144226493404e-05, "loss": 5.5705, "step": 14624 }, { "epoch": 0.72, "grad_norm": 2.0111582279205322, "learning_rate": 1.3869262315331785e-05, "loss": 5.5365, "step": 14628 }, { "epoch": 0.72, "grad_norm": 2.2230725288391113, "learning_rate": 1.3859380404170167e-05, "loss": 5.4544, "step": 14632 }, { "epoch": 0.72, "grad_norm": 2.1781952381134033, "learning_rate": 1.3849498493008547e-05, "loss": 5.5207, "step": 14636 }, { "epoch": 0.72, "grad_norm": 2.1648108959198, "learning_rate": 1.3839616581846929e-05, "loss": 5.542, "step": 14640 }, { "epoch": 0.72, "grad_norm": 2.008898973464966, "learning_rate": 1.3829734670685313e-05, "loss": 5.4819, "step": 14644 }, { "epoch": 0.72, "grad_norm": 1.8579167127609253, "learning_rate": 1.3819852759523693e-05, "loss": 5.5307, "step": 14648 }, { "epoch": 0.72, "grad_norm": 1.951011300086975, "learning_rate": 1.3809970848362075e-05, "loss": 5.5083, "step": 14652 }, { "epoch": 0.72, "grad_norm": 2.622732639312744, "learning_rate": 1.3800088937200456e-05, "loss": 5.4687, "step": 14656 }, { "epoch": 0.72, "grad_norm": 1.8241809606552124, "learning_rate": 1.3790207026038838e-05, "loss": 5.5702, "step": 14660 }, { "epoch": 0.72, "grad_norm": 2.2202184200286865, "learning_rate": 1.3780325114877218e-05, "loss": 5.5717, "step": 14664 }, { "epoch": 0.72, "grad_norm": 2.125215530395508, "learning_rate": 1.37704432037156e-05, "loss": 5.4724, "step": 14668 }, { "epoch": 0.72, "grad_norm": 2.0341951847076416, "learning_rate": 1.376056129255398e-05, "loss": 5.4859, "step": 14672 }, { "epoch": 0.73, "grad_norm": 1.9950518608093262, "learning_rate": 1.3750679381392363e-05, "loss": 5.4983, "step": 
14676 }, { "epoch": 0.73, "grad_norm": 1.9778791666030884, "learning_rate": 1.3740797470230743e-05, "loss": 5.3464, "step": 14680 }, { "epoch": 0.73, "grad_norm": 2.052849054336548, "learning_rate": 1.3730915559069123e-05, "loss": 5.4766, "step": 14684 }, { "epoch": 0.73, "grad_norm": 2.0208585262298584, "learning_rate": 1.3721033647907505e-05, "loss": 5.4662, "step": 14688 }, { "epoch": 0.73, "grad_norm": 2.0381686687469482, "learning_rate": 1.3711151736745886e-05, "loss": 5.4048, "step": 14692 }, { "epoch": 0.73, "grad_norm": 2.0504868030548096, "learning_rate": 1.3701269825584268e-05, "loss": 5.5422, "step": 14696 }, { "epoch": 0.73, "grad_norm": 1.9056216478347778, "learning_rate": 1.3691387914422648e-05, "loss": 5.5829, "step": 14700 }, { "epoch": 0.73, "grad_norm": 2.1584672927856445, "learning_rate": 1.3681506003261032e-05, "loss": 5.4341, "step": 14704 }, { "epoch": 0.73, "grad_norm": 1.8544187545776367, "learning_rate": 1.3671624092099414e-05, "loss": 5.5552, "step": 14708 }, { "epoch": 0.73, "grad_norm": 1.9360532760620117, "learning_rate": 1.3661742180937794e-05, "loss": 5.413, "step": 14712 }, { "epoch": 0.73, "grad_norm": 1.7825835943222046, "learning_rate": 1.3651860269776176e-05, "loss": 5.4916, "step": 14716 }, { "epoch": 0.73, "grad_norm": 2.0297420024871826, "learning_rate": 1.3641978358614557e-05, "loss": 5.4825, "step": 14720 }, { "epoch": 0.73, "grad_norm": 2.0158278942108154, "learning_rate": 1.3632096447452939e-05, "loss": 5.4502, "step": 14724 }, { "epoch": 0.73, "grad_norm": 2.265287160873413, "learning_rate": 1.362221453629132e-05, "loss": 5.5644, "step": 14728 }, { "epoch": 0.73, "grad_norm": 2.0918755531311035, "learning_rate": 1.3612332625129701e-05, "loss": 5.6084, "step": 14732 }, { "epoch": 0.73, "grad_norm": 2.141103744506836, "learning_rate": 1.3602450713968082e-05, "loss": 5.4327, "step": 14736 }, { "epoch": 0.73, "grad_norm": 2.070760488510132, "learning_rate": 1.3592568802806464e-05, "loss": 5.5944, "step": 14740 }, { "epoch": 
0.73, "grad_norm": 1.9426864385604858, "learning_rate": 1.3582686891644844e-05, "loss": 5.4326, "step": 14744 }, { "epoch": 0.73, "grad_norm": 1.8541244268417358, "learning_rate": 1.3572804980483224e-05, "loss": 5.5438, "step": 14748 }, { "epoch": 0.73, "grad_norm": 2.081083297729492, "learning_rate": 1.3562923069321606e-05, "loss": 5.5656, "step": 14752 }, { "epoch": 0.73, "grad_norm": 1.923309326171875, "learning_rate": 1.3553041158159987e-05, "loss": 5.5546, "step": 14756 }, { "epoch": 0.73, "grad_norm": 1.746881365776062, "learning_rate": 1.3543159246998372e-05, "loss": 5.4799, "step": 14760 }, { "epoch": 0.73, "grad_norm": 1.91403067111969, "learning_rate": 1.3533277335836753e-05, "loss": 5.4892, "step": 14764 }, { "epoch": 0.73, "grad_norm": 2.0929675102233887, "learning_rate": 1.3523395424675133e-05, "loss": 5.5852, "step": 14768 }, { "epoch": 0.73, "grad_norm": 1.9422674179077148, "learning_rate": 1.3513513513513515e-05, "loss": 5.5581, "step": 14772 }, { "epoch": 0.73, "grad_norm": 1.684131383895874, "learning_rate": 1.3503631602351895e-05, "loss": 5.553, "step": 14776 }, { "epoch": 0.73, "grad_norm": 1.8600718975067139, "learning_rate": 1.3493749691190278e-05, "loss": 5.5774, "step": 14780 }, { "epoch": 0.73, "grad_norm": 1.7786033153533936, "learning_rate": 1.3483867780028658e-05, "loss": 5.4997, "step": 14784 }, { "epoch": 0.73, "grad_norm": 2.239494562149048, "learning_rate": 1.347398586886704e-05, "loss": 5.6186, "step": 14788 }, { "epoch": 0.73, "grad_norm": 2.078833818435669, "learning_rate": 1.346410395770542e-05, "loss": 5.5609, "step": 14792 }, { "epoch": 0.73, "grad_norm": 1.867530107498169, "learning_rate": 1.3454222046543802e-05, "loss": 5.5624, "step": 14796 }, { "epoch": 0.73, "grad_norm": 1.8725926876068115, "learning_rate": 1.3444340135382183e-05, "loss": 5.5675, "step": 14800 }, { "epoch": 0.73, "grad_norm": 1.9592565298080444, "learning_rate": 1.3434458224220565e-05, "loss": 5.4278, "step": 14804 }, { "epoch": 0.73, "grad_norm": 
2.0653085708618164, "learning_rate": 1.3424576313058945e-05, "loss": 5.4732, "step": 14808 }, { "epoch": 0.73, "grad_norm": 2.0164620876312256, "learning_rate": 1.3414694401897327e-05, "loss": 5.4636, "step": 14812 }, { "epoch": 0.73, "grad_norm": 1.9894605875015259, "learning_rate": 1.3404812490735708e-05, "loss": 5.5102, "step": 14816 }, { "epoch": 0.73, "grad_norm": 2.0743892192840576, "learning_rate": 1.3394930579574091e-05, "loss": 5.4852, "step": 14820 }, { "epoch": 0.73, "grad_norm": 2.000945806503296, "learning_rate": 1.3385048668412473e-05, "loss": 5.4698, "step": 14824 }, { "epoch": 0.73, "grad_norm": 2.219153881072998, "learning_rate": 1.3375166757250854e-05, "loss": 5.5966, "step": 14828 }, { "epoch": 0.73, "grad_norm": 2.0266921520233154, "learning_rate": 1.3365284846089234e-05, "loss": 5.5036, "step": 14832 }, { "epoch": 0.73, "grad_norm": 2.1432504653930664, "learning_rate": 1.3355402934927616e-05, "loss": 5.5122, "step": 14836 }, { "epoch": 0.73, "grad_norm": 1.975110411643982, "learning_rate": 1.3345521023765997e-05, "loss": 5.4677, "step": 14840 }, { "epoch": 0.73, "grad_norm": 2.1612823009490967, "learning_rate": 1.3335639112604379e-05, "loss": 5.544, "step": 14844 }, { "epoch": 0.73, "grad_norm": 2.0813982486724854, "learning_rate": 1.3325757201442759e-05, "loss": 5.3651, "step": 14848 }, { "epoch": 0.73, "grad_norm": 1.8772978782653809, "learning_rate": 1.3315875290281141e-05, "loss": 5.4319, "step": 14852 }, { "epoch": 0.73, "grad_norm": 1.8763823509216309, "learning_rate": 1.3305993379119521e-05, "loss": 5.6508, "step": 14856 }, { "epoch": 0.73, "grad_norm": 2.0054590702056885, "learning_rate": 1.3296111467957903e-05, "loss": 5.4899, "step": 14860 }, { "epoch": 0.73, "grad_norm": 2.0155956745147705, "learning_rate": 1.3286229556796284e-05, "loss": 5.4597, "step": 14864 }, { "epoch": 0.73, "grad_norm": 1.9215456247329712, "learning_rate": 1.3276347645634666e-05, "loss": 5.5587, "step": 14868 }, { "epoch": 0.73, "grad_norm": 2.2192320823669434, 
"learning_rate": 1.3266465734473046e-05, "loss": 5.5397, "step": 14872 }, { "epoch": 0.74, "grad_norm": 1.8037538528442383, "learning_rate": 1.325658382331143e-05, "loss": 5.503, "step": 14876 }, { "epoch": 0.74, "grad_norm": 2.096147298812866, "learning_rate": 1.3246701912149812e-05, "loss": 5.4808, "step": 14880 }, { "epoch": 0.74, "grad_norm": 2.0961217880249023, "learning_rate": 1.3236820000988192e-05, "loss": 5.5546, "step": 14884 }, { "epoch": 0.74, "grad_norm": 2.1718828678131104, "learning_rate": 1.3226938089826574e-05, "loss": 5.4201, "step": 14888 }, { "epoch": 0.74, "grad_norm": 2.062138795852661, "learning_rate": 1.3217056178664955e-05, "loss": 5.46, "step": 14892 }, { "epoch": 0.74, "grad_norm": 2.0585410594940186, "learning_rate": 1.3207174267503337e-05, "loss": 5.5545, "step": 14896 }, { "epoch": 0.74, "grad_norm": 1.8028877973556519, "learning_rate": 1.3197292356341717e-05, "loss": 5.4255, "step": 14900 }, { "epoch": 0.74, "grad_norm": 2.002005100250244, "learning_rate": 1.3187410445180098e-05, "loss": 5.5847, "step": 14904 }, { "epoch": 0.74, "grad_norm": 2.1855287551879883, "learning_rate": 1.317752853401848e-05, "loss": 5.536, "step": 14908 }, { "epoch": 0.74, "grad_norm": 2.193171977996826, "learning_rate": 1.316764662285686e-05, "loss": 5.4059, "step": 14912 }, { "epoch": 0.74, "grad_norm": 2.0406908988952637, "learning_rate": 1.3157764711695242e-05, "loss": 5.5851, "step": 14916 }, { "epoch": 0.74, "grad_norm": 2.342682123184204, "learning_rate": 1.3147882800533623e-05, "loss": 5.5068, "step": 14920 }, { "epoch": 0.74, "grad_norm": 2.0584092140197754, "learning_rate": 1.3138000889372005e-05, "loss": 5.3973, "step": 14924 }, { "epoch": 0.74, "grad_norm": 1.7771698236465454, "learning_rate": 1.3128118978210385e-05, "loss": 5.4812, "step": 14928 }, { "epoch": 0.74, "grad_norm": 1.923229694366455, "learning_rate": 1.3118237067048769e-05, "loss": 5.4176, "step": 14932 }, { "epoch": 0.74, "grad_norm": 1.9571110010147095, "learning_rate": 
1.310835515588715e-05, "loss": 5.5085, "step": 14936 }, { "epoch": 0.74, "grad_norm": 1.9827457666397095, "learning_rate": 1.3098473244725531e-05, "loss": 5.4454, "step": 14940 }, { "epoch": 0.74, "grad_norm": 2.2986981868743896, "learning_rate": 1.3088591333563913e-05, "loss": 5.562, "step": 14944 }, { "epoch": 0.74, "grad_norm": 2.087019920349121, "learning_rate": 1.3078709422402294e-05, "loss": 5.5164, "step": 14948 }, { "epoch": 0.74, "grad_norm": 1.9258410930633545, "learning_rate": 1.3068827511240676e-05, "loss": 5.5252, "step": 14952 }, { "epoch": 0.74, "grad_norm": 1.833736538887024, "learning_rate": 1.3058945600079056e-05, "loss": 5.4423, "step": 14956 }, { "epoch": 0.74, "grad_norm": 2.1392202377319336, "learning_rate": 1.3049063688917438e-05, "loss": 5.5637, "step": 14960 }, { "epoch": 0.74, "grad_norm": 1.9656065702438354, "learning_rate": 1.3039181777755818e-05, "loss": 5.474, "step": 14964 }, { "epoch": 0.74, "grad_norm": 1.8016414642333984, "learning_rate": 1.3029299866594199e-05, "loss": 5.4101, "step": 14968 }, { "epoch": 0.74, "grad_norm": 2.141220808029175, "learning_rate": 1.301941795543258e-05, "loss": 5.4747, "step": 14972 }, { "epoch": 0.74, "grad_norm": 2.1080868244171143, "learning_rate": 1.3009536044270961e-05, "loss": 5.3842, "step": 14976 }, { "epoch": 0.74, "grad_norm": 1.9357255697250366, "learning_rate": 1.2999654133109343e-05, "loss": 5.5716, "step": 14980 }, { "epoch": 0.74, "grad_norm": 2.3347742557525635, "learning_rate": 1.2989772221947724e-05, "loss": 5.5935, "step": 14984 }, { "epoch": 0.74, "grad_norm": 1.999286413192749, "learning_rate": 1.2979890310786106e-05, "loss": 5.4976, "step": 14988 }, { "epoch": 0.74, "grad_norm": 2.040515184402466, "learning_rate": 1.297000839962449e-05, "loss": 5.6029, "step": 14992 }, { "epoch": 0.74, "grad_norm": 1.9321497678756714, "learning_rate": 1.296012648846287e-05, "loss": 5.503, "step": 14996 }, { "epoch": 0.74, "grad_norm": 1.834435224533081, "learning_rate": 1.2950244577301252e-05, 
"loss": 5.5076, "step": 15000 }, { "epoch": 0.74, "grad_norm": 2.410569906234741, "learning_rate": 1.2940362666139632e-05, "loss": 5.6112, "step": 15004 }, { "epoch": 0.74, "grad_norm": 2.110943555831909, "learning_rate": 1.2930480754978014e-05, "loss": 5.4589, "step": 15008 }, { "epoch": 0.74, "grad_norm": 2.2104272842407227, "learning_rate": 1.2920598843816395e-05, "loss": 5.4494, "step": 15012 }, { "epoch": 0.74, "grad_norm": 2.0038228034973145, "learning_rate": 1.2910716932654777e-05, "loss": 5.4229, "step": 15016 }, { "epoch": 0.74, "grad_norm": 2.0080738067626953, "learning_rate": 1.2900835021493157e-05, "loss": 5.5645, "step": 15020 }, { "epoch": 0.74, "grad_norm": 2.075934886932373, "learning_rate": 1.2890953110331539e-05, "loss": 5.6104, "step": 15024 }, { "epoch": 0.74, "grad_norm": 1.9630203247070312, "learning_rate": 1.288107119916992e-05, "loss": 5.4888, "step": 15028 }, { "epoch": 0.74, "grad_norm": 2.1975886821746826, "learning_rate": 1.28711892880083e-05, "loss": 5.5283, "step": 15032 }, { "epoch": 0.74, "grad_norm": 1.9507403373718262, "learning_rate": 1.2861307376846682e-05, "loss": 5.6203, "step": 15036 }, { "epoch": 0.74, "grad_norm": 2.0065085887908936, "learning_rate": 1.2851425465685062e-05, "loss": 5.4493, "step": 15040 }, { "epoch": 0.74, "grad_norm": 2.170250177383423, "learning_rate": 1.2841543554523444e-05, "loss": 5.5062, "step": 15044 }, { "epoch": 0.74, "grad_norm": 1.9755939245224, "learning_rate": 1.2831661643361828e-05, "loss": 5.4901, "step": 15048 }, { "epoch": 0.74, "grad_norm": 2.0391461849212646, "learning_rate": 1.2821779732200208e-05, "loss": 5.4896, "step": 15052 }, { "epoch": 0.74, "grad_norm": 1.8779929876327515, "learning_rate": 1.281189782103859e-05, "loss": 5.4311, "step": 15056 }, { "epoch": 0.74, "grad_norm": 2.123504400253296, "learning_rate": 1.2802015909876971e-05, "loss": 5.5117, "step": 15060 }, { "epoch": 0.74, "grad_norm": 1.885321021080017, "learning_rate": 1.2792133998715353e-05, "loss": 5.515, "step": 15064 
}, { "epoch": 0.74, "grad_norm": 2.281585454940796, "learning_rate": 1.2782252087553733e-05, "loss": 5.4979, "step": 15068 }, { "epoch": 0.74, "grad_norm": 2.1277706623077393, "learning_rate": 1.2772370176392115e-05, "loss": 5.4285, "step": 15072 }, { "epoch": 0.74, "grad_norm": 2.0085339546203613, "learning_rate": 1.2762488265230496e-05, "loss": 5.4741, "step": 15076 }, { "epoch": 0.75, "grad_norm": 2.0528817176818848, "learning_rate": 1.2752606354068878e-05, "loss": 5.5861, "step": 15080 }, { "epoch": 0.75, "grad_norm": 2.0713131427764893, "learning_rate": 1.2742724442907258e-05, "loss": 5.5236, "step": 15084 }, { "epoch": 0.75, "grad_norm": 1.9570958614349365, "learning_rate": 1.273284253174564e-05, "loss": 5.4983, "step": 15088 }, { "epoch": 0.75, "grad_norm": 1.8373630046844482, "learning_rate": 1.272296062058402e-05, "loss": 5.4913, "step": 15092 }, { "epoch": 0.75, "grad_norm": 2.0442957878112793, "learning_rate": 1.2713078709422403e-05, "loss": 5.4757, "step": 15096 }, { "epoch": 0.75, "grad_norm": 2.090407609939575, "learning_rate": 1.2703196798260783e-05, "loss": 5.4915, "step": 15100 }, { "epoch": 0.75, "grad_norm": 2.1005630493164062, "learning_rate": 1.2693314887099167e-05, "loss": 5.5734, "step": 15104 }, { "epoch": 0.75, "grad_norm": 2.062366008758545, "learning_rate": 1.2683432975937549e-05, "loss": 5.5959, "step": 15108 }, { "epoch": 0.75, "grad_norm": 1.9203088283538818, "learning_rate": 1.267355106477593e-05, "loss": 5.4388, "step": 15112 }, { "epoch": 0.75, "grad_norm": 2.0604312419891357, "learning_rate": 1.266366915361431e-05, "loss": 5.5115, "step": 15116 }, { "epoch": 0.75, "grad_norm": 2.230130672454834, "learning_rate": 1.2653787242452692e-05, "loss": 5.596, "step": 15120 }, { "epoch": 0.75, "grad_norm": 2.0392181873321533, "learning_rate": 1.2643905331291072e-05, "loss": 5.57, "step": 15124 }, { "epoch": 0.75, "grad_norm": 2.253793954849243, "learning_rate": 1.2634023420129454e-05, "loss": 5.5402, "step": 15128 }, { "epoch": 0.75, 
"grad_norm": 1.9613460302352905, "learning_rate": 1.2624141508967834e-05, "loss": 5.3467, "step": 15132 }, { "epoch": 0.75, "grad_norm": 1.8354169130325317, "learning_rate": 1.2614259597806216e-05, "loss": 5.5377, "step": 15136 }, { "epoch": 0.75, "grad_norm": 1.8670376539230347, "learning_rate": 1.2604377686644597e-05, "loss": 5.4526, "step": 15140 }, { "epoch": 0.75, "grad_norm": 2.1439993381500244, "learning_rate": 1.2594495775482979e-05, "loss": 5.4915, "step": 15144 }, { "epoch": 0.75, "grad_norm": 1.9621843099594116, "learning_rate": 1.258461386432136e-05, "loss": 5.415, "step": 15148 }, { "epoch": 0.75, "grad_norm": 2.0130698680877686, "learning_rate": 1.2574731953159741e-05, "loss": 5.5646, "step": 15152 }, { "epoch": 0.75, "grad_norm": 2.1963608264923096, "learning_rate": 1.2564850041998122e-05, "loss": 5.5678, "step": 15156 }, { "epoch": 0.75, "grad_norm": 2.007394790649414, "learning_rate": 1.2554968130836504e-05, "loss": 5.4874, "step": 15160 }, { "epoch": 0.75, "grad_norm": 2.03348708152771, "learning_rate": 1.2545086219674888e-05, "loss": 5.3595, "step": 15164 }, { "epoch": 0.75, "grad_norm": 2.1427929401397705, "learning_rate": 1.2535204308513268e-05, "loss": 5.5416, "step": 15168 }, { "epoch": 0.75, "grad_norm": 2.0188114643096924, "learning_rate": 1.252532239735165e-05, "loss": 5.4923, "step": 15172 }, { "epoch": 0.75, "grad_norm": 1.9621504545211792, "learning_rate": 1.251544048619003e-05, "loss": 5.5167, "step": 15176 }, { "epoch": 0.75, "grad_norm": 2.1401383876800537, "learning_rate": 1.2505558575028412e-05, "loss": 5.3883, "step": 15180 }, { "epoch": 0.75, "grad_norm": 2.0091779232025146, "learning_rate": 1.2495676663866793e-05, "loss": 5.5855, "step": 15184 }, { "epoch": 0.75, "grad_norm": 2.0588550567626953, "learning_rate": 1.2485794752705173e-05, "loss": 5.4961, "step": 15188 }, { "epoch": 0.75, "grad_norm": 2.0023372173309326, "learning_rate": 1.2475912841543555e-05, "loss": 5.3909, "step": 15192 }, { "epoch": 0.75, "grad_norm": 
2.395747184753418, "learning_rate": 1.2466030930381936e-05, "loss": 5.4807, "step": 15196 }, { "epoch": 0.75, "grad_norm": 1.8098511695861816, "learning_rate": 1.2456149019220318e-05, "loss": 5.4387, "step": 15200 }, { "epoch": 0.75, "grad_norm": 1.9918867349624634, "learning_rate": 1.24462671080587e-05, "loss": 5.396, "step": 15204 }, { "epoch": 0.75, "grad_norm": 2.0140202045440674, "learning_rate": 1.2436385196897082e-05, "loss": 5.4098, "step": 15208 }, { "epoch": 0.75, "grad_norm": 1.9749282598495483, "learning_rate": 1.2426503285735462e-05, "loss": 5.4362, "step": 15212 }, { "epoch": 0.75, "grad_norm": 2.1992135047912598, "learning_rate": 1.2416621374573842e-05, "loss": 5.5986, "step": 15216 }, { "epoch": 0.75, "grad_norm": 2.3077468872070312, "learning_rate": 1.2406739463412225e-05, "loss": 5.5058, "step": 15220 }, { "epoch": 0.75, "grad_norm": 1.964931845664978, "learning_rate": 1.2396857552250605e-05, "loss": 5.3997, "step": 15224 }, { "epoch": 0.75, "grad_norm": 1.9411903619766235, "learning_rate": 1.2386975641088987e-05, "loss": 5.4008, "step": 15228 }, { "epoch": 0.75, "grad_norm": 2.01727557182312, "learning_rate": 1.2377093729927367e-05, "loss": 5.6187, "step": 15232 }, { "epoch": 0.75, "grad_norm": 1.9451463222503662, "learning_rate": 1.2367211818765751e-05, "loss": 5.5272, "step": 15236 }, { "epoch": 0.75, "grad_norm": 2.1012978553771973, "learning_rate": 1.2357329907604131e-05, "loss": 5.3657, "step": 15240 }, { "epoch": 0.75, "grad_norm": 2.219510078430176, "learning_rate": 1.2347447996442513e-05, "loss": 5.526, "step": 15244 }, { "epoch": 0.75, "grad_norm": 2.013444185256958, "learning_rate": 1.2337566085280894e-05, "loss": 5.4749, "step": 15248 }, { "epoch": 0.75, "grad_norm": 1.7549456357955933, "learning_rate": 1.2327684174119274e-05, "loss": 5.3478, "step": 15252 }, { "epoch": 0.75, "grad_norm": 1.8471717834472656, "learning_rate": 1.2317802262957656e-05, "loss": 5.5201, "step": 15256 }, { "epoch": 0.75, "grad_norm": 2.324028491973877, 
"learning_rate": 1.2307920351796037e-05, "loss": 5.5589, "step": 15260 }, { "epoch": 0.75, "grad_norm": 2.0255486965179443, "learning_rate": 1.229803844063442e-05, "loss": 5.5575, "step": 15264 }, { "epoch": 0.75, "grad_norm": 2.1873011589050293, "learning_rate": 1.22881565294728e-05, "loss": 5.5165, "step": 15268 }, { "epoch": 0.75, "grad_norm": 2.1346065998077393, "learning_rate": 1.2278274618311183e-05, "loss": 5.5053, "step": 15272 }, { "epoch": 0.75, "grad_norm": 2.1076815128326416, "learning_rate": 1.2268392707149563e-05, "loss": 5.4105, "step": 15276 }, { "epoch": 0.75, "grad_norm": 1.9122978448867798, "learning_rate": 1.2258510795987945e-05, "loss": 5.4235, "step": 15280 }, { "epoch": 0.76, "grad_norm": 2.054979085922241, "learning_rate": 1.2248628884826326e-05, "loss": 5.5838, "step": 15284 }, { "epoch": 0.76, "grad_norm": 1.9995005130767822, "learning_rate": 1.2238746973664706e-05, "loss": 5.4584, "step": 15288 }, { "epoch": 0.76, "grad_norm": 2.0757248401641846, "learning_rate": 1.222886506250309e-05, "loss": 5.5064, "step": 15292 }, { "epoch": 0.76, "grad_norm": 1.831465721130371, "learning_rate": 1.221898315134147e-05, "loss": 5.5416, "step": 15296 }, { "epoch": 0.76, "grad_norm": 2.3364105224609375, "learning_rate": 1.2209101240179852e-05, "loss": 5.5332, "step": 15300 }, { "epoch": 0.76, "grad_norm": 1.9546422958374023, "learning_rate": 1.2199219329018233e-05, "loss": 5.4726, "step": 15304 }, { "epoch": 0.76, "grad_norm": 1.8395370244979858, "learning_rate": 1.2189337417856615e-05, "loss": 5.4443, "step": 15308 }, { "epoch": 0.76, "grad_norm": 2.080458402633667, "learning_rate": 1.2179455506694995e-05, "loss": 5.5154, "step": 15312 }, { "epoch": 0.76, "grad_norm": 2.017116069793701, "learning_rate": 1.2169573595533375e-05, "loss": 5.5048, "step": 15316 }, { "epoch": 0.76, "grad_norm": 2.0325398445129395, "learning_rate": 1.2159691684371759e-05, "loss": 5.3668, "step": 15320 }, { "epoch": 0.76, "grad_norm": 2.246100664138794, "learning_rate": 
1.214980977321014e-05, "loss": 5.5176, "step": 15324 }, { "epoch": 0.76, "grad_norm": 2.0287086963653564, "learning_rate": 1.2139927862048522e-05, "loss": 5.5352, "step": 15328 }, { "epoch": 0.76, "grad_norm": 2.278211832046509, "learning_rate": 1.2130045950886902e-05, "loss": 5.441, "step": 15332 }, { "epoch": 0.76, "grad_norm": 2.143902063369751, "learning_rate": 1.2120164039725284e-05, "loss": 5.5263, "step": 15336 }, { "epoch": 0.76, "grad_norm": 2.0122079849243164, "learning_rate": 1.2110282128563664e-05, "loss": 5.3925, "step": 15340 }, { "epoch": 0.76, "grad_norm": 2.094726324081421, "learning_rate": 1.2100400217402046e-05, "loss": 5.4837, "step": 15344 }, { "epoch": 0.76, "grad_norm": 2.0904650688171387, "learning_rate": 1.2090518306240428e-05, "loss": 5.4732, "step": 15348 }, { "epoch": 0.76, "grad_norm": 1.869597315788269, "learning_rate": 1.2080636395078809e-05, "loss": 5.4292, "step": 15352 }, { "epoch": 0.76, "grad_norm": 2.0959110260009766, "learning_rate": 1.2070754483917191e-05, "loss": 5.5033, "step": 15356 }, { "epoch": 0.76, "grad_norm": 2.153909921646118, "learning_rate": 1.2060872572755571e-05, "loss": 5.4692, "step": 15360 }, { "epoch": 0.76, "grad_norm": 2.0445823669433594, "learning_rate": 1.2050990661593953e-05, "loss": 5.3999, "step": 15364 }, { "epoch": 0.76, "grad_norm": 2.0181336402893066, "learning_rate": 1.2041108750432334e-05, "loss": 5.4045, "step": 15368 }, { "epoch": 0.76, "grad_norm": 2.008654832839966, "learning_rate": 1.2031226839270716e-05, "loss": 5.6017, "step": 15372 }, { "epoch": 0.76, "grad_norm": 2.126469850540161, "learning_rate": 1.2021344928109096e-05, "loss": 5.4578, "step": 15376 }, { "epoch": 0.76, "grad_norm": 2.031398057937622, "learning_rate": 1.2011463016947478e-05, "loss": 5.5091, "step": 15380 }, { "epoch": 0.76, "grad_norm": 2.102151870727539, "learning_rate": 1.200158110578586e-05, "loss": 5.4637, "step": 15384 }, { "epoch": 0.76, "grad_norm": 1.9239962100982666, "learning_rate": 1.199169919462424e-05, 
"loss": 5.5014, "step": 15388 }, { "epoch": 0.76, "grad_norm": 1.9613525867462158, "learning_rate": 1.1981817283462623e-05, "loss": 5.5283, "step": 15392 }, { "epoch": 0.76, "grad_norm": 1.973003625869751, "learning_rate": 1.1971935372301003e-05, "loss": 5.4865, "step": 15396 }, { "epoch": 0.76, "grad_norm": 2.3486452102661133, "learning_rate": 1.1962053461139385e-05, "loss": 5.5125, "step": 15400 }, { "epoch": 0.76, "grad_norm": 2.088740825653076, "learning_rate": 1.1952171549977765e-05, "loss": 5.4416, "step": 15404 }, { "epoch": 0.76, "grad_norm": 1.9701464176177979, "learning_rate": 1.1942289638816147e-05, "loss": 5.2871, "step": 15408 }, { "epoch": 0.76, "grad_norm": 2.2388153076171875, "learning_rate": 1.193240772765453e-05, "loss": 5.551, "step": 15412 }, { "epoch": 0.76, "grad_norm": 2.2453413009643555, "learning_rate": 1.192252581649291e-05, "loss": 5.4372, "step": 15416 }, { "epoch": 0.76, "grad_norm": 2.1105456352233887, "learning_rate": 1.1912643905331292e-05, "loss": 5.5284, "step": 15420 }, { "epoch": 0.76, "grad_norm": 2.197547197341919, "learning_rate": 1.1902761994169672e-05, "loss": 5.5067, "step": 15424 }, { "epoch": 0.76, "grad_norm": 1.9585908651351929, "learning_rate": 1.1892880083008054e-05, "loss": 5.4292, "step": 15428 }, { "epoch": 0.76, "grad_norm": 2.037917137145996, "learning_rate": 1.1882998171846435e-05, "loss": 5.455, "step": 15432 }, { "epoch": 0.76, "grad_norm": 1.797452449798584, "learning_rate": 1.1873116260684817e-05, "loss": 5.4833, "step": 15436 }, { "epoch": 0.76, "grad_norm": 1.8614048957824707, "learning_rate": 1.1863234349523199e-05, "loss": 5.5127, "step": 15440 }, { "epoch": 0.76, "grad_norm": 1.9821441173553467, "learning_rate": 1.185335243836158e-05, "loss": 5.5038, "step": 15444 }, { "epoch": 0.76, "grad_norm": 2.3709757328033447, "learning_rate": 1.1843470527199961e-05, "loss": 5.4745, "step": 15448 }, { "epoch": 0.76, "grad_norm": 1.8125073909759521, "learning_rate": 1.1833588616038342e-05, "loss": 5.4642, "step": 
15452 }, { "epoch": 0.76, "grad_norm": 2.0087850093841553, "learning_rate": 1.1823706704876724e-05, "loss": 5.4628, "step": 15456 }, { "epoch": 0.76, "grad_norm": 1.9169509410858154, "learning_rate": 1.1813824793715104e-05, "loss": 5.4821, "step": 15460 }, { "epoch": 0.76, "grad_norm": 2.01556396484375, "learning_rate": 1.1803942882553488e-05, "loss": 5.4142, "step": 15464 }, { "epoch": 0.76, "grad_norm": 1.9918155670166016, "learning_rate": 1.1794060971391868e-05, "loss": 5.6412, "step": 15468 }, { "epoch": 0.76, "grad_norm": 2.052454710006714, "learning_rate": 1.1784179060230249e-05, "loss": 5.4347, "step": 15472 }, { "epoch": 0.76, "grad_norm": 1.8537468910217285, "learning_rate": 1.177429714906863e-05, "loss": 5.5036, "step": 15476 }, { "epoch": 0.76, "grad_norm": 1.9864877462387085, "learning_rate": 1.1764415237907011e-05, "loss": 5.4979, "step": 15480 }, { "epoch": 0.77, "grad_norm": 2.200070858001709, "learning_rate": 1.1754533326745393e-05, "loss": 5.5684, "step": 15484 }, { "epoch": 0.77, "grad_norm": 1.7679542303085327, "learning_rate": 1.1744651415583773e-05, "loss": 5.4283, "step": 15488 }, { "epoch": 0.77, "grad_norm": 1.8458738327026367, "learning_rate": 1.1734769504422157e-05, "loss": 5.5566, "step": 15492 }, { "epoch": 0.77, "grad_norm": 2.3362741470336914, "learning_rate": 1.1724887593260538e-05, "loss": 5.4377, "step": 15496 }, { "epoch": 0.77, "grad_norm": 1.7595479488372803, "learning_rate": 1.1715005682098918e-05, "loss": 5.4785, "step": 15500 }, { "epoch": 0.77, "grad_norm": 2.1757829189300537, "learning_rate": 1.17051237709373e-05, "loss": 5.5964, "step": 15504 }, { "epoch": 0.77, "grad_norm": 2.0845088958740234, "learning_rate": 1.169524185977568e-05, "loss": 5.5294, "step": 15508 }, { "epoch": 0.77, "grad_norm": 1.937070608139038, "learning_rate": 1.1685359948614062e-05, "loss": 5.5338, "step": 15512 }, { "epoch": 0.77, "grad_norm": 2.137470006942749, "learning_rate": 1.1675478037452443e-05, "loss": 5.4788, "step": 15516 }, { "epoch": 0.77, 
"grad_norm": 2.1320180892944336, "learning_rate": 1.1665596126290827e-05, "loss": 5.6173, "step": 15520 }, { "epoch": 0.77, "grad_norm": 2.0288352966308594, "learning_rate": 1.1655714215129207e-05, "loss": 5.3012, "step": 15524 }, { "epoch": 0.77, "grad_norm": 1.784881830215454, "learning_rate": 1.1645832303967589e-05, "loss": 5.5805, "step": 15528 }, { "epoch": 0.77, "grad_norm": 2.08077073097229, "learning_rate": 1.163595039280597e-05, "loss": 5.4992, "step": 15532 }, { "epoch": 0.77, "grad_norm": 1.7977101802825928, "learning_rate": 1.162606848164435e-05, "loss": 5.4872, "step": 15536 }, { "epoch": 0.77, "grad_norm": 2.186459541320801, "learning_rate": 1.1616186570482732e-05, "loss": 5.578, "step": 15540 }, { "epoch": 0.77, "grad_norm": 2.183048725128174, "learning_rate": 1.1606304659321112e-05, "loss": 5.4738, "step": 15544 }, { "epoch": 0.77, "grad_norm": 1.8500030040740967, "learning_rate": 1.1596422748159494e-05, "loss": 5.5345, "step": 15548 }, { "epoch": 0.77, "grad_norm": 1.8691611289978027, "learning_rate": 1.1586540836997876e-05, "loss": 5.5283, "step": 15552 }, { "epoch": 0.77, "grad_norm": 2.1216142177581787, "learning_rate": 1.1576658925836258e-05, "loss": 5.581, "step": 15556 }, { "epoch": 0.77, "grad_norm": 2.0468342304229736, "learning_rate": 1.1566777014674639e-05, "loss": 5.5649, "step": 15560 }, { "epoch": 0.77, "grad_norm": 2.060667037963867, "learning_rate": 1.155689510351302e-05, "loss": 5.4901, "step": 15564 }, { "epoch": 0.77, "grad_norm": 1.803126573562622, "learning_rate": 1.1547013192351401e-05, "loss": 5.4285, "step": 15568 }, { "epoch": 0.77, "grad_norm": 2.05906081199646, "learning_rate": 1.1537131281189781e-05, "loss": 5.4268, "step": 15572 }, { "epoch": 0.77, "grad_norm": 2.0089757442474365, "learning_rate": 1.1527249370028164e-05, "loss": 5.5307, "step": 15576 }, { "epoch": 0.77, "grad_norm": 1.9152356386184692, "learning_rate": 1.1517367458866546e-05, "loss": 5.4297, "step": 15580 }, { "epoch": 0.77, "grad_norm": 
1.9012370109558105, "learning_rate": 1.1507485547704928e-05, "loss": 5.5271, "step": 15584 }, { "epoch": 0.77, "grad_norm": 2.0786564350128174, "learning_rate": 1.1497603636543308e-05, "loss": 5.5957, "step": 15588 }, { "epoch": 0.77, "grad_norm": 2.03715443611145, "learning_rate": 1.148772172538169e-05, "loss": 5.5501, "step": 15592 }, { "epoch": 0.77, "grad_norm": 2.1424834728240967, "learning_rate": 1.147783981422007e-05, "loss": 5.5153, "step": 15596 }, { "epoch": 0.77, "grad_norm": 1.9324986934661865, "learning_rate": 1.146795790305845e-05, "loss": 5.5928, "step": 15600 }, { "epoch": 0.77, "grad_norm": 1.986244559288025, "learning_rate": 1.1458075991896833e-05, "loss": 5.4466, "step": 15604 }, { "epoch": 0.77, "grad_norm": 2.3215315341949463, "learning_rate": 1.1448194080735215e-05, "loss": 5.4948, "step": 15608 }, { "epoch": 0.77, "grad_norm": 2.2035694122314453, "learning_rate": 1.1438312169573597e-05, "loss": 5.6002, "step": 15612 }, { "epoch": 0.77, "grad_norm": 2.0092086791992188, "learning_rate": 1.1428430258411977e-05, "loss": 5.4083, "step": 15616 }, { "epoch": 0.77, "grad_norm": 2.008274555206299, "learning_rate": 1.141854834725036e-05, "loss": 5.3977, "step": 15620 }, { "epoch": 0.77, "grad_norm": 2.084843873977661, "learning_rate": 1.140866643608874e-05, "loss": 5.4896, "step": 15624 }, { "epoch": 0.77, "grad_norm": 2.138129949569702, "learning_rate": 1.1398784524927122e-05, "loss": 5.556, "step": 15628 }, { "epoch": 0.77, "grad_norm": 2.161590814590454, "learning_rate": 1.1388902613765502e-05, "loss": 5.4909, "step": 15632 }, { "epoch": 0.77, "grad_norm": 2.1914632320404053, "learning_rate": 1.1379020702603884e-05, "loss": 5.5578, "step": 15636 }, { "epoch": 0.77, "grad_norm": 2.254403829574585, "learning_rate": 1.1369138791442266e-05, "loss": 5.6521, "step": 15640 }, { "epoch": 0.77, "grad_norm": 1.9875483512878418, "learning_rate": 1.1359256880280647e-05, "loss": 5.4972, "step": 15644 }, { "epoch": 0.77, "grad_norm": 2.2201669216156006, 
"learning_rate": 1.1349374969119029e-05, "loss": 5.6261, "step": 15648 }, { "epoch": 0.77, "grad_norm": 2.054435968399048, "learning_rate": 1.1339493057957409e-05, "loss": 5.5603, "step": 15652 }, { "epoch": 0.77, "grad_norm": 1.902565598487854, "learning_rate": 1.1329611146795791e-05, "loss": 5.6618, "step": 15656 }, { "epoch": 0.77, "grad_norm": 2.2018725872039795, "learning_rate": 1.1319729235634172e-05, "loss": 5.429, "step": 15660 }, { "epoch": 0.77, "grad_norm": 1.9127063751220703, "learning_rate": 1.1309847324472554e-05, "loss": 5.5171, "step": 15664 }, { "epoch": 0.77, "grad_norm": 1.8942164182662964, "learning_rate": 1.1299965413310936e-05, "loss": 5.5357, "step": 15668 }, { "epoch": 0.77, "grad_norm": 2.2651684284210205, "learning_rate": 1.1290083502149316e-05, "loss": 5.4591, "step": 15672 }, { "epoch": 0.77, "grad_norm": 2.189774990081787, "learning_rate": 1.1280201590987698e-05, "loss": 5.4338, "step": 15676 }, { "epoch": 0.77, "grad_norm": 2.1134681701660156, "learning_rate": 1.1270319679826078e-05, "loss": 5.5294, "step": 15680 }, { "epoch": 0.77, "grad_norm": 2.2125091552734375, "learning_rate": 1.126043776866446e-05, "loss": 5.5269, "step": 15684 }, { "epoch": 0.78, "grad_norm": 1.9919397830963135, "learning_rate": 1.1250555857502841e-05, "loss": 5.4072, "step": 15688 }, { "epoch": 0.78, "grad_norm": 1.958975911140442, "learning_rate": 1.1240673946341223e-05, "loss": 5.4421, "step": 15692 }, { "epoch": 0.78, "grad_norm": 2.033118724822998, "learning_rate": 1.1230792035179605e-05, "loss": 5.5095, "step": 15696 }, { "epoch": 0.78, "grad_norm": 2.1201207637786865, "learning_rate": 1.1220910124017985e-05, "loss": 5.4767, "step": 15700 }, { "epoch": 0.78, "grad_norm": 1.9773985147476196, "learning_rate": 1.1211028212856367e-05, "loss": 5.424, "step": 15704 }, { "epoch": 0.78, "grad_norm": 1.841871976852417, "learning_rate": 1.1201146301694748e-05, "loss": 5.521, "step": 15708 }, { "epoch": 0.78, "grad_norm": 1.9351176023483276, "learning_rate": 
1.119126439053313e-05, "loss": 5.5646, "step": 15712 }, { "epoch": 0.78, "grad_norm": 2.0061535835266113, "learning_rate": 1.118138247937151e-05, "loss": 5.36, "step": 15716 }, { "epoch": 0.78, "grad_norm": 2.4020817279815674, "learning_rate": 1.1171500568209892e-05, "loss": 5.4701, "step": 15720 }, { "epoch": 0.78, "grad_norm": 1.9213863611221313, "learning_rate": 1.1161618657048274e-05, "loss": 5.4668, "step": 15724 }, { "epoch": 0.78, "grad_norm": 2.3538432121276855, "learning_rate": 1.1151736745886655e-05, "loss": 5.5128, "step": 15728 }, { "epoch": 0.78, "grad_norm": 2.147163152694702, "learning_rate": 1.1141854834725037e-05, "loss": 5.4579, "step": 15732 }, { "epoch": 0.78, "grad_norm": 2.1335911750793457, "learning_rate": 1.1131972923563417e-05, "loss": 5.4277, "step": 15736 }, { "epoch": 0.78, "grad_norm": 2.1882131099700928, "learning_rate": 1.11220910124018e-05, "loss": 5.6003, "step": 15740 }, { "epoch": 0.78, "grad_norm": 1.9644808769226074, "learning_rate": 1.111220910124018e-05, "loss": 5.5355, "step": 15744 }, { "epoch": 0.78, "grad_norm": 1.7521815299987793, "learning_rate": 1.1102327190078562e-05, "loss": 5.4743, "step": 15748 }, { "epoch": 0.78, "grad_norm": 2.1638858318328857, "learning_rate": 1.1092445278916944e-05, "loss": 5.3701, "step": 15752 }, { "epoch": 0.78, "grad_norm": 1.8966166973114014, "learning_rate": 1.1082563367755324e-05, "loss": 5.5355, "step": 15756 }, { "epoch": 0.78, "grad_norm": 1.981217861175537, "learning_rate": 1.1072681456593706e-05, "loss": 5.4366, "step": 15760 }, { "epoch": 0.78, "grad_norm": 2.0059125423431396, "learning_rate": 1.1062799545432086e-05, "loss": 5.4749, "step": 15764 }, { "epoch": 0.78, "grad_norm": 2.042475461959839, "learning_rate": 1.1052917634270469e-05, "loss": 5.4066, "step": 15768 }, { "epoch": 0.78, "grad_norm": 2.1431596279144287, "learning_rate": 1.1043035723108849e-05, "loss": 5.4831, "step": 15772 }, { "epoch": 0.78, "grad_norm": 1.903968095779419, "learning_rate": 1.1033153811947231e-05, 
"loss": 5.4749, "step": 15776 }, { "epoch": 0.78, "grad_norm": 2.2087221145629883, "learning_rate": 1.1023271900785613e-05, "loss": 5.5542, "step": 15780 }, { "epoch": 0.78, "grad_norm": 1.990248680114746, "learning_rate": 1.1013389989623993e-05, "loss": 5.4592, "step": 15784 }, { "epoch": 0.78, "grad_norm": 1.9822028875350952, "learning_rate": 1.1003508078462375e-05, "loss": 5.5245, "step": 15788 }, { "epoch": 0.78, "grad_norm": 2.0607571601867676, "learning_rate": 1.0993626167300756e-05, "loss": 5.5105, "step": 15792 }, { "epoch": 0.78, "grad_norm": 2.2510719299316406, "learning_rate": 1.0983744256139138e-05, "loss": 5.4485, "step": 15796 }, { "epoch": 0.78, "grad_norm": 1.9407929182052612, "learning_rate": 1.0973862344977518e-05, "loss": 5.4835, "step": 15800 }, { "epoch": 0.78, "grad_norm": 2.0228731632232666, "learning_rate": 1.09639804338159e-05, "loss": 5.4121, "step": 15804 }, { "epoch": 0.78, "grad_norm": 2.0110514163970947, "learning_rate": 1.0954098522654282e-05, "loss": 5.4097, "step": 15808 }, { "epoch": 0.78, "grad_norm": 2.005176305770874, "learning_rate": 1.0944216611492664e-05, "loss": 5.5651, "step": 15812 }, { "epoch": 0.78, "grad_norm": 2.08233380317688, "learning_rate": 1.0934334700331045e-05, "loss": 5.5942, "step": 15816 }, { "epoch": 0.78, "grad_norm": 1.76272714138031, "learning_rate": 1.0924452789169425e-05, "loss": 5.3883, "step": 15820 }, { "epoch": 0.78, "grad_norm": 1.8395804166793823, "learning_rate": 1.0914570878007807e-05, "loss": 5.372, "step": 15824 }, { "epoch": 0.78, "grad_norm": 2.197016954421997, "learning_rate": 1.0904688966846188e-05, "loss": 5.4239, "step": 15828 }, { "epoch": 0.78, "grad_norm": 2.1127779483795166, "learning_rate": 1.089480705568457e-05, "loss": 5.358, "step": 15832 }, { "epoch": 0.78, "grad_norm": 2.1906561851501465, "learning_rate": 1.0884925144522952e-05, "loss": 5.528, "step": 15836 }, { "epoch": 0.78, "grad_norm": 2.2963180541992188, "learning_rate": 1.0875043233361334e-05, "loss": 5.6399, "step": 
15840 }, { "epoch": 0.78, "grad_norm": 2.1306509971618652, "learning_rate": 1.0865161322199714e-05, "loss": 5.4986, "step": 15844 }, { "epoch": 0.78, "grad_norm": 1.8235065937042236, "learning_rate": 1.0855279411038096e-05, "loss": 5.5502, "step": 15848 }, { "epoch": 0.78, "grad_norm": 2.058922529220581, "learning_rate": 1.0845397499876477e-05, "loss": 5.5531, "step": 15852 }, { "epoch": 0.78, "grad_norm": 1.818724274635315, "learning_rate": 1.0835515588714857e-05, "loss": 5.4607, "step": 15856 }, { "epoch": 0.78, "grad_norm": 2.0503363609313965, "learning_rate": 1.0825633677553239e-05, "loss": 5.4722, "step": 15860 }, { "epoch": 0.78, "grad_norm": 2.076927423477173, "learning_rate": 1.081575176639162e-05, "loss": 5.4039, "step": 15864 }, { "epoch": 0.78, "grad_norm": 1.991584300994873, "learning_rate": 1.0805869855230003e-05, "loss": 5.4651, "step": 15868 }, { "epoch": 0.78, "grad_norm": 2.276181936264038, "learning_rate": 1.0795987944068383e-05, "loss": 5.5605, "step": 15872 }, { "epoch": 0.78, "grad_norm": 2.0964319705963135, "learning_rate": 1.0786106032906766e-05, "loss": 5.4315, "step": 15876 }, { "epoch": 0.78, "grad_norm": 2.0152671337127686, "learning_rate": 1.0776224121745146e-05, "loss": 5.5034, "step": 15880 }, { "epoch": 0.78, "grad_norm": 2.065906524658203, "learning_rate": 1.0766342210583528e-05, "loss": 5.5383, "step": 15884 }, { "epoch": 0.79, "grad_norm": 1.9010733366012573, "learning_rate": 1.0756460299421908e-05, "loss": 5.5424, "step": 15888 }, { "epoch": 0.79, "grad_norm": 1.9711081981658936, "learning_rate": 1.0746578388260289e-05, "loss": 5.4899, "step": 15892 }, { "epoch": 0.79, "grad_norm": 1.9570908546447754, "learning_rate": 1.0736696477098672e-05, "loss": 5.5072, "step": 15896 }, { "epoch": 0.79, "grad_norm": 2.1524243354797363, "learning_rate": 1.0726814565937053e-05, "loss": 5.4421, "step": 15900 }, { "epoch": 0.79, "grad_norm": 1.8811511993408203, "learning_rate": 1.0716932654775435e-05, "loss": 5.3984, "step": 15904 }, { "epoch": 
0.79, "grad_norm": 1.914753794670105, "learning_rate": 1.0707050743613815e-05, "loss": 5.5874, "step": 15908 }, { "epoch": 0.79, "grad_norm": 1.9757344722747803, "learning_rate": 1.0697168832452197e-05, "loss": 5.5402, "step": 15912 }, { "epoch": 0.79, "grad_norm": 2.1234004497528076, "learning_rate": 1.0689757399080982e-05, "loss": 5.4948, "step": 15916 }, { "epoch": 0.79, "grad_norm": 1.9321492910385132, "learning_rate": 1.0679875487919364e-05, "loss": 5.3983, "step": 15920 }, { "epoch": 0.79, "grad_norm": 2.0924885272979736, "learning_rate": 1.0669993576757746e-05, "loss": 5.3964, "step": 15924 }, { "epoch": 0.79, "grad_norm": 2.1206881999969482, "learning_rate": 1.0660111665596127e-05, "loss": 5.4496, "step": 15928 }, { "epoch": 0.79, "grad_norm": 2.230121374130249, "learning_rate": 1.0650229754434509e-05, "loss": 5.3782, "step": 15932 }, { "epoch": 0.79, "grad_norm": 1.9307682514190674, "learning_rate": 1.0640347843272889e-05, "loss": 5.4723, "step": 15936 }, { "epoch": 0.79, "grad_norm": 2.326744794845581, "learning_rate": 1.0630465932111271e-05, "loss": 5.4944, "step": 15940 }, { "epoch": 0.79, "grad_norm": 2.0075511932373047, "learning_rate": 1.0620584020949651e-05, "loss": 5.4213, "step": 15944 }, { "epoch": 0.79, "grad_norm": 2.0154924392700195, "learning_rate": 1.0610702109788034e-05, "loss": 5.5201, "step": 15948 }, { "epoch": 0.79, "grad_norm": 2.3839402198791504, "learning_rate": 1.0600820198626416e-05, "loss": 5.5373, "step": 15952 }, { "epoch": 0.79, "grad_norm": 1.8833853006362915, "learning_rate": 1.0590938287464796e-05, "loss": 5.5104, "step": 15956 }, { "epoch": 0.79, "grad_norm": 2.0425021648406982, "learning_rate": 1.0581056376303178e-05, "loss": 5.5934, "step": 15960 }, { "epoch": 0.79, "grad_norm": 1.8929284811019897, "learning_rate": 1.0571174465141558e-05, "loss": 5.5095, "step": 15964 }, { "epoch": 0.79, "grad_norm": 1.8911198377609253, "learning_rate": 1.056129255397994e-05, "loss": 5.4591, "step": 15968 }, { "epoch": 0.79, "grad_norm": 
1.856986403465271, "learning_rate": 1.055141064281832e-05, "loss": 5.35, "step": 15972 }, { "epoch": 0.79, "grad_norm": 1.8045761585235596, "learning_rate": 1.0541528731656703e-05, "loss": 5.5255, "step": 15976 }, { "epoch": 0.79, "grad_norm": 1.9339076280593872, "learning_rate": 1.0531646820495085e-05, "loss": 5.4632, "step": 15980 }, { "epoch": 0.79, "grad_norm": 2.076307535171509, "learning_rate": 1.0521764909333465e-05, "loss": 5.453, "step": 15984 }, { "epoch": 0.79, "grad_norm": 2.0790350437164307, "learning_rate": 1.0511882998171847e-05, "loss": 5.4504, "step": 15988 }, { "epoch": 0.79, "grad_norm": 2.015014171600342, "learning_rate": 1.0502001087010228e-05, "loss": 5.4082, "step": 15992 }, { "epoch": 0.79, "grad_norm": 1.9248104095458984, "learning_rate": 1.049211917584861e-05, "loss": 5.5261, "step": 15996 }, { "epoch": 0.79, "grad_norm": 1.9845161437988281, "learning_rate": 1.048223726468699e-05, "loss": 5.495, "step": 16000 }, { "epoch": 0.79, "grad_norm": 2.0011472702026367, "learning_rate": 1.0472355353525372e-05, "loss": 5.3597, "step": 16004 }, { "epoch": 0.79, "grad_norm": 2.0203723907470703, "learning_rate": 1.0462473442363753e-05, "loss": 5.5997, "step": 16008 }, { "epoch": 0.79, "grad_norm": 1.9304739236831665, "learning_rate": 1.0452591531202136e-05, "loss": 5.4136, "step": 16012 }, { "epoch": 0.79, "grad_norm": 2.1026952266693115, "learning_rate": 1.0442709620040517e-05, "loss": 5.5538, "step": 16016 }, { "epoch": 0.79, "grad_norm": 1.9498411417007446, "learning_rate": 1.0432827708878897e-05, "loss": 5.5066, "step": 16020 }, { "epoch": 0.79, "grad_norm": 2.043534517288208, "learning_rate": 1.0422945797717279e-05, "loss": 5.4616, "step": 16024 }, { "epoch": 0.79, "grad_norm": 1.9264411926269531, "learning_rate": 1.041306388655566e-05, "loss": 5.5081, "step": 16028 }, { "epoch": 0.79, "grad_norm": 1.9634205102920532, "learning_rate": 1.0403181975394042e-05, "loss": 5.436, "step": 16032 }, { "epoch": 0.79, "grad_norm": 1.9793167114257812, 
"learning_rate": 1.0393300064232422e-05, "loss": 5.4457, "step": 16036 }, { "epoch": 0.79, "grad_norm": 2.0029139518737793, "learning_rate": 1.0383418153070806e-05, "loss": 5.5097, "step": 16040 }, { "epoch": 0.79, "grad_norm": 1.93049156665802, "learning_rate": 1.0373536241909186e-05, "loss": 5.5088, "step": 16044 }, { "epoch": 0.79, "grad_norm": 1.9566268920898438, "learning_rate": 1.0363654330747566e-05, "loss": 5.509, "step": 16048 }, { "epoch": 0.79, "grad_norm": 1.9150116443634033, "learning_rate": 1.0353772419585948e-05, "loss": 5.432, "step": 16052 }, { "epoch": 0.79, "grad_norm": 1.9457064867019653, "learning_rate": 1.0343890508424329e-05, "loss": 5.5555, "step": 16056 }, { "epoch": 0.79, "grad_norm": 2.037177562713623, "learning_rate": 1.0334008597262711e-05, "loss": 5.4792, "step": 16060 }, { "epoch": 0.79, "grad_norm": 2.0516912937164307, "learning_rate": 1.0324126686101091e-05, "loss": 5.5053, "step": 16064 }, { "epoch": 0.79, "grad_norm": 1.7669730186462402, "learning_rate": 1.0314244774939475e-05, "loss": 5.3994, "step": 16068 }, { "epoch": 0.79, "grad_norm": 2.1283509731292725, "learning_rate": 1.0304362863777855e-05, "loss": 5.466, "step": 16072 }, { "epoch": 0.79, "grad_norm": 1.7946540117263794, "learning_rate": 1.0294480952616237e-05, "loss": 5.4215, "step": 16076 }, { "epoch": 0.79, "grad_norm": 2.273894786834717, "learning_rate": 1.0284599041454618e-05, "loss": 5.408, "step": 16080 }, { "epoch": 0.79, "grad_norm": 2.055126667022705, "learning_rate": 1.0274717130292998e-05, "loss": 5.5304, "step": 16084 }, { "epoch": 0.79, "grad_norm": 2.0089943408966064, "learning_rate": 1.026483521913138e-05, "loss": 5.4147, "step": 16088 }, { "epoch": 0.8, "grad_norm": 2.053406000137329, "learning_rate": 1.025495330796976e-05, "loss": 5.5624, "step": 16092 }, { "epoch": 0.8, "grad_norm": 1.9701050519943237, "learning_rate": 1.0245071396808144e-05, "loss": 5.3615, "step": 16096 }, { "epoch": 0.8, "grad_norm": 1.9590353965759277, "learning_rate": 
1.0235189485646525e-05, "loss": 5.4707, "step": 16100 }, { "epoch": 0.8, "grad_norm": 2.097073793411255, "learning_rate": 1.0225307574484907e-05, "loss": 5.4783, "step": 16104 }, { "epoch": 0.8, "grad_norm": 1.9956692457199097, "learning_rate": 1.0215425663323287e-05, "loss": 5.4288, "step": 16108 }, { "epoch": 0.8, "grad_norm": 1.8568942546844482, "learning_rate": 1.020554375216167e-05, "loss": 5.5691, "step": 16112 }, { "epoch": 0.8, "grad_norm": 1.9717293977737427, "learning_rate": 1.019566184100005e-05, "loss": 5.4763, "step": 16116 }, { "epoch": 0.8, "grad_norm": 2.085775375366211, "learning_rate": 1.018577992983843e-05, "loss": 5.5406, "step": 16120 }, { "epoch": 0.8, "grad_norm": 1.9938271045684814, "learning_rate": 1.0175898018676814e-05, "loss": 5.52, "step": 16124 }, { "epoch": 0.8, "grad_norm": 2.1408092975616455, "learning_rate": 1.0166016107515194e-05, "loss": 5.4718, "step": 16128 }, { "epoch": 0.8, "grad_norm": 2.084689140319824, "learning_rate": 1.0156134196353576e-05, "loss": 5.5263, "step": 16132 }, { "epoch": 0.8, "grad_norm": 1.8502254486083984, "learning_rate": 1.0146252285191956e-05, "loss": 5.4056, "step": 16136 }, { "epoch": 0.8, "grad_norm": 1.832261323928833, "learning_rate": 1.0136370374030339e-05, "loss": 5.4597, "step": 16140 }, { "epoch": 0.8, "grad_norm": 1.9201068878173828, "learning_rate": 1.0126488462868719e-05, "loss": 5.5378, "step": 16144 }, { "epoch": 0.8, "grad_norm": 1.8467752933502197, "learning_rate": 1.0116606551707101e-05, "loss": 5.4567, "step": 16148 }, { "epoch": 0.8, "grad_norm": 2.1131222248077393, "learning_rate": 1.0106724640545481e-05, "loss": 5.35, "step": 16152 }, { "epoch": 0.8, "grad_norm": 2.0209567546844482, "learning_rate": 1.0096842729383863e-05, "loss": 5.5339, "step": 16156 }, { "epoch": 0.8, "grad_norm": 2.1684625148773193, "learning_rate": 1.0086960818222245e-05, "loss": 5.4267, "step": 16160 }, { "epoch": 0.8, "grad_norm": 2.1010987758636475, "learning_rate": 1.0077078907060626e-05, "loss": 5.4042, 
"step": 16164 }, { "epoch": 0.8, "grad_norm": 1.9701296091079712, "learning_rate": 1.0067196995899008e-05, "loss": 5.6227, "step": 16168 }, { "epoch": 0.8, "grad_norm": 1.8271695375442505, "learning_rate": 1.0057315084737388e-05, "loss": 5.4151, "step": 16172 }, { "epoch": 0.8, "grad_norm": 2.2199959754943848, "learning_rate": 1.004743317357577e-05, "loss": 5.5352, "step": 16176 }, { "epoch": 0.8, "grad_norm": 2.2069809436798096, "learning_rate": 1.003755126241415e-05, "loss": 5.4987, "step": 16180 }, { "epoch": 0.8, "grad_norm": 2.027318239212036, "learning_rate": 1.0027669351252533e-05, "loss": 5.5284, "step": 16184 }, { "epoch": 0.8, "grad_norm": 1.9697614908218384, "learning_rate": 1.0017787440090915e-05, "loss": 5.5906, "step": 16188 }, { "epoch": 0.8, "grad_norm": 2.252358913421631, "learning_rate": 1.0007905528929295e-05, "loss": 5.4911, "step": 16192 }, { "epoch": 0.8, "grad_norm": 2.1323323249816895, "learning_rate": 9.998023617767677e-06, "loss": 5.5276, "step": 16196 }, { "epoch": 0.8, "grad_norm": 2.277160167694092, "learning_rate": 9.988141706606058e-06, "loss": 5.4718, "step": 16200 }, { "epoch": 0.8, "grad_norm": 1.8769733905792236, "learning_rate": 9.97825979544444e-06, "loss": 5.3622, "step": 16204 }, { "epoch": 0.8, "grad_norm": 1.8731465339660645, "learning_rate": 9.96837788428282e-06, "loss": 5.5215, "step": 16208 }, { "epoch": 0.8, "grad_norm": 1.8808999061584473, "learning_rate": 9.958495973121202e-06, "loss": 5.3953, "step": 16212 }, { "epoch": 0.8, "grad_norm": 1.8657152652740479, "learning_rate": 9.948614061959584e-06, "loss": 5.3316, "step": 16216 }, { "epoch": 0.8, "grad_norm": 1.849173665046692, "learning_rate": 9.938732150797965e-06, "loss": 5.4471, "step": 16220 }, { "epoch": 0.8, "grad_norm": 2.220717668533325, "learning_rate": 9.928850239636347e-06, "loss": 5.5204, "step": 16224 }, { "epoch": 0.8, "grad_norm": 2.0210342407226562, "learning_rate": 9.918968328474727e-06, "loss": 5.3364, "step": 16228 }, { "epoch": 0.8, "grad_norm": 
1.9695372581481934, "learning_rate": 9.909086417313109e-06, "loss": 5.485, "step": 16232 }, { "epoch": 0.8, "grad_norm": 1.875001311302185, "learning_rate": 9.89920450615149e-06, "loss": 5.3977, "step": 16236 }, { "epoch": 0.8, "grad_norm": 2.136852741241455, "learning_rate": 9.889322594989871e-06, "loss": 5.5784, "step": 16240 }, { "epoch": 0.8, "grad_norm": 1.8972970247268677, "learning_rate": 9.879440683828253e-06, "loss": 5.4973, "step": 16244 }, { "epoch": 0.8, "grad_norm": 2.14034366607666, "learning_rate": 9.869558772666634e-06, "loss": 5.546, "step": 16248 }, { "epoch": 0.8, "grad_norm": 2.0028293132781982, "learning_rate": 9.859676861505016e-06, "loss": 5.3869, "step": 16252 }, { "epoch": 0.8, "grad_norm": 1.9485490322113037, "learning_rate": 9.849794950343396e-06, "loss": 5.4852, "step": 16256 }, { "epoch": 0.8, "grad_norm": 1.9260238409042358, "learning_rate": 9.839913039181778e-06, "loss": 5.473, "step": 16260 }, { "epoch": 0.8, "grad_norm": 2.1630001068115234, "learning_rate": 9.830031128020159e-06, "loss": 5.4207, "step": 16264 }, { "epoch": 0.8, "grad_norm": 1.9328508377075195, "learning_rate": 9.82014921685854e-06, "loss": 5.4433, "step": 16268 }, { "epoch": 0.8, "grad_norm": 1.9832642078399658, "learning_rate": 9.810267305696923e-06, "loss": 5.4387, "step": 16272 }, { "epoch": 0.8, "grad_norm": 2.0616111755371094, "learning_rate": 9.800385394535303e-06, "loss": 5.3895, "step": 16276 }, { "epoch": 0.8, "grad_norm": 2.1709606647491455, "learning_rate": 9.790503483373685e-06, "loss": 5.5639, "step": 16280 }, { "epoch": 0.8, "grad_norm": 2.1553335189819336, "learning_rate": 9.780621572212066e-06, "loss": 5.5419, "step": 16284 }, { "epoch": 0.8, "grad_norm": 1.7869793176651, "learning_rate": 9.770739661050448e-06, "loss": 5.4425, "step": 16288 }, { "epoch": 0.8, "grad_norm": 1.902388095855713, "learning_rate": 9.760857749888828e-06, "loss": 5.3651, "step": 16292 }, { "epoch": 0.81, "grad_norm": 2.135723829269409, "learning_rate": 9.750975838727212e-06, 
"loss": 5.5838, "step": 16296 }, { "epoch": 0.81, "grad_norm": 2.3749701976776123, "learning_rate": 9.741093927565592e-06, "loss": 5.5562, "step": 16300 }, { "epoch": 0.81, "grad_norm": 1.9421731233596802, "learning_rate": 9.731212016403973e-06, "loss": 5.432, "step": 16304 }, { "epoch": 0.81, "grad_norm": 1.87296462059021, "learning_rate": 9.721330105242355e-06, "loss": 5.5429, "step": 16308 }, { "epoch": 0.81, "grad_norm": 2.0159077644348145, "learning_rate": 9.711448194080735e-06, "loss": 5.5088, "step": 16312 }, { "epoch": 0.81, "grad_norm": 1.9539657831192017, "learning_rate": 9.701566282919117e-06, "loss": 5.49, "step": 16316 }, { "epoch": 0.81, "grad_norm": 2.0840256214141846, "learning_rate": 9.691684371757497e-06, "loss": 5.4972, "step": 16320 }, { "epoch": 0.81, "grad_norm": 2.0817222595214844, "learning_rate": 9.68180246059588e-06, "loss": 5.5185, "step": 16324 }, { "epoch": 0.81, "grad_norm": 2.172551393508911, "learning_rate": 9.671920549434262e-06, "loss": 5.5465, "step": 16328 }, { "epoch": 0.81, "grad_norm": 1.8779146671295166, "learning_rate": 9.662038638272644e-06, "loss": 5.5047, "step": 16332 }, { "epoch": 0.81, "grad_norm": 1.9017332792282104, "learning_rate": 9.652156727111024e-06, "loss": 5.4705, "step": 16336 }, { "epoch": 0.81, "grad_norm": 2.0198209285736084, "learning_rate": 9.642274815949404e-06, "loss": 5.4886, "step": 16340 }, { "epoch": 0.81, "grad_norm": 2.0686516761779785, "learning_rate": 9.632392904787786e-06, "loss": 5.5188, "step": 16344 }, { "epoch": 0.81, "grad_norm": 1.962902545928955, "learning_rate": 9.622510993626167e-06, "loss": 5.5414, "step": 16348 }, { "epoch": 0.81, "grad_norm": 2.0159122943878174, "learning_rate": 9.612629082464549e-06, "loss": 5.422, "step": 16352 }, { "epoch": 0.81, "grad_norm": 2.2383811473846436, "learning_rate": 9.60274717130293e-06, "loss": 5.5384, "step": 16356 }, { "epoch": 0.81, "grad_norm": 1.993220567703247, "learning_rate": 9.592865260141313e-06, "loss": 5.3551, "step": 16360 }, { 
"epoch": 0.81, "grad_norm": 2.130995035171509, "learning_rate": 9.582983348979693e-06, "loss": 5.4926, "step": 16364 }, { "epoch": 0.81, "grad_norm": 1.9019581079483032, "learning_rate": 9.573101437818074e-06, "loss": 5.5623, "step": 16368 }, { "epoch": 0.81, "grad_norm": 2.1175732612609863, "learning_rate": 9.563219526656456e-06, "loss": 5.4529, "step": 16372 }, { "epoch": 0.81, "grad_norm": 1.8680050373077393, "learning_rate": 9.553337615494836e-06, "loss": 5.4273, "step": 16376 }, { "epoch": 0.81, "grad_norm": 2.0988821983337402, "learning_rate": 9.543455704333218e-06, "loss": 5.477, "step": 16380 }, { "epoch": 0.81, "grad_norm": 2.198651075363159, "learning_rate": 9.5335737931716e-06, "loss": 5.5427, "step": 16384 }, { "epoch": 0.81, "grad_norm": 2.1054868698120117, "learning_rate": 9.523691882009982e-06, "loss": 5.4173, "step": 16388 }, { "epoch": 0.81, "grad_norm": 2.0211637020111084, "learning_rate": 9.513809970848363e-06, "loss": 5.6035, "step": 16392 }, { "epoch": 0.81, "grad_norm": 2.0547540187835693, "learning_rate": 9.503928059686745e-06, "loss": 5.4983, "step": 16396 }, { "epoch": 0.81, "grad_norm": 1.9957647323608398, "learning_rate": 9.494046148525125e-06, "loss": 5.5374, "step": 16400 }, { "epoch": 0.81, "grad_norm": 2.0535998344421387, "learning_rate": 9.484164237363505e-06, "loss": 5.5643, "step": 16404 }, { "epoch": 0.81, "grad_norm": 2.1046228408813477, "learning_rate": 9.474282326201887e-06, "loss": 5.4727, "step": 16408 }, { "epoch": 0.81, "grad_norm": 2.1698873043060303, "learning_rate": 9.46440041504027e-06, "loss": 5.5637, "step": 16412 }, { "epoch": 0.81, "grad_norm": 1.9448730945587158, "learning_rate": 9.454518503878652e-06, "loss": 5.4932, "step": 16416 }, { "epoch": 0.81, "grad_norm": 2.0663201808929443, "learning_rate": 9.444636592717032e-06, "loss": 5.5067, "step": 16420 }, { "epoch": 0.81, "grad_norm": 1.8125361204147339, "learning_rate": 9.434754681555414e-06, "loss": 5.5381, "step": 16424 }, { "epoch": 0.81, "grad_norm": 
2.077420473098755, "learning_rate": 9.424872770393794e-06, "loss": 5.5642, "step": 16428 }, { "epoch": 0.81, "grad_norm": 1.9312140941619873, "learning_rate": 9.414990859232176e-06, "loss": 5.3661, "step": 16432 }, { "epoch": 0.81, "grad_norm": 1.8529763221740723, "learning_rate": 9.405108948070557e-06, "loss": 5.4776, "step": 16436 }, { "epoch": 0.81, "grad_norm": 1.8764407634735107, "learning_rate": 9.395227036908939e-06, "loss": 5.4928, "step": 16440 }, { "epoch": 0.81, "grad_norm": 2.139594316482544, "learning_rate": 9.385345125747321e-06, "loss": 5.4108, "step": 16444 }, { "epoch": 0.81, "grad_norm": 2.1105079650878906, "learning_rate": 9.375463214585701e-06, "loss": 5.4876, "step": 16448 }, { "epoch": 0.81, "grad_norm": 1.9484140872955322, "learning_rate": 9.365581303424083e-06, "loss": 5.4148, "step": 16452 }, { "epoch": 0.81, "grad_norm": 2.0571391582489014, "learning_rate": 9.355699392262464e-06, "loss": 5.6282, "step": 16456 }, { "epoch": 0.81, "grad_norm": 1.7625576257705688, "learning_rate": 9.345817481100846e-06, "loss": 5.5073, "step": 16460 }, { "epoch": 0.81, "grad_norm": 2.1183722019195557, "learning_rate": 9.335935569939226e-06, "loss": 5.4659, "step": 16464 }, { "epoch": 0.81, "grad_norm": 2.135255813598633, "learning_rate": 9.326053658777608e-06, "loss": 5.4262, "step": 16468 }, { "epoch": 0.81, "grad_norm": 1.7497916221618652, "learning_rate": 9.31617174761599e-06, "loss": 5.537, "step": 16472 }, { "epoch": 0.81, "grad_norm": 1.9297901391983032, "learning_rate": 9.30628983645437e-06, "loss": 5.4848, "step": 16476 }, { "epoch": 0.81, "grad_norm": 1.992133617401123, "learning_rate": 9.296407925292753e-06, "loss": 5.5307, "step": 16480 }, { "epoch": 0.81, "grad_norm": 1.8346421718597412, "learning_rate": 9.286526014131133e-06, "loss": 5.4071, "step": 16484 }, { "epoch": 0.81, "grad_norm": 1.9083247184753418, "learning_rate": 9.276644102969515e-06, "loss": 5.4248, "step": 16488 }, { "epoch": 0.81, "grad_norm": 2.0954606533050537, "learning_rate": 
9.266762191807895e-06, "loss": 5.5175, "step": 16492 }, { "epoch": 0.82, "grad_norm": 2.101158857345581, "learning_rate": 9.256880280646278e-06, "loss": 5.5345, "step": 16496 }, { "epoch": 0.82, "grad_norm": 2.170283794403076, "learning_rate": 9.24699836948466e-06, "loss": 5.5719, "step": 16500 }, { "epoch": 0.82, "grad_norm": 2.078697681427002, "learning_rate": 9.23711645832304e-06, "loss": 5.4199, "step": 16504 }, { "epoch": 0.82, "grad_norm": 2.1254682540893555, "learning_rate": 9.227234547161422e-06, "loss": 5.3647, "step": 16508 }, { "epoch": 0.82, "grad_norm": 2.2468132972717285, "learning_rate": 9.217352635999802e-06, "loss": 5.4673, "step": 16512 }, { "epoch": 0.82, "grad_norm": 2.053579330444336, "learning_rate": 9.207470724838184e-06, "loss": 5.4083, "step": 16516 }, { "epoch": 0.82, "grad_norm": 2.2088301181793213, "learning_rate": 9.197588813676565e-06, "loss": 5.5694, "step": 16520 }, { "epoch": 0.82, "grad_norm": 1.9974719285964966, "learning_rate": 9.187706902514947e-06, "loss": 5.4396, "step": 16524 }, { "epoch": 0.82, "grad_norm": 2.066420555114746, "learning_rate": 9.177824991353329e-06, "loss": 5.5418, "step": 16528 }, { "epoch": 0.82, "grad_norm": 2.2263917922973633, "learning_rate": 9.16794308019171e-06, "loss": 5.4904, "step": 16532 }, { "epoch": 0.82, "grad_norm": 2.0956313610076904, "learning_rate": 9.158061169030091e-06, "loss": 5.47, "step": 16536 }, { "epoch": 0.82, "grad_norm": 2.120701551437378, "learning_rate": 9.148179257868472e-06, "loss": 5.4498, "step": 16540 }, { "epoch": 0.82, "grad_norm": 2.068040609359741, "learning_rate": 9.138297346706854e-06, "loss": 5.5373, "step": 16544 }, { "epoch": 0.82, "grad_norm": 2.125322103500366, "learning_rate": 9.128415435545234e-06, "loss": 5.4782, "step": 16548 }, { "epoch": 0.82, "grad_norm": 1.8984034061431885, "learning_rate": 9.118533524383616e-06, "loss": 5.5614, "step": 16552 }, { "epoch": 0.82, "grad_norm": 2.038201332092285, "learning_rate": 9.108651613221998e-06, "loss": 5.5308, 
"step": 16556 }, { "epoch": 0.82, "grad_norm": 2.083704948425293, "learning_rate": 9.098769702060379e-06, "loss": 5.3986, "step": 16560 }, { "epoch": 0.82, "grad_norm": 2.2222559452056885, "learning_rate": 9.08888779089876e-06, "loss": 5.4164, "step": 16564 }, { "epoch": 0.82, "grad_norm": 2.319937229156494, "learning_rate": 9.079005879737141e-06, "loss": 5.5564, "step": 16568 }, { "epoch": 0.82, "grad_norm": 1.930305004119873, "learning_rate": 9.069123968575523e-06, "loss": 5.5181, "step": 16572 }, { "epoch": 0.82, "grad_norm": 2.175090789794922, "learning_rate": 9.059242057413904e-06, "loss": 5.4816, "step": 16576 }, { "epoch": 0.82, "grad_norm": 1.9041398763656616, "learning_rate": 9.049360146252286e-06, "loss": 5.4462, "step": 16580 }, { "epoch": 0.82, "grad_norm": 1.92721426486969, "learning_rate": 9.039478235090668e-06, "loss": 5.4993, "step": 16584 }, { "epoch": 0.82, "grad_norm": 2.098320484161377, "learning_rate": 9.029596323929048e-06, "loss": 5.4334, "step": 16588 }, { "epoch": 0.82, "grad_norm": 1.988296389579773, "learning_rate": 9.01971441276743e-06, "loss": 5.5241, "step": 16592 }, { "epoch": 0.82, "grad_norm": 1.9492675065994263, "learning_rate": 9.00983250160581e-06, "loss": 5.4024, "step": 16596 }, { "epoch": 0.82, "grad_norm": 2.0381886959075928, "learning_rate": 8.999950590444192e-06, "loss": 5.4861, "step": 16600 }, { "epoch": 0.82, "grad_norm": 2.2015647888183594, "learning_rate": 8.990068679282573e-06, "loss": 5.4589, "step": 16604 }, { "epoch": 0.82, "grad_norm": 1.970509648323059, "learning_rate": 8.980186768120955e-06, "loss": 5.5072, "step": 16608 }, { "epoch": 0.82, "grad_norm": 2.048265218734741, "learning_rate": 8.970304856959337e-06, "loss": 5.4781, "step": 16612 }, { "epoch": 0.82, "grad_norm": 2.1781177520751953, "learning_rate": 8.960422945797719e-06, "loss": 5.4382, "step": 16616 }, { "epoch": 0.82, "grad_norm": 2.3206918239593506, "learning_rate": 8.9505410346361e-06, "loss": 5.5371, "step": 16620 }, { "epoch": 0.82, "grad_norm": 
2.129166603088379, "learning_rate": 8.94065912347448e-06, "loss": 5.4773, "step": 16624 }, { "epoch": 0.82, "grad_norm": 1.8786160945892334, "learning_rate": 8.930777212312862e-06, "loss": 5.3599, "step": 16628 }, { "epoch": 0.82, "grad_norm": 2.114015579223633, "learning_rate": 8.920895301151242e-06, "loss": 5.5262, "step": 16632 }, { "epoch": 0.82, "grad_norm": 1.9730268716812134, "learning_rate": 8.911013389989624e-06, "loss": 5.3921, "step": 16636 }, { "epoch": 0.82, "grad_norm": 2.007050037384033, "learning_rate": 8.901131478828005e-06, "loss": 5.4521, "step": 16640 }, { "epoch": 0.82, "grad_norm": 1.9782698154449463, "learning_rate": 8.891249567666388e-06, "loss": 5.4024, "step": 16644 }, { "epoch": 0.82, "grad_norm": 2.118109941482544, "learning_rate": 8.881367656504769e-06, "loss": 5.4549, "step": 16648 }, { "epoch": 0.82, "grad_norm": 1.9269788265228271, "learning_rate": 8.871485745343149e-06, "loss": 5.3879, "step": 16652 }, { "epoch": 0.82, "grad_norm": 2.0356998443603516, "learning_rate": 8.861603834181531e-06, "loss": 5.4729, "step": 16656 }, { "epoch": 0.82, "grad_norm": 1.9945244789123535, "learning_rate": 8.851721923019912e-06, "loss": 5.489, "step": 16660 }, { "epoch": 0.82, "grad_norm": 2.1839029788970947, "learning_rate": 8.841840011858294e-06, "loss": 5.3626, "step": 16664 }, { "epoch": 0.82, "grad_norm": 1.9056282043457031, "learning_rate": 8.831958100696674e-06, "loss": 5.5155, "step": 16668 }, { "epoch": 0.82, "grad_norm": 1.971134066581726, "learning_rate": 8.822076189535058e-06, "loss": 5.4582, "step": 16672 }, { "epoch": 0.82, "grad_norm": 2.2608683109283447, "learning_rate": 8.812194278373438e-06, "loss": 5.5671, "step": 16676 }, { "epoch": 0.82, "grad_norm": 1.8873885869979858, "learning_rate": 8.80231236721182e-06, "loss": 5.5378, "step": 16680 }, { "epoch": 0.82, "grad_norm": 2.1268815994262695, "learning_rate": 8.7924304560502e-06, "loss": 5.5464, "step": 16684 }, { "epoch": 0.82, "grad_norm": 2.120333671569824, "learning_rate": 
8.782548544888581e-06, "loss": 5.4566, "step": 16688 }, { "epoch": 0.82, "grad_norm": 2.202099323272705, "learning_rate": 8.772666633726963e-06, "loss": 5.3333, "step": 16692 }, { "epoch": 0.82, "grad_norm": 1.8824645280838013, "learning_rate": 8.762784722565343e-06, "loss": 5.5016, "step": 16696 }, { "epoch": 0.83, "grad_norm": 2.0147457122802734, "learning_rate": 8.752902811403727e-06, "loss": 5.4586, "step": 16700 }, { "epoch": 0.83, "grad_norm": 2.041895627975464, "learning_rate": 8.743020900242107e-06, "loss": 5.5022, "step": 16704 }, { "epoch": 0.83, "grad_norm": 2.077690362930298, "learning_rate": 8.73313898908049e-06, "loss": 5.5781, "step": 16708 }, { "epoch": 0.83, "grad_norm": 1.9477964639663696, "learning_rate": 8.72325707791887e-06, "loss": 5.5584, "step": 16712 }, { "epoch": 0.83, "grad_norm": 1.8398356437683105, "learning_rate": 8.713375166757252e-06, "loss": 5.5032, "step": 16716 }, { "epoch": 0.83, "grad_norm": 1.9973992109298706, "learning_rate": 8.703493255595632e-06, "loss": 5.37, "step": 16720 }, { "epoch": 0.83, "grad_norm": 2.0176520347595215, "learning_rate": 8.693611344434013e-06, "loss": 5.5138, "step": 16724 }, { "epoch": 0.83, "grad_norm": 2.1837217807769775, "learning_rate": 8.683729433272396e-06, "loss": 5.5308, "step": 16728 }, { "epoch": 0.83, "grad_norm": 2.0156595706939697, "learning_rate": 8.673847522110777e-06, "loss": 5.5722, "step": 16732 }, { "epoch": 0.83, "grad_norm": 2.3676466941833496, "learning_rate": 8.663965610949159e-06, "loss": 5.5775, "step": 16736 }, { "epoch": 0.83, "grad_norm": 2.270716667175293, "learning_rate": 8.65408369978754e-06, "loss": 5.4107, "step": 16740 }, { "epoch": 0.83, "grad_norm": 1.9551721811294556, "learning_rate": 8.644201788625921e-06, "loss": 5.6022, "step": 16744 }, { "epoch": 0.83, "grad_norm": 1.9827896356582642, "learning_rate": 8.634319877464302e-06, "loss": 5.4923, "step": 16748 }, { "epoch": 0.83, "grad_norm": 2.1360533237457275, "learning_rate": 8.624437966302682e-06, "loss": 5.4328, 
"step": 16752 }, { "epoch": 0.83, "grad_norm": 2.094109296798706, "learning_rate": 8.614556055141066e-06, "loss": 5.416, "step": 16756 }, { "epoch": 0.83, "grad_norm": 1.9513869285583496, "learning_rate": 8.604674143979446e-06, "loss": 5.403, "step": 16760 }, { "epoch": 0.83, "grad_norm": 2.0020523071289062, "learning_rate": 8.594792232817828e-06, "loss": 5.4628, "step": 16764 }, { "epoch": 0.83, "grad_norm": 2.405801296234131, "learning_rate": 8.584910321656209e-06, "loss": 5.5799, "step": 16768 }, { "epoch": 0.83, "grad_norm": 2.114650249481201, "learning_rate": 8.57502841049459e-06, "loss": 5.4151, "step": 16772 }, { "epoch": 0.83, "grad_norm": 1.9269883632659912, "learning_rate": 8.565146499332971e-06, "loss": 5.4321, "step": 16776 }, { "epoch": 0.83, "grad_norm": 1.894822597503662, "learning_rate": 8.555264588171353e-06, "loss": 5.4419, "step": 16780 }, { "epoch": 0.83, "grad_norm": 2.0186209678649902, "learning_rate": 8.545382677009735e-06, "loss": 5.5657, "step": 16784 }, { "epoch": 0.83, "grad_norm": 1.920372486114502, "learning_rate": 8.535500765848115e-06, "loss": 5.5334, "step": 16788 }, { "epoch": 0.83, "grad_norm": 2.1055715084075928, "learning_rate": 8.525618854686497e-06, "loss": 5.4952, "step": 16792 }, { "epoch": 0.83, "grad_norm": 2.0137712955474854, "learning_rate": 8.515736943524878e-06, "loss": 5.5128, "step": 16796 }, { "epoch": 0.83, "grad_norm": 2.0473227500915527, "learning_rate": 8.50585503236326e-06, "loss": 5.6793, "step": 16800 }, { "epoch": 0.83, "grad_norm": 2.0757429599761963, "learning_rate": 8.49597312120164e-06, "loss": 5.4888, "step": 16804 }, { "epoch": 0.83, "grad_norm": 1.9529187679290771, "learning_rate": 8.486091210040022e-06, "loss": 5.2731, "step": 16808 }, { "epoch": 0.83, "grad_norm": 1.897220492362976, "learning_rate": 8.476209298878403e-06, "loss": 5.4103, "step": 16812 }, { "epoch": 0.83, "grad_norm": 2.0591204166412354, "learning_rate": 8.466327387716785e-06, "loss": 5.5609, "step": 16816 }, { "epoch": 0.83, 
"grad_norm": 1.8429813385009766, "learning_rate": 8.456445476555167e-06, "loss": 5.3453, "step": 16820 }, { "epoch": 0.83, "grad_norm": 1.854067087173462, "learning_rate": 8.446563565393547e-06, "loss": 5.4855, "step": 16824 }, { "epoch": 0.83, "grad_norm": 1.87723708152771, "learning_rate": 8.43668165423193e-06, "loss": 5.5087, "step": 16828 }, { "epoch": 0.83, "grad_norm": 2.25486159324646, "learning_rate": 8.42679974307031e-06, "loss": 5.6419, "step": 16832 }, { "epoch": 0.83, "grad_norm": 2.1054129600524902, "learning_rate": 8.416917831908692e-06, "loss": 5.4713, "step": 16836 }, { "epoch": 0.83, "grad_norm": 1.9546363353729248, "learning_rate": 8.407035920747072e-06, "loss": 5.51, "step": 16840 }, { "epoch": 0.83, "grad_norm": 1.8574483394622803, "learning_rate": 8.397154009585454e-06, "loss": 5.4273, "step": 16844 }, { "epoch": 0.83, "grad_norm": 2.135690927505493, "learning_rate": 8.387272098423836e-06, "loss": 5.4408, "step": 16848 }, { "epoch": 0.83, "grad_norm": 1.9898631572723389, "learning_rate": 8.377390187262217e-06, "loss": 5.2905, "step": 16852 }, { "epoch": 0.83, "grad_norm": 2.016470193862915, "learning_rate": 8.367508276100599e-06, "loss": 5.3266, "step": 16856 }, { "epoch": 0.83, "grad_norm": 1.9973735809326172, "learning_rate": 8.357626364938979e-06, "loss": 5.4987, "step": 16860 }, { "epoch": 0.83, "grad_norm": 2.116567373275757, "learning_rate": 8.347744453777361e-06, "loss": 5.4306, "step": 16864 }, { "epoch": 0.83, "grad_norm": 2.044475793838501, "learning_rate": 8.337862542615741e-06, "loss": 5.4907, "step": 16868 }, { "epoch": 0.83, "grad_norm": 2.090527296066284, "learning_rate": 8.327980631454123e-06, "loss": 5.3698, "step": 16872 }, { "epoch": 0.83, "grad_norm": 1.98384428024292, "learning_rate": 8.318098720292506e-06, "loss": 5.469, "step": 16876 }, { "epoch": 0.83, "grad_norm": 1.775121808052063, "learning_rate": 8.308216809130886e-06, "loss": 5.461, "step": 16880 }, { "epoch": 0.83, "grad_norm": 1.9661427736282349, "learning_rate": 
8.298334897969268e-06, "loss": 5.494, "step": 16884 }, { "epoch": 0.83, "grad_norm": 2.0031895637512207, "learning_rate": 8.288452986807648e-06, "loss": 5.5011, "step": 16888 }, { "epoch": 0.83, "grad_norm": 2.221911907196045, "learning_rate": 8.27857107564603e-06, "loss": 5.4296, "step": 16892 }, { "epoch": 0.83, "grad_norm": 2.0504343509674072, "learning_rate": 8.26868916448441e-06, "loss": 5.5495, "step": 16896 }, { "epoch": 0.84, "grad_norm": 2.1068339347839355, "learning_rate": 8.258807253322794e-06, "loss": 5.408, "step": 16900 }, { "epoch": 0.84, "grad_norm": 2.0044867992401123, "learning_rate": 8.248925342161175e-06, "loss": 5.512, "step": 16904 }, { "epoch": 0.84, "grad_norm": 2.3192813396453857, "learning_rate": 8.239043430999555e-06, "loss": 5.4185, "step": 16908 }, { "epoch": 0.84, "grad_norm": 1.8410991430282593, "learning_rate": 8.229161519837937e-06, "loss": 5.4222, "step": 16912 }, { "epoch": 0.84, "grad_norm": 2.0134191513061523, "learning_rate": 8.219279608676318e-06, "loss": 5.4357, "step": 16916 }, { "epoch": 0.84, "grad_norm": 2.0390844345092773, "learning_rate": 8.2093976975147e-06, "loss": 5.5363, "step": 16920 }, { "epoch": 0.84, "grad_norm": 2.12786602973938, "learning_rate": 8.19951578635308e-06, "loss": 5.31, "step": 16924 }, { "epoch": 0.84, "grad_norm": 1.9766027927398682, "learning_rate": 8.189633875191464e-06, "loss": 5.534, "step": 16928 }, { "epoch": 0.84, "grad_norm": 1.7689497470855713, "learning_rate": 8.179751964029844e-06, "loss": 5.3465, "step": 16932 }, { "epoch": 0.84, "grad_norm": 2.117271900177002, "learning_rate": 8.169870052868225e-06, "loss": 5.4583, "step": 16936 }, { "epoch": 0.84, "grad_norm": 2.0808498859405518, "learning_rate": 8.159988141706607e-06, "loss": 5.477, "step": 16940 }, { "epoch": 0.84, "grad_norm": 2.0178062915802, "learning_rate": 8.150106230544987e-06, "loss": 5.5178, "step": 16944 }, { "epoch": 0.84, "grad_norm": 1.7878342866897583, "learning_rate": 8.140224319383369e-06, "loss": 5.533, "step": 
16948 }, { "epoch": 0.84, "grad_norm": 2.0112874507904053, "learning_rate": 8.13034240822175e-06, "loss": 5.342, "step": 16952 }, { "epoch": 0.84, "grad_norm": 2.224484443664551, "learning_rate": 8.120460497060133e-06, "loss": 5.4574, "step": 16956 }, { "epoch": 0.84, "grad_norm": 2.29886531829834, "learning_rate": 8.110578585898514e-06, "loss": 5.5541, "step": 16960 }, { "epoch": 0.84, "grad_norm": 1.8924994468688965, "learning_rate": 8.100696674736896e-06, "loss": 5.4196, "step": 16964 }, { "epoch": 0.84, "grad_norm": 2.2159488201141357, "learning_rate": 8.090814763575276e-06, "loss": 5.4989, "step": 16968 }, { "epoch": 0.84, "grad_norm": 2.170715570449829, "learning_rate": 8.080932852413656e-06, "loss": 5.5074, "step": 16972 }, { "epoch": 0.84, "grad_norm": 2.0515708923339844, "learning_rate": 8.071050941252038e-06, "loss": 5.4033, "step": 16976 }, { "epoch": 0.84, "grad_norm": 2.0467865467071533, "learning_rate": 8.061169030090419e-06, "loss": 5.4566, "step": 16980 }, { "epoch": 0.84, "grad_norm": 1.9163670539855957, "learning_rate": 8.0512871189288e-06, "loss": 5.5335, "step": 16984 }, { "epoch": 0.84, "grad_norm": 2.1390318870544434, "learning_rate": 8.041405207767183e-06, "loss": 5.5707, "step": 16988 }, { "epoch": 0.84, "grad_norm": 1.9965319633483887, "learning_rate": 8.031523296605565e-06, "loss": 5.4165, "step": 16992 }, { "epoch": 0.84, "grad_norm": 2.137233257293701, "learning_rate": 8.021641385443945e-06, "loss": 5.3596, "step": 16996 }, { "epoch": 0.84, "grad_norm": 2.152256727218628, "learning_rate": 8.011759474282327e-06, "loss": 5.3396, "step": 17000 }, { "epoch": 0.84, "grad_norm": 2.28680682182312, "learning_rate": 8.001877563120708e-06, "loss": 5.5507, "step": 17004 }, { "epoch": 0.84, "grad_norm": 2.26821231842041, "learning_rate": 7.991995651959088e-06, "loss": 5.4969, "step": 17008 }, { "epoch": 0.84, "grad_norm": 2.275667428970337, "learning_rate": 7.98211374079747e-06, "loss": 5.441, "step": 17012 }, { "epoch": 0.84, "grad_norm": 
2.080756902694702, "learning_rate": 7.972231829635852e-06, "loss": 5.4398, "step": 17016 }, { "epoch": 0.84, "grad_norm": 2.10422420501709, "learning_rate": 7.962349918474234e-06, "loss": 5.437, "step": 17020 }, { "epoch": 0.84, "grad_norm": 1.858323335647583, "learning_rate": 7.952468007312615e-06, "loss": 5.5432, "step": 17024 }, { "epoch": 0.84, "grad_norm": 2.4101650714874268, "learning_rate": 7.942586096150997e-06, "loss": 5.412, "step": 17028 }, { "epoch": 0.84, "grad_norm": 2.2219436168670654, "learning_rate": 7.932704184989377e-06, "loss": 5.5279, "step": 17032 }, { "epoch": 0.84, "grad_norm": 2.182474374771118, "learning_rate": 7.922822273827757e-06, "loss": 5.4662, "step": 17036 }, { "epoch": 0.84, "grad_norm": 2.060351610183716, "learning_rate": 7.91294036266614e-06, "loss": 5.504, "step": 17040 }, { "epoch": 0.84, "grad_norm": 2.1096701622009277, "learning_rate": 7.903058451504522e-06, "loss": 5.4539, "step": 17044 }, { "epoch": 0.84, "grad_norm": 2.0492708683013916, "learning_rate": 7.893176540342904e-06, "loss": 5.4398, "step": 17048 }, { "epoch": 0.84, "grad_norm": 2.032947301864624, "learning_rate": 7.883294629181284e-06, "loss": 5.5253, "step": 17052 }, { "epoch": 0.84, "grad_norm": 2.0764636993408203, "learning_rate": 7.873412718019666e-06, "loss": 5.4655, "step": 17056 }, { "epoch": 0.84, "grad_norm": 2.105656862258911, "learning_rate": 7.863530806858046e-06, "loss": 5.4761, "step": 17060 }, { "epoch": 0.84, "grad_norm": 1.975953459739685, "learning_rate": 7.853648895696428e-06, "loss": 5.5364, "step": 17064 }, { "epoch": 0.84, "grad_norm": 2.0592944622039795, "learning_rate": 7.843766984534809e-06, "loss": 5.4987, "step": 17068 }, { "epoch": 0.84, "grad_norm": 2.1122117042541504, "learning_rate": 7.833885073373191e-06, "loss": 5.4162, "step": 17072 }, { "epoch": 0.84, "grad_norm": 2.143172264099121, "learning_rate": 7.824003162211573e-06, "loss": 5.4959, "step": 17076 }, { "epoch": 0.84, "grad_norm": 1.9919787645339966, "learning_rate": 
7.814121251049953e-06, "loss": 5.469, "step": 17080 }, { "epoch": 0.84, "grad_norm": 1.9146004915237427, "learning_rate": 7.804239339888335e-06, "loss": 5.4748, "step": 17084 }, { "epoch": 0.84, "grad_norm": 2.3150486946105957, "learning_rate": 7.794357428726716e-06, "loss": 5.4056, "step": 17088 }, { "epoch": 0.84, "grad_norm": 2.1717705726623535, "learning_rate": 7.784475517565098e-06, "loss": 5.5389, "step": 17092 }, { "epoch": 0.84, "grad_norm": 2.1674489974975586, "learning_rate": 7.774593606403478e-06, "loss": 5.3378, "step": 17096 }, { "epoch": 0.84, "grad_norm": 2.17425537109375, "learning_rate": 7.76471169524186e-06, "loss": 5.5094, "step": 17100 }, { "epoch": 0.85, "grad_norm": 2.2170867919921875, "learning_rate": 7.754829784080242e-06, "loss": 5.4591, "step": 17104 }, { "epoch": 0.85, "grad_norm": 2.0710206031799316, "learning_rate": 7.744947872918623e-06, "loss": 5.461, "step": 17108 }, { "epoch": 0.85, "grad_norm": 1.9662617444992065, "learning_rate": 7.735065961757005e-06, "loss": 5.5232, "step": 17112 }, { "epoch": 0.85, "grad_norm": 2.1950018405914307, "learning_rate": 7.725184050595385e-06, "loss": 5.4845, "step": 17116 }, { "epoch": 0.85, "grad_norm": 2.166281223297119, "learning_rate": 7.715302139433767e-06, "loss": 5.4899, "step": 17120 }, { "epoch": 0.85, "grad_norm": 2.0825867652893066, "learning_rate": 7.705420228272148e-06, "loss": 5.5889, "step": 17124 }, { "epoch": 0.85, "grad_norm": 2.0458121299743652, "learning_rate": 7.69553831711053e-06, "loss": 5.4465, "step": 17128 }, { "epoch": 0.85, "grad_norm": 1.972931146621704, "learning_rate": 7.685656405948912e-06, "loss": 5.447, "step": 17132 }, { "epoch": 0.85, "grad_norm": 2.2071616649627686, "learning_rate": 7.675774494787292e-06, "loss": 5.5405, "step": 17136 }, { "epoch": 0.85, "grad_norm": 2.24798583984375, "learning_rate": 7.665892583625674e-06, "loss": 5.5034, "step": 17140 }, { "epoch": 0.85, "grad_norm": 2.3352463245391846, "learning_rate": 7.656010672464054e-06, "loss": 5.5322, 
"step": 17144 }, { "epoch": 0.85, "grad_norm": 2.1701347827911377, "learning_rate": 7.646128761302436e-06, "loss": 5.6042, "step": 17148 }, { "epoch": 0.85, "grad_norm": 2.0654942989349365, "learning_rate": 7.636246850140817e-06, "loss": 5.5102, "step": 17152 }, { "epoch": 0.85, "grad_norm": 2.410454273223877, "learning_rate": 7.626364938979198e-06, "loss": 5.4408, "step": 17156 }, { "epoch": 0.85, "grad_norm": 2.0221352577209473, "learning_rate": 7.616483027817581e-06, "loss": 5.4086, "step": 17160 }, { "epoch": 0.85, "grad_norm": 2.211092233657837, "learning_rate": 7.606601116655962e-06, "loss": 5.4589, "step": 17164 }, { "epoch": 0.85, "grad_norm": 1.9467920064926147, "learning_rate": 7.596719205494343e-06, "loss": 5.4867, "step": 17168 }, { "epoch": 0.85, "grad_norm": 2.1144025325775146, "learning_rate": 7.586837294332725e-06, "loss": 5.5143, "step": 17172 }, { "epoch": 0.85, "grad_norm": 2.1652915477752686, "learning_rate": 7.576955383171106e-06, "loss": 5.533, "step": 17176 }, { "epoch": 0.85, "grad_norm": 1.9289984703063965, "learning_rate": 7.567073472009486e-06, "loss": 5.5406, "step": 17180 }, { "epoch": 0.85, "grad_norm": 2.028322458267212, "learning_rate": 7.557191560847867e-06, "loss": 5.4118, "step": 17184 }, { "epoch": 0.85, "grad_norm": 2.2385053634643555, "learning_rate": 7.54730964968625e-06, "loss": 5.5023, "step": 17188 }, { "epoch": 0.85, "grad_norm": 1.8756178617477417, "learning_rate": 7.5374277385246315e-06, "loss": 5.4028, "step": 17192 }, { "epoch": 0.85, "grad_norm": 2.0008492469787598, "learning_rate": 7.527545827363013e-06, "loss": 5.4572, "step": 17196 }, { "epoch": 0.85, "grad_norm": 1.9606680870056152, "learning_rate": 7.517663916201394e-06, "loss": 5.3813, "step": 17200 }, { "epoch": 0.85, "grad_norm": 1.765757441520691, "learning_rate": 7.507782005039775e-06, "loss": 5.433, "step": 17204 }, { "epoch": 0.85, "grad_norm": 2.2999327182769775, "learning_rate": 7.497900093878156e-06, "loss": 5.4443, "step": 17208 }, { "epoch": 0.85, 
"grad_norm": 1.831790804862976, "learning_rate": 7.488018182716537e-06, "loss": 5.4745, "step": 17212 }, { "epoch": 0.85, "grad_norm": 2.0281448364257812, "learning_rate": 7.47813627155492e-06, "loss": 5.3546, "step": 17216 }, { "epoch": 0.85, "grad_norm": 2.163875102996826, "learning_rate": 7.468254360393301e-06, "loss": 5.4957, "step": 17220 }, { "epoch": 0.85, "grad_norm": 2.0201468467712402, "learning_rate": 7.458372449231682e-06, "loss": 5.3797, "step": 17224 }, { "epoch": 0.85, "grad_norm": 1.9520927667617798, "learning_rate": 7.448490538070063e-06, "loss": 5.5663, "step": 17228 }, { "epoch": 0.85, "grad_norm": 1.9706037044525146, "learning_rate": 7.4386086269084445e-06, "loss": 5.4462, "step": 17232 }, { "epoch": 0.85, "grad_norm": 2.2334280014038086, "learning_rate": 7.428726715746826e-06, "loss": 5.5028, "step": 17236 }, { "epoch": 0.85, "grad_norm": 2.089432716369629, "learning_rate": 7.418844804585207e-06, "loss": 5.5537, "step": 17240 }, { "epoch": 0.85, "grad_norm": 2.0354325771331787, "learning_rate": 7.408962893423589e-06, "loss": 5.4, "step": 17244 }, { "epoch": 0.85, "grad_norm": 1.9282554388046265, "learning_rate": 7.39908098226197e-06, "loss": 5.3915, "step": 17248 }, { "epoch": 0.85, "grad_norm": 2.133868455886841, "learning_rate": 7.389199071100351e-06, "loss": 5.4868, "step": 17252 }, { "epoch": 0.85, "grad_norm": 2.0873701572418213, "learning_rate": 7.379317159938733e-06, "loss": 5.6375, "step": 17256 }, { "epoch": 0.85, "grad_norm": 1.9751038551330566, "learning_rate": 7.369435248777114e-06, "loss": 5.4291, "step": 17260 }, { "epoch": 0.85, "grad_norm": 1.8549004793167114, "learning_rate": 7.359553337615495e-06, "loss": 5.4953, "step": 17264 }, { "epoch": 0.85, "grad_norm": 1.9882365465164185, "learning_rate": 7.349671426453876e-06, "loss": 5.5191, "step": 17268 }, { "epoch": 0.85, "grad_norm": 2.0008509159088135, "learning_rate": 7.339789515292258e-06, "loss": 5.573, "step": 17272 }, { "epoch": 0.85, "grad_norm": 1.9084336757659912, 
"learning_rate": 7.3299076041306395e-06, "loss": 5.5864, "step": 17276 }, { "epoch": 0.85, "grad_norm": 2.3234941959381104, "learning_rate": 7.320025692969021e-06, "loss": 5.4648, "step": 17280 }, { "epoch": 0.85, "grad_norm": 2.033445358276367, "learning_rate": 7.310143781807402e-06, "loss": 5.5663, "step": 17284 }, { "epoch": 0.85, "grad_norm": 2.036726474761963, "learning_rate": 7.300261870645783e-06, "loss": 5.3706, "step": 17288 }, { "epoch": 0.85, "grad_norm": 2.135927677154541, "learning_rate": 7.290379959484164e-06, "loss": 5.4365, "step": 17292 }, { "epoch": 0.85, "grad_norm": 2.0434539318084717, "learning_rate": 7.280498048322546e-06, "loss": 5.4914, "step": 17296 }, { "epoch": 0.85, "grad_norm": 1.9364794492721558, "learning_rate": 7.270616137160927e-06, "loss": 5.358, "step": 17300 }, { "epoch": 0.85, "grad_norm": 2.1555495262145996, "learning_rate": 7.260734225999309e-06, "loss": 5.4667, "step": 17304 }, { "epoch": 0.86, "grad_norm": 2.0687687397003174, "learning_rate": 7.25085231483769e-06, "loss": 5.5075, "step": 17308 }, { "epoch": 0.86, "grad_norm": 2.2169644832611084, "learning_rate": 7.240970403676071e-06, "loss": 5.5186, "step": 17312 }, { "epoch": 0.86, "grad_norm": 2.0690207481384277, "learning_rate": 7.2310884925144525e-06, "loss": 5.4944, "step": 17316 }, { "epoch": 0.86, "grad_norm": 2.172851324081421, "learning_rate": 7.221206581352834e-06, "loss": 5.5178, "step": 17320 }, { "epoch": 0.86, "grad_norm": 2.178602457046509, "learning_rate": 7.211324670191215e-06, "loss": 5.5693, "step": 17324 }, { "epoch": 0.86, "grad_norm": 1.9525049924850464, "learning_rate": 7.201442759029596e-06, "loss": 5.3773, "step": 17328 }, { "epoch": 0.86, "grad_norm": 2.0250043869018555, "learning_rate": 7.191560847867978e-06, "loss": 5.4672, "step": 17332 }, { "epoch": 0.86, "grad_norm": 2.229799747467041, "learning_rate": 7.1816789367063594e-06, "loss": 5.4451, "step": 17336 }, { "epoch": 0.86, "grad_norm": 2.2048773765563965, "learning_rate": 
7.171797025544741e-06, "loss": 5.5388, "step": 17340 }, { "epoch": 0.86, "grad_norm": 2.1948986053466797, "learning_rate": 7.161915114383122e-06, "loss": 5.4121, "step": 17344 }, { "epoch": 0.86, "grad_norm": 2.410446882247925, "learning_rate": 7.152033203221503e-06, "loss": 5.5068, "step": 17348 }, { "epoch": 0.86, "grad_norm": 2.0198326110839844, "learning_rate": 7.142151292059884e-06, "loss": 5.4786, "step": 17352 }, { "epoch": 0.86, "grad_norm": 2.1943955421447754, "learning_rate": 7.1322693808982655e-06, "loss": 5.4957, "step": 17356 }, { "epoch": 0.86, "grad_norm": 2.1132426261901855, "learning_rate": 7.122387469736648e-06, "loss": 5.3965, "step": 17360 }, { "epoch": 0.86, "grad_norm": 2.0462357997894287, "learning_rate": 7.112505558575029e-06, "loss": 5.5153, "step": 17364 }, { "epoch": 0.86, "grad_norm": 2.0501723289489746, "learning_rate": 7.10262364741341e-06, "loss": 5.5062, "step": 17368 }, { "epoch": 0.86, "grad_norm": 2.148674726486206, "learning_rate": 7.092741736251791e-06, "loss": 5.2946, "step": 17372 }, { "epoch": 0.86, "grad_norm": 2.0384411811828613, "learning_rate": 7.082859825090172e-06, "loss": 5.6131, "step": 17376 }, { "epoch": 0.86, "grad_norm": 2.235848903656006, "learning_rate": 7.072977913928554e-06, "loss": 5.3982, "step": 17380 }, { "epoch": 0.86, "grad_norm": 2.0050299167633057, "learning_rate": 7.063096002766935e-06, "loss": 5.5681, "step": 17384 }, { "epoch": 0.86, "grad_norm": 1.9482308626174927, "learning_rate": 7.053214091605318e-06, "loss": 5.4242, "step": 17388 }, { "epoch": 0.86, "grad_norm": 2.077125072479248, "learning_rate": 7.043332180443699e-06, "loss": 5.4092, "step": 17392 }, { "epoch": 0.86, "grad_norm": 2.2242355346679688, "learning_rate": 7.033450269282079e-06, "loss": 5.4268, "step": 17396 }, { "epoch": 0.86, "grad_norm": 2.2366597652435303, "learning_rate": 7.0235683581204605e-06, "loss": 5.4304, "step": 17400 }, { "epoch": 0.86, "grad_norm": 2.3268561363220215, "learning_rate": 7.013686446958842e-06, "loss": 
5.4141, "step": 17404 }, { "epoch": 0.86, "grad_norm": 2.1040186882019043, "learning_rate": 7.003804535797223e-06, "loss": 5.4129, "step": 17408 }, { "epoch": 0.86, "grad_norm": 2.0050957202911377, "learning_rate": 6.993922624635604e-06, "loss": 5.5029, "step": 17412 }, { "epoch": 0.86, "grad_norm": 1.914214849472046, "learning_rate": 6.984040713473987e-06, "loss": 5.5394, "step": 17416 }, { "epoch": 0.86, "grad_norm": 2.112946033477783, "learning_rate": 6.974158802312368e-06, "loss": 5.4547, "step": 17420 }, { "epoch": 0.86, "grad_norm": 1.980510950088501, "learning_rate": 6.9642768911507495e-06, "loss": 5.4997, "step": 17424 }, { "epoch": 0.86, "grad_norm": 1.985985517501831, "learning_rate": 6.954394979989131e-06, "loss": 5.3671, "step": 17428 }, { "epoch": 0.86, "grad_norm": 1.897262454032898, "learning_rate": 6.944513068827511e-06, "loss": 5.4578, "step": 17432 }, { "epoch": 0.86, "grad_norm": 1.9851828813552856, "learning_rate": 6.934631157665892e-06, "loss": 5.4318, "step": 17436 }, { "epoch": 0.86, "grad_norm": 1.8977246284484863, "learning_rate": 6.9247492465042735e-06, "loss": 5.4777, "step": 17440 }, { "epoch": 0.86, "grad_norm": 2.1280171871185303, "learning_rate": 6.9148673353426564e-06, "loss": 5.583, "step": 17444 }, { "epoch": 0.86, "grad_norm": 2.3764641284942627, "learning_rate": 6.904985424181038e-06, "loss": 5.5559, "step": 17448 }, { "epoch": 0.86, "grad_norm": 1.9994136095046997, "learning_rate": 6.895103513019419e-06, "loss": 5.5662, "step": 17452 }, { "epoch": 0.86, "grad_norm": 2.108659267425537, "learning_rate": 6.8852216018578e-06, "loss": 5.4098, "step": 17456 }, { "epoch": 0.86, "grad_norm": 1.9477959871292114, "learning_rate": 6.875339690696181e-06, "loss": 5.4161, "step": 17460 }, { "epoch": 0.86, "grad_norm": 2.2120134830474854, "learning_rate": 6.865457779534562e-06, "loss": 5.5344, "step": 17464 }, { "epoch": 0.86, "grad_norm": 1.9351931810379028, "learning_rate": 6.855575868372943e-06, "loss": 5.5115, "step": 17468 }, { "epoch": 
0.86, "grad_norm": 1.9376587867736816, "learning_rate": 6.845693957211324e-06, "loss": 5.4218, "step": 17472 }, { "epoch": 0.86, "grad_norm": 1.8527143001556396, "learning_rate": 6.835812046049707e-06, "loss": 5.4781, "step": 17476 }, { "epoch": 0.86, "grad_norm": 1.9370919466018677, "learning_rate": 6.825930134888088e-06, "loss": 5.6185, "step": 17480 }, { "epoch": 0.86, "grad_norm": 1.8956094980239868, "learning_rate": 6.816048223726469e-06, "loss": 5.5335, "step": 17484 }, { "epoch": 0.86, "grad_norm": 2.1358373165130615, "learning_rate": 6.806166312564851e-06, "loss": 5.5501, "step": 17488 }, { "epoch": 0.86, "grad_norm": 1.8500255346298218, "learning_rate": 6.796284401403232e-06, "loss": 5.4718, "step": 17492 }, { "epoch": 0.86, "grad_norm": 1.9620647430419922, "learning_rate": 6.786402490241612e-06, "loss": 5.4566, "step": 17496 }, { "epoch": 0.86, "grad_norm": 2.0902743339538574, "learning_rate": 6.776520579079993e-06, "loss": 5.484, "step": 17500 }, { "epoch": 0.86, "grad_norm": 2.329399824142456, "learning_rate": 6.766638667918376e-06, "loss": 5.5337, "step": 17504 }, { "epoch": 0.87, "grad_norm": 1.9751675128936768, "learning_rate": 6.7567567567567575e-06, "loss": 5.4638, "step": 17508 }, { "epoch": 0.87, "grad_norm": 2.188885450363159, "learning_rate": 6.746874845595139e-06, "loss": 5.5174, "step": 17512 }, { "epoch": 0.87, "grad_norm": 2.1230249404907227, "learning_rate": 6.73699293443352e-06, "loss": 5.4525, "step": 17516 }, { "epoch": 0.87, "grad_norm": 2.0748202800750732, "learning_rate": 6.727111023271901e-06, "loss": 5.5633, "step": 17520 }, { "epoch": 0.87, "grad_norm": 1.8766546249389648, "learning_rate": 6.717229112110282e-06, "loss": 5.5295, "step": 17524 }, { "epoch": 0.87, "grad_norm": 2.1104044914245605, "learning_rate": 6.707347200948664e-06, "loss": 5.5265, "step": 17528 }, { "epoch": 0.87, "grad_norm": 1.9020673036575317, "learning_rate": 6.697465289787046e-06, "loss": 5.4684, "step": 17532 }, { "epoch": 0.87, "grad_norm": 
1.9456652402877808, "learning_rate": 6.687583378625427e-06, "loss": 5.3826, "step": 17536 }, { "epoch": 0.87, "grad_norm": 2.117117166519165, "learning_rate": 6.677701467463808e-06, "loss": 5.4525, "step": 17540 }, { "epoch": 0.87, "grad_norm": 2.0873782634735107, "learning_rate": 6.667819556302189e-06, "loss": 5.5402, "step": 17544 }, { "epoch": 0.87, "grad_norm": 2.0289838314056396, "learning_rate": 6.6579376451405705e-06, "loss": 5.5603, "step": 17548 }, { "epoch": 0.87, "grad_norm": 2.2275471687316895, "learning_rate": 6.648055733978952e-06, "loss": 5.3792, "step": 17552 }, { "epoch": 0.87, "grad_norm": 1.9133155345916748, "learning_rate": 6.638173822817333e-06, "loss": 5.4289, "step": 17556 }, { "epoch": 0.87, "grad_norm": 2.193645477294922, "learning_rate": 6.628291911655715e-06, "loss": 5.5224, "step": 17560 }, { "epoch": 0.87, "grad_norm": 2.1608972549438477, "learning_rate": 6.618410000494096e-06, "loss": 5.5412, "step": 17564 }, { "epoch": 0.87, "grad_norm": 2.141594648361206, "learning_rate": 6.6085280893324774e-06, "loss": 5.4787, "step": 17568 }, { "epoch": 0.87, "grad_norm": 1.9416935443878174, "learning_rate": 6.598646178170859e-06, "loss": 5.3674, "step": 17572 }, { "epoch": 0.87, "grad_norm": 2.110677480697632, "learning_rate": 6.58876426700924e-06, "loss": 5.4024, "step": 17576 }, { "epoch": 0.87, "grad_norm": 2.2235372066497803, "learning_rate": 6.578882355847621e-06, "loss": 5.4458, "step": 17580 }, { "epoch": 0.87, "grad_norm": 2.280282974243164, "learning_rate": 6.569000444686002e-06, "loss": 5.5981, "step": 17584 }, { "epoch": 0.87, "grad_norm": 2.1084625720977783, "learning_rate": 6.559118533524384e-06, "loss": 5.4475, "step": 17588 }, { "epoch": 0.87, "grad_norm": 2.004232406616211, "learning_rate": 6.5492366223627656e-06, "loss": 5.4314, "step": 17592 }, { "epoch": 0.87, "grad_norm": 1.9286199808120728, "learning_rate": 6.539354711201147e-06, "loss": 5.3943, "step": 17596 }, { "epoch": 0.87, "grad_norm": 1.9742239713668823, 
"learning_rate": 6.529472800039528e-06, "loss": 5.4633, "step": 17600 }, { "epoch": 0.87, "grad_norm": 2.1503305435180664, "learning_rate": 6.519590888877909e-06, "loss": 5.4654, "step": 17604 }, { "epoch": 0.87, "grad_norm": 1.996319055557251, "learning_rate": 6.50970897771629e-06, "loss": 5.4479, "step": 17608 }, { "epoch": 0.87, "grad_norm": 2.1689870357513428, "learning_rate": 6.499827066554672e-06, "loss": 5.4242, "step": 17612 }, { "epoch": 0.87, "grad_norm": 2.0061464309692383, "learning_rate": 6.489945155393053e-06, "loss": 5.3833, "step": 17616 }, { "epoch": 0.87, "grad_norm": 2.1201388835906982, "learning_rate": 6.480063244231435e-06, "loss": 5.4374, "step": 17620 }, { "epoch": 0.87, "grad_norm": 2.196545124053955, "learning_rate": 6.470181333069816e-06, "loss": 5.3564, "step": 17624 }, { "epoch": 0.87, "grad_norm": 2.073232412338257, "learning_rate": 6.460299421908197e-06, "loss": 5.4057, "step": 17628 }, { "epoch": 0.87, "grad_norm": 1.9354524612426758, "learning_rate": 6.4504175107465785e-06, "loss": 5.4718, "step": 17632 }, { "epoch": 0.87, "grad_norm": 2.032994508743286, "learning_rate": 6.44053559958496e-06, "loss": 5.358, "step": 17636 }, { "epoch": 0.87, "grad_norm": 2.130598545074463, "learning_rate": 6.430653688423341e-06, "loss": 5.4658, "step": 17640 }, { "epoch": 0.87, "grad_norm": 1.8692468404769897, "learning_rate": 6.420771777261722e-06, "loss": 5.4004, "step": 17644 }, { "epoch": 0.87, "grad_norm": 1.9368531703948975, "learning_rate": 6.410889866100104e-06, "loss": 5.4809, "step": 17648 }, { "epoch": 0.87, "grad_norm": 2.235506534576416, "learning_rate": 6.4010079549384855e-06, "loss": 5.4532, "step": 17652 }, { "epoch": 0.87, "grad_norm": 1.9980324506759644, "learning_rate": 6.391126043776867e-06, "loss": 5.5156, "step": 17656 }, { "epoch": 0.87, "grad_norm": 1.947649598121643, "learning_rate": 6.381244132615248e-06, "loss": 5.3676, "step": 17660 }, { "epoch": 0.87, "grad_norm": 2.365041971206665, "learning_rate": 6.371362221453629e-06, 
"loss": 5.4387, "step": 17664 }, { "epoch": 0.87, "grad_norm": 2.2820627689361572, "learning_rate": 6.36148031029201e-06, "loss": 5.4541, "step": 17668 }, { "epoch": 0.87, "grad_norm": 2.02691650390625, "learning_rate": 6.3515983991303915e-06, "loss": 5.5084, "step": 17672 }, { "epoch": 0.87, "grad_norm": 2.0064783096313477, "learning_rate": 6.3417164879687744e-06, "loss": 5.5205, "step": 17676 }, { "epoch": 0.87, "grad_norm": 1.9961150884628296, "learning_rate": 6.331834576807155e-06, "loss": 5.3904, "step": 17680 }, { "epoch": 0.87, "grad_norm": 2.2273404598236084, "learning_rate": 6.321952665645536e-06, "loss": 5.4756, "step": 17684 }, { "epoch": 0.87, "grad_norm": 2.078472852706909, "learning_rate": 6.312070754483917e-06, "loss": 5.5535, "step": 17688 }, { "epoch": 0.87, "grad_norm": 1.9248629808425903, "learning_rate": 6.3021888433222984e-06, "loss": 5.4378, "step": 17692 }, { "epoch": 0.87, "grad_norm": 2.1985530853271484, "learning_rate": 6.29230693216068e-06, "loss": 5.4345, "step": 17696 }, { "epoch": 0.87, "grad_norm": 2.087536096572876, "learning_rate": 6.282425020999061e-06, "loss": 5.5135, "step": 17700 }, { "epoch": 0.87, "grad_norm": 2.0699515342712402, "learning_rate": 6.272543109837444e-06, "loss": 5.3821, "step": 17704 }, { "epoch": 0.87, "grad_norm": 2.309680223464966, "learning_rate": 6.262661198675825e-06, "loss": 5.3426, "step": 17708 }, { "epoch": 0.88, "grad_norm": 1.9877557754516602, "learning_rate": 6.252779287514206e-06, "loss": 5.554, "step": 17712 }, { "epoch": 0.88, "grad_norm": 2.1621484756469727, "learning_rate": 6.2428973763525866e-06, "loss": 5.4897, "step": 17716 }, { "epoch": 0.88, "grad_norm": 2.0568161010742188, "learning_rate": 6.233015465190968e-06, "loss": 5.4726, "step": 17720 }, { "epoch": 0.88, "grad_norm": 1.8659361600875854, "learning_rate": 6.22313355402935e-06, "loss": 5.461, "step": 17724 }, { "epoch": 0.88, "grad_norm": 1.8461517095565796, "learning_rate": 6.213251642867731e-06, "loss": 5.3407, "step": 17728 }, { 
"epoch": 0.88, "grad_norm": 2.2194485664367676, "learning_rate": 6.203369731706112e-06, "loss": 5.3863, "step": 17732 }, { "epoch": 0.88, "grad_norm": 2.2594525814056396, "learning_rate": 6.1934878205444935e-06, "loss": 5.4737, "step": 17736 }, { "epoch": 0.88, "grad_norm": 2.067777156829834, "learning_rate": 6.1836059093828755e-06, "loss": 5.4817, "step": 17740 }, { "epoch": 0.88, "grad_norm": 1.9696800708770752, "learning_rate": 6.173723998221257e-06, "loss": 5.5215, "step": 17744 }, { "epoch": 0.88, "grad_norm": 1.9700802564620972, "learning_rate": 6.163842087059637e-06, "loss": 5.5821, "step": 17748 }, { "epoch": 0.88, "grad_norm": 2.2519845962524414, "learning_rate": 6.153960175898018e-06, "loss": 5.5501, "step": 17752 }, { "epoch": 0.88, "grad_norm": 2.1531550884246826, "learning_rate": 6.1440782647364e-06, "loss": 5.354, "step": 17756 }, { "epoch": 0.88, "grad_norm": 2.299639940261841, "learning_rate": 6.134196353574782e-06, "loss": 5.5487, "step": 17760 }, { "epoch": 0.88, "grad_norm": 1.9032407999038696, "learning_rate": 6.124314442413163e-06, "loss": 5.4551, "step": 17764 }, { "epoch": 0.88, "grad_norm": 2.121720552444458, "learning_rate": 6.114432531251545e-06, "loss": 5.4185, "step": 17768 }, { "epoch": 0.88, "grad_norm": 1.955588698387146, "learning_rate": 6.104550620089926e-06, "loss": 5.5947, "step": 17772 }, { "epoch": 0.88, "grad_norm": 1.9518580436706543, "learning_rate": 6.094668708928307e-06, "loss": 5.4159, "step": 17776 }, { "epoch": 0.88, "grad_norm": 2.2284739017486572, "learning_rate": 6.084786797766688e-06, "loss": 5.4806, "step": 17780 }, { "epoch": 0.88, "grad_norm": 1.9473198652267456, "learning_rate": 6.07490488660507e-06, "loss": 5.3457, "step": 17784 }, { "epoch": 0.88, "grad_norm": 2.25762939453125, "learning_rate": 6.065022975443451e-06, "loss": 5.3905, "step": 17788 }, { "epoch": 0.88, "grad_norm": 2.13055682182312, "learning_rate": 6.055141064281832e-06, "loss": 5.4596, "step": 17792 }, { "epoch": 0.88, "grad_norm": 
2.078608751296997, "learning_rate": 6.045259153120214e-06, "loss": 5.4836, "step": 17796 }, { "epoch": 0.88, "grad_norm": 2.032860040664673, "learning_rate": 6.0353772419585954e-06, "loss": 5.3315, "step": 17800 }, { "epoch": 0.88, "grad_norm": 2.18186616897583, "learning_rate": 6.025495330796977e-06, "loss": 5.5165, "step": 17804 }, { "epoch": 0.88, "grad_norm": 1.8949894905090332, "learning_rate": 6.015613419635358e-06, "loss": 5.4296, "step": 17808 }, { "epoch": 0.88, "grad_norm": 1.9019147157669067, "learning_rate": 6.005731508473739e-06, "loss": 5.3742, "step": 17812 }, { "epoch": 0.88, "grad_norm": 1.9749938249588013, "learning_rate": 5.99584959731212e-06, "loss": 5.4702, "step": 17816 }, { "epoch": 0.88, "grad_norm": 1.9528026580810547, "learning_rate": 5.9859676861505015e-06, "loss": 5.4179, "step": 17820 }, { "epoch": 0.88, "grad_norm": 2.04555082321167, "learning_rate": 5.976085774988883e-06, "loss": 5.4807, "step": 17824 }, { "epoch": 0.88, "grad_norm": 2.2078750133514404, "learning_rate": 5.966203863827265e-06, "loss": 5.4206, "step": 17828 }, { "epoch": 0.88, "grad_norm": 2.1232731342315674, "learning_rate": 5.956321952665646e-06, "loss": 5.5452, "step": 17832 }, { "epoch": 0.88, "grad_norm": 1.724265217781067, "learning_rate": 5.946440041504027e-06, "loss": 5.3831, "step": 17836 }, { "epoch": 0.88, "grad_norm": 2.0802602767944336, "learning_rate": 5.936558130342408e-06, "loss": 5.3925, "step": 17840 }, { "epoch": 0.88, "grad_norm": 1.913464069366455, "learning_rate": 5.92667621918079e-06, "loss": 5.4786, "step": 17844 }, { "epoch": 0.88, "grad_norm": 1.9357552528381348, "learning_rate": 5.916794308019171e-06, "loss": 5.4415, "step": 17848 }, { "epoch": 0.88, "grad_norm": 1.9869678020477295, "learning_rate": 5.906912396857552e-06, "loss": 5.4731, "step": 17852 }, { "epoch": 0.88, "grad_norm": 2.1964402198791504, "learning_rate": 5.897030485695934e-06, "loss": 5.4538, "step": 17856 }, { "epoch": 0.88, "grad_norm": 2.2341887950897217, "learning_rate": 
5.887148574534315e-06, "loss": 5.4484, "step": 17860 }, { "epoch": 0.88, "grad_norm": 2.2685790061950684, "learning_rate": 5.8772666633726965e-06, "loss": 5.4935, "step": 17864 }, { "epoch": 0.88, "grad_norm": 1.9977366924285889, "learning_rate": 5.867384752211079e-06, "loss": 5.4866, "step": 17868 }, { "epoch": 0.88, "grad_norm": 1.8025336265563965, "learning_rate": 5.857502841049459e-06, "loss": 5.5315, "step": 17872 }, { "epoch": 0.88, "grad_norm": 1.8028703927993774, "learning_rate": 5.84762092988784e-06, "loss": 5.4048, "step": 17876 }, { "epoch": 0.88, "grad_norm": 2.0206375122070312, "learning_rate": 5.837739018726221e-06, "loss": 5.5057, "step": 17880 }, { "epoch": 0.88, "grad_norm": 2.2146549224853516, "learning_rate": 5.8278571075646034e-06, "loss": 5.4375, "step": 17884 }, { "epoch": 0.88, "grad_norm": 2.1485488414764404, "learning_rate": 5.817975196402985e-06, "loss": 5.4375, "step": 17888 }, { "epoch": 0.88, "grad_norm": 1.9976389408111572, "learning_rate": 5.808093285241366e-06, "loss": 5.5645, "step": 17892 }, { "epoch": 0.88, "grad_norm": 2.1577677726745605, "learning_rate": 5.798211374079747e-06, "loss": 5.4917, "step": 17896 }, { "epoch": 0.88, "grad_norm": 2.085784673690796, "learning_rate": 5.788329462918129e-06, "loss": 5.5115, "step": 17900 }, { "epoch": 0.88, "grad_norm": 2.250061511993408, "learning_rate": 5.77844755175651e-06, "loss": 5.4932, "step": 17904 }, { "epoch": 0.88, "grad_norm": 2.076542854309082, "learning_rate": 5.768565640594891e-06, "loss": 5.3858, "step": 17908 }, { "epoch": 0.89, "grad_norm": 2.0458431243896484, "learning_rate": 5.761154207223677e-06, "loss": 5.5596, "step": 17912 }, { "epoch": 0.89, "grad_norm": 2.075693130493164, "learning_rate": 5.7512722960620585e-06, "loss": 5.4962, "step": 17916 }, { "epoch": 0.89, "grad_norm": 2.09002685546875, "learning_rate": 5.74139038490044e-06, "loss": 5.3926, "step": 17920 }, { "epoch": 0.89, "grad_norm": 2.0116419792175293, "learning_rate": 5.731508473738821e-06, "loss": 
5.4649, "step": 17924 }, { "epoch": 0.89, "grad_norm": 2.0170979499816895, "learning_rate": 5.721626562577203e-06, "loss": 5.4906, "step": 17928 }, { "epoch": 0.89, "grad_norm": 2.033344268798828, "learning_rate": 5.711744651415584e-06, "loss": 5.5144, "step": 17932 }, { "epoch": 0.89, "grad_norm": 1.9842782020568848, "learning_rate": 5.701862740253965e-06, "loss": 5.4478, "step": 17936 }, { "epoch": 0.89, "grad_norm": 2.059737205505371, "learning_rate": 5.691980829092347e-06, "loss": 5.4969, "step": 17940 }, { "epoch": 0.89, "grad_norm": 2.116508960723877, "learning_rate": 5.682098917930728e-06, "loss": 5.5734, "step": 17944 }, { "epoch": 0.89, "grad_norm": 2.062220573425293, "learning_rate": 5.672217006769109e-06, "loss": 5.3472, "step": 17948 }, { "epoch": 0.89, "grad_norm": 2.176339626312256, "learning_rate": 5.66233509560749e-06, "loss": 5.3356, "step": 17952 }, { "epoch": 0.89, "grad_norm": 2.2417047023773193, "learning_rate": 5.652453184445872e-06, "loss": 5.429, "step": 17956 }, { "epoch": 0.89, "grad_norm": 1.9376815557479858, "learning_rate": 5.6425712732842535e-06, "loss": 5.4263, "step": 17960 }, { "epoch": 0.89, "grad_norm": 1.9888767004013062, "learning_rate": 5.632689362122635e-06, "loss": 5.5286, "step": 17964 }, { "epoch": 0.89, "grad_norm": 2.1054704189300537, "learning_rate": 5.622807450961016e-06, "loss": 5.4473, "step": 17968 }, { "epoch": 0.89, "grad_norm": 2.3070156574249268, "learning_rate": 5.612925539799398e-06, "loss": 5.6394, "step": 17972 }, { "epoch": 0.89, "grad_norm": 1.956694483757019, "learning_rate": 5.603043628637779e-06, "loss": 5.3773, "step": 17976 }, { "epoch": 0.89, "grad_norm": 2.1512622833251953, "learning_rate": 5.59316171747616e-06, "loss": 5.5051, "step": 17980 }, { "epoch": 0.89, "grad_norm": 1.9715803861618042, "learning_rate": 5.583279806314542e-06, "loss": 5.4564, "step": 17984 }, { "epoch": 0.89, "grad_norm": 2.07094144821167, "learning_rate": 5.573397895152923e-06, "loss": 5.486, "step": 17988 }, { "epoch": 0.89, 
"grad_norm": 2.0776047706604004, "learning_rate": 5.563515983991304e-06, "loss": 5.4101, "step": 17992 }, { "epoch": 0.89, "grad_norm": 2.0641090869903564, "learning_rate": 5.553634072829685e-06, "loss": 5.4413, "step": 17996 }, { "epoch": 0.89, "grad_norm": 2.069200038909912, "learning_rate": 5.543752161668067e-06, "loss": 5.4239, "step": 18000 }, { "epoch": 0.89, "grad_norm": 1.9378856420516968, "learning_rate": 5.5338702505064486e-06, "loss": 5.4994, "step": 18004 }, { "epoch": 0.89, "grad_norm": 1.9170506000518799, "learning_rate": 5.52398833934483e-06, "loss": 5.4589, "step": 18008 }, { "epoch": 0.89, "grad_norm": 2.1123738288879395, "learning_rate": 5.514106428183211e-06, "loss": 5.4226, "step": 18012 }, { "epoch": 0.89, "grad_norm": 1.9895274639129639, "learning_rate": 5.504224517021592e-06, "loss": 5.522, "step": 18016 }, { "epoch": 0.89, "grad_norm": 2.216384172439575, "learning_rate": 5.494342605859973e-06, "loss": 5.4323, "step": 18020 }, { "epoch": 0.89, "grad_norm": 2.1332645416259766, "learning_rate": 5.484460694698355e-06, "loss": 5.3666, "step": 18024 }, { "epoch": 0.89, "grad_norm": 2.0537400245666504, "learning_rate": 5.474578783536737e-06, "loss": 5.4988, "step": 18028 }, { "epoch": 0.89, "grad_norm": 1.9897429943084717, "learning_rate": 5.464696872375118e-06, "loss": 5.4833, "step": 18032 }, { "epoch": 0.89, "grad_norm": 2.066513776779175, "learning_rate": 5.454814961213499e-06, "loss": 5.4636, "step": 18036 }, { "epoch": 0.89, "grad_norm": 2.251376152038574, "learning_rate": 5.44493305005188e-06, "loss": 5.5205, "step": 18040 }, { "epoch": 0.89, "grad_norm": 2.1560051441192627, "learning_rate": 5.4350511388902615e-06, "loss": 5.4888, "step": 18044 }, { "epoch": 0.89, "grad_norm": 1.9921746253967285, "learning_rate": 5.425169227728643e-06, "loss": 5.503, "step": 18048 }, { "epoch": 0.89, "grad_norm": 1.9806662797927856, "learning_rate": 5.415287316567024e-06, "loss": 5.5299, "step": 18052 }, { "epoch": 0.89, "grad_norm": 2.086308479309082, 
"learning_rate": 5.405405405405406e-06, "loss": 5.3775, "step": 18056 }, { "epoch": 0.89, "grad_norm": 1.9652711153030396, "learning_rate": 5.395523494243787e-06, "loss": 5.5683, "step": 18060 }, { "epoch": 0.89, "grad_norm": 1.8376736640930176, "learning_rate": 5.3856415830821685e-06, "loss": 5.3905, "step": 18064 }, { "epoch": 0.89, "grad_norm": 2.1515750885009766, "learning_rate": 5.37575967192055e-06, "loss": 5.421, "step": 18068 }, { "epoch": 0.89, "grad_norm": 2.18635630607605, "learning_rate": 5.365877760758931e-06, "loss": 5.4513, "step": 18072 }, { "epoch": 0.89, "grad_norm": 2.2322137355804443, "learning_rate": 5.355995849597312e-06, "loss": 5.5696, "step": 18076 }, { "epoch": 0.89, "grad_norm": 2.1388771533966064, "learning_rate": 5.346113938435693e-06, "loss": 5.5154, "step": 18080 }, { "epoch": 0.89, "grad_norm": 2.1032564640045166, "learning_rate": 5.336232027274075e-06, "loss": 5.4737, "step": 18084 }, { "epoch": 0.89, "grad_norm": 2.2419564723968506, "learning_rate": 5.326350116112457e-06, "loss": 5.4804, "step": 18088 }, { "epoch": 0.89, "grad_norm": 2.1092734336853027, "learning_rate": 5.316468204950838e-06, "loss": 5.523, "step": 18092 }, { "epoch": 0.89, "grad_norm": 1.9642736911773682, "learning_rate": 5.306586293789219e-06, "loss": 5.4615, "step": 18096 }, { "epoch": 0.89, "grad_norm": 2.285712480545044, "learning_rate": 5.296704382627601e-06, "loss": 5.4944, "step": 18100 }, { "epoch": 0.89, "grad_norm": 1.8048274517059326, "learning_rate": 5.2868224714659814e-06, "loss": 5.3293, "step": 18104 }, { "epoch": 0.89, "grad_norm": 1.9001215696334839, "learning_rate": 5.276940560304363e-06, "loss": 5.3799, "step": 18108 }, { "epoch": 0.89, "grad_norm": 2.052248954772949, "learning_rate": 5.267058649142744e-06, "loss": 5.54, "step": 18112 }, { "epoch": 0.9, "grad_norm": 1.9618264436721802, "learning_rate": 5.257176737981126e-06, "loss": 5.5206, "step": 18116 }, { "epoch": 0.9, "grad_norm": 2.086357355117798, "learning_rate": 5.247294826819507e-06, 
"loss": 5.4713, "step": 18120 }, { "epoch": 0.9, "grad_norm": 1.989790678024292, "learning_rate": 5.237412915657888e-06, "loss": 5.4183, "step": 18124 }, { "epoch": 0.9, "grad_norm": 2.4142305850982666, "learning_rate": 5.22753100449627e-06, "loss": 5.4867, "step": 18128 }, { "epoch": 0.9, "grad_norm": 1.9405925273895264, "learning_rate": 5.217649093334652e-06, "loss": 5.4259, "step": 18132 }, { "epoch": 0.9, "grad_norm": 1.9569774866104126, "learning_rate": 5.207767182173033e-06, "loss": 5.4619, "step": 18136 }, { "epoch": 0.9, "grad_norm": 2.1723146438598633, "learning_rate": 5.197885271011413e-06, "loss": 5.4719, "step": 18140 }, { "epoch": 0.9, "grad_norm": 2.3420112133026123, "learning_rate": 5.188003359849795e-06, "loss": 5.4274, "step": 18144 }, { "epoch": 0.9, "grad_norm": 2.2983791828155518, "learning_rate": 5.1781214486881765e-06, "loss": 5.5184, "step": 18148 }, { "epoch": 0.9, "grad_norm": 2.061795711517334, "learning_rate": 5.168239537526558e-06, "loss": 5.4416, "step": 18152 }, { "epoch": 0.9, "grad_norm": 2.11879301071167, "learning_rate": 5.15835762636494e-06, "loss": 5.3997, "step": 18156 }, { "epoch": 0.9, "grad_norm": 2.072601556777954, "learning_rate": 5.148475715203321e-06, "loss": 5.4744, "step": 18160 }, { "epoch": 0.9, "grad_norm": 2.037374258041382, "learning_rate": 5.138593804041702e-06, "loss": 5.5549, "step": 18164 }, { "epoch": 0.9, "grad_norm": 2.1012215614318848, "learning_rate": 5.128711892880083e-06, "loss": 5.5601, "step": 18168 }, { "epoch": 0.9, "grad_norm": 1.9614689350128174, "learning_rate": 5.118829981718465e-06, "loss": 5.3775, "step": 18172 }, { "epoch": 0.9, "grad_norm": 2.3375091552734375, "learning_rate": 5.108948070556846e-06, "loss": 5.5373, "step": 18176 }, { "epoch": 0.9, "grad_norm": 1.9838519096374512, "learning_rate": 5.099066159395227e-06, "loss": 5.3491, "step": 18180 }, { "epoch": 0.9, "grad_norm": 2.0909204483032227, "learning_rate": 5.089184248233608e-06, "loss": 5.3386, "step": 18184 }, { "epoch": 0.9, 
"grad_norm": 2.396127223968506, "learning_rate": 5.07930233707199e-06, "loss": 5.4791, "step": 18188 }, { "epoch": 0.9, "grad_norm": 2.117344379425049, "learning_rate": 5.0694204259103715e-06, "loss": 5.3322, "step": 18192 }, { "epoch": 0.9, "grad_norm": 2.0109291076660156, "learning_rate": 5.059538514748753e-06, "loss": 5.4152, "step": 18196 }, { "epoch": 0.9, "grad_norm": 2.051154851913452, "learning_rate": 5.049656603587134e-06, "loss": 5.504, "step": 18200 }, { "epoch": 0.9, "grad_norm": 1.9939186573028564, "learning_rate": 5.039774692425515e-06, "loss": 5.377, "step": 18204 }, { "epoch": 0.9, "grad_norm": 2.0900635719299316, "learning_rate": 5.029892781263896e-06, "loss": 5.4448, "step": 18208 }, { "epoch": 0.9, "grad_norm": 1.7526922225952148, "learning_rate": 5.020010870102278e-06, "loss": 5.2948, "step": 18212 }, { "epoch": 0.9, "grad_norm": 2.082535982131958, "learning_rate": 5.01012895894066e-06, "loss": 5.4976, "step": 18216 }, { "epoch": 0.9, "grad_norm": 2.088693380355835, "learning_rate": 5.000247047779041e-06, "loss": 5.4579, "step": 18220 }, { "epoch": 0.9, "grad_norm": 1.9704039096832275, "learning_rate": 4.990365136617422e-06, "loss": 5.5093, "step": 18224 }, { "epoch": 0.9, "grad_norm": 2.2970244884490967, "learning_rate": 4.980483225455803e-06, "loss": 5.5017, "step": 18228 }, { "epoch": 0.9, "grad_norm": 2.0478765964508057, "learning_rate": 4.9706013142941845e-06, "loss": 5.4761, "step": 18232 }, { "epoch": 0.9, "grad_norm": 2.1008124351501465, "learning_rate": 4.960719403132566e-06, "loss": 5.5902, "step": 18236 }, { "epoch": 0.9, "grad_norm": 2.41119384765625, "learning_rate": 4.950837491970947e-06, "loss": 5.613, "step": 18240 }, { "epoch": 0.9, "grad_norm": 1.9696173667907715, "learning_rate": 4.940955580809329e-06, "loss": 5.4408, "step": 18244 }, { "epoch": 0.9, "grad_norm": 2.0599782466888428, "learning_rate": 4.93107366964771e-06, "loss": 5.455, "step": 18248 }, { "epoch": 0.9, "grad_norm": 2.1011674404144287, "learning_rate": 
4.921191758486091e-06, "loss": 5.4402, "step": 18252 }, { "epoch": 0.9, "grad_norm": 2.209735870361328, "learning_rate": 4.9113098473244735e-06, "loss": 5.378, "step": 18256 }, { "epoch": 0.9, "grad_norm": 2.093017101287842, "learning_rate": 4.901427936162855e-06, "loss": 5.4387, "step": 18260 }, { "epoch": 0.9, "grad_norm": 2.14595103263855, "learning_rate": 4.891546025001235e-06, "loss": 5.5165, "step": 18264 }, { "epoch": 0.9, "grad_norm": 2.3589043617248535, "learning_rate": 4.881664113839616e-06, "loss": 5.4973, "step": 18268 }, { "epoch": 0.9, "grad_norm": 2.041437864303589, "learning_rate": 4.871782202677998e-06, "loss": 5.5404, "step": 18272 }, { "epoch": 0.9, "grad_norm": 2.244915723800659, "learning_rate": 4.8619002915163795e-06, "loss": 5.3885, "step": 18276 }, { "epoch": 0.9, "grad_norm": 2.1960060596466064, "learning_rate": 4.852018380354761e-06, "loss": 5.534, "step": 18280 }, { "epoch": 0.9, "grad_norm": 2.032874345779419, "learning_rate": 4.842136469193142e-06, "loss": 5.4292, "step": 18284 }, { "epoch": 0.9, "grad_norm": 1.9136545658111572, "learning_rate": 4.832254558031524e-06, "loss": 5.511, "step": 18288 }, { "epoch": 0.9, "grad_norm": 2.0950520038604736, "learning_rate": 4.822372646869905e-06, "loss": 5.414, "step": 18292 }, { "epoch": 0.9, "grad_norm": 2.0211408138275146, "learning_rate": 4.812490735708286e-06, "loss": 5.4139, "step": 18296 }, { "epoch": 0.9, "grad_norm": 1.994523286819458, "learning_rate": 4.802608824546668e-06, "loss": 5.4814, "step": 18300 }, { "epoch": 0.9, "grad_norm": 2.0099375247955322, "learning_rate": 4.792726913385049e-06, "loss": 5.4478, "step": 18304 }, { "epoch": 0.9, "grad_norm": 1.9458421468734741, "learning_rate": 4.78284500222343e-06, "loss": 5.4538, "step": 18308 }, { "epoch": 0.9, "grad_norm": 2.13274884223938, "learning_rate": 4.772963091061811e-06, "loss": 5.5146, "step": 18312 }, { "epoch": 0.9, "grad_norm": 2.223994731903076, "learning_rate": 4.763081179900193e-06, "loss": 5.3984, "step": 18316 }, { 
"epoch": 0.91, "grad_norm": 2.0752246379852295, "learning_rate": 4.753199268738575e-06, "loss": 5.4198, "step": 18320 }, { "epoch": 0.91, "grad_norm": 1.913928508758545, "learning_rate": 4.743317357576956e-06, "loss": 5.4548, "step": 18324 }, { "epoch": 0.91, "grad_norm": 1.9709479808807373, "learning_rate": 4.733435446415337e-06, "loss": 5.432, "step": 18328 }, { "epoch": 0.91, "grad_norm": 2.19464111328125, "learning_rate": 4.723553535253718e-06, "loss": 5.5339, "step": 18332 }, { "epoch": 0.91, "grad_norm": 1.9611873626708984, "learning_rate": 4.7136716240920994e-06, "loss": 5.6126, "step": 18336 }, { "epoch": 0.91, "grad_norm": 1.9830701351165771, "learning_rate": 4.703789712930481e-06, "loss": 5.413, "step": 18340 }, { "epoch": 0.91, "grad_norm": 2.189093828201294, "learning_rate": 4.693907801768863e-06, "loss": 5.4765, "step": 18344 }, { "epoch": 0.91, "grad_norm": 1.831568956375122, "learning_rate": 4.684025890607244e-06, "loss": 5.4116, "step": 18348 }, { "epoch": 0.91, "grad_norm": 1.8038347959518433, "learning_rate": 4.674143979445625e-06, "loss": 5.3937, "step": 18352 }, { "epoch": 0.91, "grad_norm": 2.043757200241089, "learning_rate": 4.664262068284006e-06, "loss": 5.3792, "step": 18356 }, { "epoch": 0.91, "grad_norm": 2.10916805267334, "learning_rate": 4.6543801571223876e-06, "loss": 5.4894, "step": 18360 }, { "epoch": 0.91, "grad_norm": 2.1079790592193604, "learning_rate": 4.644498245960769e-06, "loss": 5.418, "step": 18364 }, { "epoch": 0.91, "grad_norm": 2.068610191345215, "learning_rate": 4.63461633479915e-06, "loss": 5.4669, "step": 18368 }, { "epoch": 0.91, "grad_norm": 2.140131950378418, "learning_rate": 4.624734423637532e-06, "loss": 5.4256, "step": 18372 }, { "epoch": 0.91, "grad_norm": 1.9517799615859985, "learning_rate": 4.614852512475913e-06, "loss": 5.4603, "step": 18376 }, { "epoch": 0.91, "grad_norm": 2.0555922985076904, "learning_rate": 4.6049706013142945e-06, "loss": 5.594, "step": 18380 }, { "epoch": 0.91, "grad_norm": 
2.1756505966186523, "learning_rate": 4.595088690152676e-06, "loss": 5.4045, "step": 18384 }, { "epoch": 0.91, "grad_norm": 2.0365309715270996, "learning_rate": 4.585206778991057e-06, "loss": 5.4668, "step": 18388 }, { "epoch": 0.91, "grad_norm": 1.979575753211975, "learning_rate": 4.575324867829438e-06, "loss": 5.5546, "step": 18392 }, { "epoch": 0.91, "grad_norm": 2.1645045280456543, "learning_rate": 4.565442956667819e-06, "loss": 5.4158, "step": 18396 }, { "epoch": 0.91, "grad_norm": 2.400611162185669, "learning_rate": 4.555561045506201e-06, "loss": 5.4139, "step": 18400 }, { "epoch": 0.91, "grad_norm": 2.1293954849243164, "learning_rate": 4.545679134344583e-06, "loss": 5.3817, "step": 18404 }, { "epoch": 0.91, "grad_norm": 2.0387656688690186, "learning_rate": 4.535797223182964e-06, "loss": 5.533, "step": 18408 }, { "epoch": 0.91, "grad_norm": 2.231621265411377, "learning_rate": 4.525915312021345e-06, "loss": 5.4445, "step": 18412 }, { "epoch": 0.91, "grad_norm": 2.0062594413757324, "learning_rate": 4.516033400859727e-06, "loss": 5.3092, "step": 18416 }, { "epoch": 0.91, "grad_norm": 1.9188722372055054, "learning_rate": 4.506151489698108e-06, "loss": 5.4884, "step": 18420 }, { "epoch": 0.91, "grad_norm": 1.9304651021957397, "learning_rate": 4.496269578536489e-06, "loss": 5.5277, "step": 18424 }, { "epoch": 0.91, "grad_norm": 2.186587333679199, "learning_rate": 4.48638766737487e-06, "loss": 5.494, "step": 18428 }, { "epoch": 0.91, "grad_norm": 2.170325994491577, "learning_rate": 4.476505756213252e-06, "loss": 5.5372, "step": 18432 }, { "epoch": 0.91, "grad_norm": 2.1663074493408203, "learning_rate": 4.466623845051633e-06, "loss": 5.4398, "step": 18436 }, { "epoch": 0.91, "grad_norm": 2.090306043624878, "learning_rate": 4.456741933890014e-06, "loss": 5.5293, "step": 18440 }, { "epoch": 0.91, "grad_norm": 1.9806874990463257, "learning_rate": 4.4468600227283964e-06, "loss": 5.4678, "step": 18444 }, { "epoch": 0.91, "grad_norm": 2.011406183242798, "learning_rate": 
4.436978111566778e-06, "loss": 5.4633, "step": 18448 }, { "epoch": 0.91, "grad_norm": 1.8348437547683716, "learning_rate": 4.427096200405159e-06, "loss": 5.4313, "step": 18452 }, { "epoch": 0.91, "grad_norm": 2.4174129962921143, "learning_rate": 4.417214289243539e-06, "loss": 5.5076, "step": 18456 }, { "epoch": 0.91, "grad_norm": 2.0693600177764893, "learning_rate": 4.407332378081921e-06, "loss": 5.505, "step": 18460 }, { "epoch": 0.91, "grad_norm": 1.9076710939407349, "learning_rate": 4.3974504669203025e-06, "loss": 5.3791, "step": 18464 }, { "epoch": 0.91, "grad_norm": 2.2067015171051025, "learning_rate": 4.387568555758684e-06, "loss": 5.4183, "step": 18468 }, { "epoch": 0.91, "grad_norm": 2.229694128036499, "learning_rate": 4.377686644597066e-06, "loss": 5.4481, "step": 18472 }, { "epoch": 0.91, "grad_norm": 2.3484323024749756, "learning_rate": 4.367804733435447e-06, "loss": 5.55, "step": 18476 }, { "epoch": 0.91, "grad_norm": 2.2795610427856445, "learning_rate": 4.357922822273828e-06, "loss": 5.4688, "step": 18480 }, { "epoch": 0.91, "grad_norm": 2.2929630279541016, "learning_rate": 4.348040911112209e-06, "loss": 5.5401, "step": 18484 }, { "epoch": 0.91, "grad_norm": 2.089888334274292, "learning_rate": 4.338158999950591e-06, "loss": 5.3467, "step": 18488 }, { "epoch": 0.91, "grad_norm": 2.0417628288269043, "learning_rate": 4.328277088788972e-06, "loss": 5.3768, "step": 18492 }, { "epoch": 0.91, "grad_norm": 2.065882682800293, "learning_rate": 4.318395177627353e-06, "loss": 5.4829, "step": 18496 }, { "epoch": 0.91, "grad_norm": 2.1247379779815674, "learning_rate": 4.308513266465734e-06, "loss": 5.4237, "step": 18500 }, { "epoch": 0.91, "grad_norm": 1.8752723932266235, "learning_rate": 4.298631355304116e-06, "loss": 5.426, "step": 18504 }, { "epoch": 0.91, "grad_norm": 2.2132692337036133, "learning_rate": 4.2887494441424975e-06, "loss": 5.4543, "step": 18508 }, { "epoch": 0.91, "grad_norm": 2.0311672687530518, "learning_rate": 4.278867532980879e-06, "loss": 
5.6122, "step": 18512 }, { "epoch": 0.91, "grad_norm": 2.083220958709717, "learning_rate": 4.26898562181926e-06, "loss": 5.4746, "step": 18516 }, { "epoch": 0.92, "grad_norm": 1.9595463275909424, "learning_rate": 4.259103710657641e-06, "loss": 5.3808, "step": 18520 }, { "epoch": 0.92, "grad_norm": 2.2142648696899414, "learning_rate": 4.249221799496022e-06, "loss": 5.4515, "step": 18524 }, { "epoch": 0.92, "grad_norm": 1.9906036853790283, "learning_rate": 4.239339888334404e-06, "loss": 5.4185, "step": 18528 }, { "epoch": 0.92, "grad_norm": 2.193829298019409, "learning_rate": 4.229457977172786e-06, "loss": 5.5095, "step": 18532 }, { "epoch": 0.92, "grad_norm": 2.1080212593078613, "learning_rate": 4.219576066011167e-06, "loss": 5.4778, "step": 18536 }, { "epoch": 0.92, "grad_norm": 1.9774274826049805, "learning_rate": 4.209694154849548e-06, "loss": 5.557, "step": 18540 }, { "epoch": 0.92, "grad_norm": 1.9373048543930054, "learning_rate": 4.19981224368793e-06, "loss": 5.2974, "step": 18544 }, { "epoch": 0.92, "grad_norm": 2.1534974575042725, "learning_rate": 4.1899303325263105e-06, "loss": 5.3349, "step": 18548 }, { "epoch": 0.92, "grad_norm": 1.8971552848815918, "learning_rate": 4.180048421364692e-06, "loss": 5.4853, "step": 18552 }, { "epoch": 0.92, "grad_norm": 2.132871627807617, "learning_rate": 4.170166510203073e-06, "loss": 5.4839, "step": 18556 }, { "epoch": 0.92, "grad_norm": 2.068107843399048, "learning_rate": 4.160284599041455e-06, "loss": 5.5167, "step": 18560 }, { "epoch": 0.92, "grad_norm": 1.9894949197769165, "learning_rate": 4.150402687879836e-06, "loss": 5.5169, "step": 18564 }, { "epoch": 0.92, "grad_norm": 1.914143443107605, "learning_rate": 4.1405207767182174e-06, "loss": 5.484, "step": 18568 }, { "epoch": 0.92, "grad_norm": 2.168916940689087, "learning_rate": 4.1306388655565995e-06, "loss": 5.4795, "step": 18572 }, { "epoch": 0.92, "grad_norm": 2.0595452785491943, "learning_rate": 4.120756954394981e-06, "loss": 5.5072, "step": 18576 }, { "epoch": 
0.92, "grad_norm": 1.9776825904846191, "learning_rate": 4.110875043233361e-06, "loss": 5.5014, "step": 18580 }, { "epoch": 0.92, "grad_norm": 2.120623826980591, "learning_rate": 4.100993132071742e-06, "loss": 5.4891, "step": 18584 }, { "epoch": 0.92, "grad_norm": 1.789355754852295, "learning_rate": 4.091111220910124e-06, "loss": 5.5514, "step": 18588 }, { "epoch": 0.92, "grad_norm": 2.0536649227142334, "learning_rate": 4.0812293097485056e-06, "loss": 5.3581, "step": 18592 }, { "epoch": 0.92, "grad_norm": 1.785683035850525, "learning_rate": 4.071347398586887e-06, "loss": 5.4293, "step": 18596 }, { "epoch": 0.92, "grad_norm": 1.9793387651443481, "learning_rate": 4.061465487425268e-06, "loss": 5.4698, "step": 18600 }, { "epoch": 0.92, "grad_norm": 2.0507895946502686, "learning_rate": 4.05158357626365e-06, "loss": 5.3736, "step": 18604 }, { "epoch": 0.92, "grad_norm": 2.2948641777038574, "learning_rate": 4.041701665102031e-06, "loss": 5.4733, "step": 18608 }, { "epoch": 0.92, "grad_norm": 2.0238101482391357, "learning_rate": 4.0318197539404125e-06, "loss": 5.4463, "step": 18612 }, { "epoch": 0.92, "grad_norm": 2.103618860244751, "learning_rate": 4.021937842778794e-06, "loss": 5.4943, "step": 18616 }, { "epoch": 0.92, "grad_norm": 2.038139581680298, "learning_rate": 4.012055931617175e-06, "loss": 5.5276, "step": 18620 }, { "epoch": 0.92, "grad_norm": 1.837792158126831, "learning_rate": 4.002174020455556e-06, "loss": 5.4706, "step": 18624 }, { "epoch": 0.92, "grad_norm": 2.0344648361206055, "learning_rate": 3.992292109293937e-06, "loss": 5.3494, "step": 18628 }, { "epoch": 0.92, "grad_norm": 2.0213499069213867, "learning_rate": 3.982410198132319e-06, "loss": 5.5867, "step": 18632 }, { "epoch": 0.92, "grad_norm": 2.209096670150757, "learning_rate": 3.972528286970701e-06, "loss": 5.5146, "step": 18636 }, { "epoch": 0.92, "grad_norm": 1.924126148223877, "learning_rate": 3.962646375809082e-06, "loss": 5.4478, "step": 18640 }, { "epoch": 0.92, "grad_norm": 2.2356016635894775, 
"learning_rate": 3.952764464647463e-06, "loss": 5.5293, "step": 18644 }, { "epoch": 0.92, "grad_norm": 2.2954514026641846, "learning_rate": 3.942882553485844e-06, "loss": 5.4561, "step": 18648 }, { "epoch": 0.92, "grad_norm": 2.275831460952759, "learning_rate": 3.9330006423242254e-06, "loss": 5.4746, "step": 18652 }, { "epoch": 0.92, "grad_norm": 2.116305112838745, "learning_rate": 3.923118731162607e-06, "loss": 5.6006, "step": 18656 }, { "epoch": 0.92, "grad_norm": 2.1635780334472656, "learning_rate": 3.913236820000989e-06, "loss": 5.5606, "step": 18660 }, { "epoch": 0.92, "grad_norm": 1.8676637411117554, "learning_rate": 3.90335490883937e-06, "loss": 5.4456, "step": 18664 }, { "epoch": 0.92, "grad_norm": 2.344409227371216, "learning_rate": 3.893472997677751e-06, "loss": 5.5425, "step": 18668 }, { "epoch": 0.92, "grad_norm": 2.0696935653686523, "learning_rate": 3.883591086516132e-06, "loss": 5.4989, "step": 18672 }, { "epoch": 0.92, "grad_norm": 2.1459434032440186, "learning_rate": 3.8737091753545136e-06, "loss": 5.4776, "step": 18676 }, { "epoch": 0.92, "grad_norm": 2.0918266773223877, "learning_rate": 3.863827264192895e-06, "loss": 5.408, "step": 18680 }, { "epoch": 0.92, "grad_norm": 1.6926395893096924, "learning_rate": 3.853945353031276e-06, "loss": 5.4212, "step": 18684 }, { "epoch": 0.92, "grad_norm": 1.9440195560455322, "learning_rate": 3.844063441869658e-06, "loss": 5.4617, "step": 18688 }, { "epoch": 0.92, "grad_norm": 1.8561819791793823, "learning_rate": 3.834181530708039e-06, "loss": 5.5185, "step": 18692 }, { "epoch": 0.92, "grad_norm": 2.1534957885742188, "learning_rate": 3.8242996195464205e-06, "loss": 5.3607, "step": 18696 }, { "epoch": 0.92, "grad_norm": 1.9762316942214966, "learning_rate": 3.8144177083848017e-06, "loss": 5.4843, "step": 18700 }, { "epoch": 0.92, "grad_norm": 1.9604766368865967, "learning_rate": 3.8045357972231833e-06, "loss": 5.5596, "step": 18704 }, { "epoch": 0.92, "grad_norm": 2.186389684677124, "learning_rate": 
3.7946538860615645e-06, "loss": 5.5269, "step": 18708 }, { "epoch": 0.92, "grad_norm": 2.211263656616211, "learning_rate": 3.7847719748999458e-06, "loss": 5.4103, "step": 18712 }, { "epoch": 0.92, "grad_norm": 2.0040395259857178, "learning_rate": 3.7748900637383274e-06, "loss": 5.4427, "step": 18716 }, { "epoch": 0.92, "grad_norm": 2.08552885055542, "learning_rate": 3.7650081525767086e-06, "loss": 5.3101, "step": 18720 }, { "epoch": 0.93, "grad_norm": 2.090424060821533, "learning_rate": 3.75512624141509e-06, "loss": 5.4502, "step": 18724 }, { "epoch": 0.93, "grad_norm": 1.8602705001831055, "learning_rate": 3.745244330253471e-06, "loss": 5.4708, "step": 18728 }, { "epoch": 0.93, "grad_norm": 1.9746719598770142, "learning_rate": 3.7353624190918527e-06, "loss": 5.3835, "step": 18732 }, { "epoch": 0.93, "grad_norm": 1.9959601163864136, "learning_rate": 3.725480507930234e-06, "loss": 5.4041, "step": 18736 }, { "epoch": 0.93, "grad_norm": 2.080343008041382, "learning_rate": 3.715598596768615e-06, "loss": 5.5822, "step": 18740 }, { "epoch": 0.93, "grad_norm": 2.260117292404175, "learning_rate": 3.7057166856069963e-06, "loss": 5.4475, "step": 18744 }, { "epoch": 0.93, "grad_norm": 2.2036492824554443, "learning_rate": 3.695834774445378e-06, "loss": 5.5317, "step": 18748 }, { "epoch": 0.93, "grad_norm": 2.4585494995117188, "learning_rate": 3.685952863283759e-06, "loss": 5.5552, "step": 18752 }, { "epoch": 0.93, "grad_norm": 2.2832350730895996, "learning_rate": 3.6760709521221404e-06, "loss": 5.4459, "step": 18756 }, { "epoch": 0.93, "grad_norm": 2.144115924835205, "learning_rate": 3.666189040960522e-06, "loss": 5.3545, "step": 18760 }, { "epoch": 0.93, "grad_norm": 2.214642286300659, "learning_rate": 3.6563071297989032e-06, "loss": 5.3576, "step": 18764 }, { "epoch": 0.93, "grad_norm": 1.9967424869537354, "learning_rate": 3.6464252186372844e-06, "loss": 5.4909, "step": 18768 }, { "epoch": 0.93, "grad_norm": 2.0453543663024902, "learning_rate": 3.6365433074756657e-06, "loss": 
5.5049, "step": 18772 }, { "epoch": 0.93, "grad_norm": 2.09987473487854, "learning_rate": 3.6266613963140473e-06, "loss": 5.5611, "step": 18776 }, { "epoch": 0.93, "grad_norm": 1.9165294170379639, "learning_rate": 3.6167794851524285e-06, "loss": 5.3851, "step": 18780 }, { "epoch": 0.93, "grad_norm": 2.128594160079956, "learning_rate": 3.6068975739908097e-06, "loss": 5.444, "step": 18784 }, { "epoch": 0.93, "grad_norm": 2.029412031173706, "learning_rate": 3.5970156628291918e-06, "loss": 5.6027, "step": 18788 }, { "epoch": 0.93, "grad_norm": 2.0921339988708496, "learning_rate": 3.587133751667573e-06, "loss": 5.4492, "step": 18792 }, { "epoch": 0.93, "grad_norm": 2.300293445587158, "learning_rate": 3.5772518405059538e-06, "loss": 5.5684, "step": 18796 }, { "epoch": 0.93, "grad_norm": 2.034621000289917, "learning_rate": 3.567369929344335e-06, "loss": 5.5198, "step": 18800 }, { "epoch": 0.93, "grad_norm": 2.3296048641204834, "learning_rate": 3.557488018182717e-06, "loss": 5.5555, "step": 18804 }, { "epoch": 0.93, "grad_norm": 2.2251405715942383, "learning_rate": 3.5476061070210983e-06, "loss": 5.4131, "step": 18808 }, { "epoch": 0.93, "grad_norm": 2.1167004108428955, "learning_rate": 3.537724195859479e-06, "loss": 5.3361, "step": 18812 }, { "epoch": 0.93, "grad_norm": 1.959514856338501, "learning_rate": 3.527842284697861e-06, "loss": 5.3026, "step": 18816 }, { "epoch": 0.93, "grad_norm": 2.3255879878997803, "learning_rate": 3.5179603735362423e-06, "loss": 5.3271, "step": 18820 }, { "epoch": 0.93, "grad_norm": 2.0048890113830566, "learning_rate": 3.5080784623746235e-06, "loss": 5.4256, "step": 18824 }, { "epoch": 0.93, "grad_norm": 2.1140854358673096, "learning_rate": 3.4981965512130043e-06, "loss": 5.3592, "step": 18828 }, { "epoch": 0.93, "grad_norm": 1.857484221458435, "learning_rate": 3.4883146400513864e-06, "loss": 5.4143, "step": 18832 }, { "epoch": 0.93, "grad_norm": 2.063621997833252, "learning_rate": 3.4784327288897676e-06, "loss": 5.5095, "step": 18836 }, { 
"epoch": 0.93, "grad_norm": 1.9271105527877808, "learning_rate": 3.468550817728149e-06, "loss": 5.4678, "step": 18840 }, { "epoch": 0.93, "grad_norm": 1.9842108488082886, "learning_rate": 3.4586689065665296e-06, "loss": 5.4825, "step": 18844 }, { "epoch": 0.93, "grad_norm": 2.128753662109375, "learning_rate": 3.4487869954049117e-06, "loss": 5.4128, "step": 18848 }, { "epoch": 0.93, "grad_norm": 2.0321123600006104, "learning_rate": 3.438905084243293e-06, "loss": 5.4735, "step": 18852 }, { "epoch": 0.93, "grad_norm": 2.1090190410614014, "learning_rate": 3.429023173081674e-06, "loss": 5.4828, "step": 18856 }, { "epoch": 0.93, "grad_norm": 2.016251802444458, "learning_rate": 3.4191412619200557e-06, "loss": 5.56, "step": 18860 }, { "epoch": 0.93, "grad_norm": 1.863065481185913, "learning_rate": 3.409259350758437e-06, "loss": 5.456, "step": 18864 }, { "epoch": 0.93, "grad_norm": 2.226012706756592, "learning_rate": 3.399377439596818e-06, "loss": 5.5674, "step": 18868 }, { "epoch": 0.93, "grad_norm": 2.3455286026000977, "learning_rate": 3.3894955284351994e-06, "loss": 5.5108, "step": 18872 }, { "epoch": 0.93, "grad_norm": 1.991924524307251, "learning_rate": 3.379613617273581e-06, "loss": 5.5043, "step": 18876 }, { "epoch": 0.93, "grad_norm": 2.287811279296875, "learning_rate": 3.3697317061119622e-06, "loss": 5.4942, "step": 18880 }, { "epoch": 0.93, "grad_norm": 2.099695920944214, "learning_rate": 3.3598497949503434e-06, "loss": 5.4295, "step": 18884 }, { "epoch": 0.93, "grad_norm": 2.0772879123687744, "learning_rate": 3.349967883788725e-06, "loss": 5.4999, "step": 18888 }, { "epoch": 0.93, "grad_norm": 2.076507329940796, "learning_rate": 3.3400859726271063e-06, "loss": 5.3828, "step": 18892 }, { "epoch": 0.93, "grad_norm": 1.9823633432388306, "learning_rate": 3.3302040614654875e-06, "loss": 5.4312, "step": 18896 }, { "epoch": 0.93, "grad_norm": 2.120603322982788, "learning_rate": 3.3203221503038687e-06, "loss": 5.4585, "step": 18900 }, { "epoch": 0.93, "grad_norm": 
1.9160094261169434, "learning_rate": 3.3104402391422504e-06, "loss": 5.4471, "step": 18904 }, { "epoch": 0.93, "grad_norm": 2.0175113677978516, "learning_rate": 3.3005583279806316e-06, "loss": 5.3894, "step": 18908 }, { "epoch": 0.93, "grad_norm": 2.052076578140259, "learning_rate": 3.2906764168190128e-06, "loss": 5.5049, "step": 18912 }, { "epoch": 0.93, "grad_norm": 2.0342178344726562, "learning_rate": 3.280794505657394e-06, "loss": 5.4793, "step": 18916 }, { "epoch": 0.93, "grad_norm": 2.004908561706543, "learning_rate": 3.2709125944957756e-06, "loss": 5.5442, "step": 18920 }, { "epoch": 0.94, "grad_norm": 2.1404547691345215, "learning_rate": 3.261030683334157e-06, "loss": 5.4585, "step": 18924 }, { "epoch": 0.94, "grad_norm": 2.1052112579345703, "learning_rate": 3.251148772172538e-06, "loss": 5.5015, "step": 18928 }, { "epoch": 0.94, "grad_norm": 2.0737648010253906, "learning_rate": 3.24126686101092e-06, "loss": 5.5249, "step": 18932 }, { "epoch": 0.94, "grad_norm": 2.041416883468628, "learning_rate": 3.231384949849301e-06, "loss": 5.5562, "step": 18936 }, { "epoch": 0.94, "grad_norm": 2.186037063598633, "learning_rate": 3.221503038687682e-06, "loss": 5.4618, "step": 18940 }, { "epoch": 0.94, "grad_norm": 2.2970268726348877, "learning_rate": 3.2116211275260633e-06, "loss": 5.3398, "step": 18944 }, { "epoch": 0.94, "grad_norm": 2.2040741443634033, "learning_rate": 3.2017392163644454e-06, "loss": 5.48, "step": 18948 }, { "epoch": 0.94, "grad_norm": 2.0961127281188965, "learning_rate": 3.191857305202826e-06, "loss": 5.5835, "step": 18952 }, { "epoch": 0.94, "grad_norm": 2.112224578857422, "learning_rate": 3.1819753940412074e-06, "loss": 5.5615, "step": 18956 }, { "epoch": 0.94, "grad_norm": 1.9072363376617432, "learning_rate": 3.1720934828795895e-06, "loss": 5.4394, "step": 18960 }, { "epoch": 0.94, "grad_norm": 2.0917303562164307, "learning_rate": 3.1622115717179707e-06, "loss": 5.4445, "step": 18964 }, { "epoch": 0.94, "grad_norm": 1.9776668548583984, 
"learning_rate": 3.152329660556352e-06, "loss": 5.4521, "step": 18968 }, { "epoch": 0.94, "grad_norm": 2.028456449508667, "learning_rate": 3.1424477493947327e-06, "loss": 5.4723, "step": 18972 }, { "epoch": 0.94, "grad_norm": 2.263448715209961, "learning_rate": 3.1325658382331147e-06, "loss": 5.4633, "step": 18976 }, { "epoch": 0.94, "grad_norm": 2.058852434158325, "learning_rate": 3.122683927071496e-06, "loss": 5.4923, "step": 18980 }, { "epoch": 0.94, "grad_norm": 2.173109769821167, "learning_rate": 3.112802015909877e-06, "loss": 5.3346, "step": 18984 }, { "epoch": 0.94, "grad_norm": 2.0743250846862793, "learning_rate": 3.1029201047482584e-06, "loss": 5.5121, "step": 18988 }, { "epoch": 0.94, "grad_norm": 2.125735282897949, "learning_rate": 3.09303819358664e-06, "loss": 5.3462, "step": 18992 }, { "epoch": 0.94, "grad_norm": 1.9164544343948364, "learning_rate": 3.0831562824250212e-06, "loss": 5.4138, "step": 18996 }, { "epoch": 0.94, "grad_norm": 1.9155079126358032, "learning_rate": 3.0732743712634024e-06, "loss": 5.3672, "step": 19000 }, { "epoch": 0.94, "grad_norm": 2.128096342086792, "learning_rate": 3.0633924601017836e-06, "loss": 5.4021, "step": 19004 }, { "epoch": 0.94, "grad_norm": 2.0933046340942383, "learning_rate": 3.0535105489401653e-06, "loss": 5.5964, "step": 19008 }, { "epoch": 0.94, "grad_norm": 1.9437744617462158, "learning_rate": 3.0436286377785465e-06, "loss": 5.5408, "step": 19012 }, { "epoch": 0.94, "grad_norm": 2.2945642471313477, "learning_rate": 3.0337467266169277e-06, "loss": 5.4818, "step": 19016 }, { "epoch": 0.94, "grad_norm": 2.067274570465088, "learning_rate": 3.023864815455309e-06, "loss": 5.4306, "step": 19020 }, { "epoch": 0.94, "grad_norm": 1.847240686416626, "learning_rate": 3.0139829042936906e-06, "loss": 5.451, "step": 19024 }, { "epoch": 0.94, "grad_norm": 2.06215238571167, "learning_rate": 3.004100993132072e-06, "loss": 5.406, "step": 19028 }, { "epoch": 0.94, "grad_norm": 1.9967362880706787, "learning_rate": 
2.994219081970453e-06, "loss": 5.4419, "step": 19032 }, { "epoch": 0.94, "grad_norm": 2.127033233642578, "learning_rate": 2.9843371708088346e-06, "loss": 5.4244, "step": 19036 }, { "epoch": 0.94, "grad_norm": 2.093514919281006, "learning_rate": 2.974455259647216e-06, "loss": 5.4256, "step": 19040 }, { "epoch": 0.94, "grad_norm": 2.253448963165283, "learning_rate": 2.9645733484855975e-06, "loss": 5.5029, "step": 19044 }, { "epoch": 0.94, "grad_norm": 1.996626377105713, "learning_rate": 2.9546914373239783e-06, "loss": 5.5046, "step": 19048 }, { "epoch": 0.94, "grad_norm": 2.028315782546997, "learning_rate": 2.94480952616236e-06, "loss": 5.5133, "step": 19052 }, { "epoch": 0.94, "grad_norm": 1.8789162635803223, "learning_rate": 2.934927615000741e-06, "loss": 5.5341, "step": 19056 }, { "epoch": 0.94, "grad_norm": 2.1197164058685303, "learning_rate": 2.9250457038391228e-06, "loss": 5.426, "step": 19060 }, { "epoch": 0.94, "grad_norm": 2.14929461479187, "learning_rate": 2.915163792677504e-06, "loss": 5.5187, "step": 19064 }, { "epoch": 0.94, "grad_norm": 2.1319830417633057, "learning_rate": 2.905281881515885e-06, "loss": 5.4122, "step": 19068 }, { "epoch": 0.94, "grad_norm": 2.1806631088256836, "learning_rate": 2.895399970354267e-06, "loss": 5.483, "step": 19072 }, { "epoch": 0.94, "grad_norm": 2.130634307861328, "learning_rate": 2.885518059192648e-06, "loss": 5.352, "step": 19076 }, { "epoch": 0.94, "grad_norm": 1.8736112117767334, "learning_rate": 2.8756361480310292e-06, "loss": 5.4776, "step": 19080 }, { "epoch": 0.94, "grad_norm": 1.9261388778686523, "learning_rate": 2.8657542368694105e-06, "loss": 5.4317, "step": 19084 }, { "epoch": 0.94, "grad_norm": 2.2391560077667236, "learning_rate": 2.855872325707792e-06, "loss": 5.4883, "step": 19088 }, { "epoch": 0.94, "grad_norm": 2.0670218467712402, "learning_rate": 2.8459904145461733e-06, "loss": 5.4033, "step": 19092 }, { "epoch": 0.94, "grad_norm": 1.906471610069275, "learning_rate": 2.8361085033845545e-06, "loss": 
5.4008, "step": 19096 }, { "epoch": 0.94, "grad_norm": 1.9118762016296387, "learning_rate": 2.826226592222936e-06, "loss": 5.4519, "step": 19100 }, { "epoch": 0.94, "grad_norm": 2.1099867820739746, "learning_rate": 2.8163446810613174e-06, "loss": 5.4299, "step": 19104 }, { "epoch": 0.94, "grad_norm": 1.8047434091567993, "learning_rate": 2.806462769899699e-06, "loss": 5.4628, "step": 19108 }, { "epoch": 0.94, "grad_norm": 2.058469772338867, "learning_rate": 2.79658085873808e-06, "loss": 5.4885, "step": 19112 }, { "epoch": 0.94, "grad_norm": 2.310060501098633, "learning_rate": 2.7866989475764614e-06, "loss": 5.5399, "step": 19116 }, { "epoch": 0.94, "grad_norm": 2.1850757598876953, "learning_rate": 2.7768170364148426e-06, "loss": 5.397, "step": 19120 }, { "epoch": 0.94, "grad_norm": 1.979308009147644, "learning_rate": 2.7669351252532243e-06, "loss": 5.4149, "step": 19124 }, { "epoch": 0.95, "grad_norm": 2.1982696056365967, "learning_rate": 2.7570532140916055e-06, "loss": 5.4355, "step": 19128 }, { "epoch": 0.95, "grad_norm": 2.1380321979522705, "learning_rate": 2.7471713029299867e-06, "loss": 5.4576, "step": 19132 }, { "epoch": 0.95, "grad_norm": 2.1724483966827393, "learning_rate": 2.7372893917683683e-06, "loss": 5.4849, "step": 19136 }, { "epoch": 0.95, "grad_norm": 2.1920723915100098, "learning_rate": 2.7274074806067496e-06, "loss": 5.5044, "step": 19140 }, { "epoch": 0.95, "grad_norm": 2.0876924991607666, "learning_rate": 2.7175255694451308e-06, "loss": 5.5164, "step": 19144 }, { "epoch": 0.95, "grad_norm": 2.028972864151001, "learning_rate": 2.707643658283512e-06, "loss": 5.4324, "step": 19148 }, { "epoch": 0.95, "grad_norm": 2.1649794578552246, "learning_rate": 2.6977617471218936e-06, "loss": 5.4183, "step": 19152 }, { "epoch": 0.95, "grad_norm": 2.0742039680480957, "learning_rate": 2.687879835960275e-06, "loss": 5.5804, "step": 19156 }, { "epoch": 0.95, "grad_norm": 2.0070149898529053, "learning_rate": 2.677997924798656e-06, "loss": 5.4086, "step": 19160 }, { 
"epoch": 0.95, "grad_norm": 2.270585298538208, "learning_rate": 2.6681160136370377e-06, "loss": 5.3827, "step": 19164 }, { "epoch": 0.95, "grad_norm": 1.9209606647491455, "learning_rate": 2.658234102475419e-06, "loss": 5.4162, "step": 19168 }, { "epoch": 0.95, "grad_norm": 2.2000796794891357, "learning_rate": 2.6483521913138005e-06, "loss": 5.5244, "step": 19172 }, { "epoch": 0.95, "grad_norm": 2.1105525493621826, "learning_rate": 2.6384702801521813e-06, "loss": 5.554, "step": 19176 }, { "epoch": 0.95, "grad_norm": 2.1889917850494385, "learning_rate": 2.628588368990563e-06, "loss": 5.434, "step": 19180 }, { "epoch": 0.95, "grad_norm": 2.020902395248413, "learning_rate": 2.618706457828944e-06, "loss": 5.5205, "step": 19184 }, { "epoch": 0.95, "grad_norm": 1.8815635442733765, "learning_rate": 2.608824546667326e-06, "loss": 5.3136, "step": 19188 }, { "epoch": 0.95, "grad_norm": 2.1251041889190674, "learning_rate": 2.5989426355057066e-06, "loss": 5.4888, "step": 19192 }, { "epoch": 0.95, "grad_norm": 2.0390384197235107, "learning_rate": 2.5890607243440882e-06, "loss": 5.487, "step": 19196 }, { "epoch": 0.95, "grad_norm": 2.1622934341430664, "learning_rate": 2.57917881318247e-06, "loss": 5.4759, "step": 19200 }, { "epoch": 0.95, "grad_norm": 2.190906524658203, "learning_rate": 2.569296902020851e-06, "loss": 5.505, "step": 19204 }, { "epoch": 0.95, "grad_norm": 2.2829856872558594, "learning_rate": 2.5594149908592323e-06, "loss": 5.5217, "step": 19208 }, { "epoch": 0.95, "grad_norm": 1.9600523710250854, "learning_rate": 2.5495330796976135e-06, "loss": 5.5042, "step": 19212 }, { "epoch": 0.95, "grad_norm": 1.987808346748352, "learning_rate": 2.539651168535995e-06, "loss": 5.5081, "step": 19216 }, { "epoch": 0.95, "grad_norm": 2.160310745239258, "learning_rate": 2.5297692573743764e-06, "loss": 5.41, "step": 19220 }, { "epoch": 0.95, "grad_norm": 2.06400728225708, "learning_rate": 2.5198873462127576e-06, "loss": 5.4132, "step": 19224 }, { "epoch": 0.95, "grad_norm": 
1.9328960180282593, "learning_rate": 2.510005435051139e-06, "loss": 5.4076, "step": 19228 }, { "epoch": 0.95, "grad_norm": 2.1214709281921387, "learning_rate": 2.5001235238895204e-06, "loss": 5.4865, "step": 19232 }, { "epoch": 0.95, "grad_norm": 1.868773102760315, "learning_rate": 2.4902416127279016e-06, "loss": 5.41, "step": 19236 }, { "epoch": 0.95, "grad_norm": 1.9432061910629272, "learning_rate": 2.480359701566283e-06, "loss": 5.4604, "step": 19240 }, { "epoch": 0.95, "grad_norm": 1.9144906997680664, "learning_rate": 2.4704777904046645e-06, "loss": 5.4058, "step": 19244 }, { "epoch": 0.95, "grad_norm": 2.061899185180664, "learning_rate": 2.4605958792430457e-06, "loss": 5.3946, "step": 19248 }, { "epoch": 0.95, "grad_norm": 2.211974859237671, "learning_rate": 2.4507139680814273e-06, "loss": 5.5185, "step": 19252 }, { "epoch": 0.95, "grad_norm": 2.155259847640991, "learning_rate": 2.440832056919808e-06, "loss": 5.5318, "step": 19256 }, { "epoch": 0.95, "grad_norm": 2.0900609493255615, "learning_rate": 2.4309501457581898e-06, "loss": 5.3854, "step": 19260 }, { "epoch": 0.95, "grad_norm": 2.067147970199585, "learning_rate": 2.421068234596571e-06, "loss": 5.5763, "step": 19264 }, { "epoch": 0.95, "grad_norm": 1.9264382123947144, "learning_rate": 2.4111863234349526e-06, "loss": 5.4579, "step": 19268 }, { "epoch": 0.95, "grad_norm": 1.9205288887023926, "learning_rate": 2.401304412273334e-06, "loss": 5.5091, "step": 19272 }, { "epoch": 0.95, "grad_norm": 2.4955880641937256, "learning_rate": 2.391422501111715e-06, "loss": 5.4334, "step": 19276 }, { "epoch": 0.95, "grad_norm": 2.0582950115203857, "learning_rate": 2.3815405899500967e-06, "loss": 5.5041, "step": 19280 }, { "epoch": 0.95, "grad_norm": 1.9333152770996094, "learning_rate": 2.371658678788478e-06, "loss": 5.5563, "step": 19284 }, { "epoch": 0.95, "grad_norm": 2.0240728855133057, "learning_rate": 2.361776767626859e-06, "loss": 5.4964, "step": 19288 }, { "epoch": 0.95, "grad_norm": 2.198359251022339, 
"learning_rate": 2.3518948564652403e-06, "loss": 5.5561, "step": 19292 }, { "epoch": 0.95, "grad_norm": 2.372915506362915, "learning_rate": 2.342012945303622e-06, "loss": 5.5221, "step": 19296 }, { "epoch": 0.95, "grad_norm": 2.183767557144165, "learning_rate": 2.332131034142003e-06, "loss": 5.4475, "step": 19300 }, { "epoch": 0.95, "grad_norm": 2.019258499145508, "learning_rate": 2.3222491229803844e-06, "loss": 5.4531, "step": 19304 }, { "epoch": 0.95, "grad_norm": 2.098344564437866, "learning_rate": 2.312367211818766e-06, "loss": 5.4763, "step": 19308 }, { "epoch": 0.95, "grad_norm": 1.9019840955734253, "learning_rate": 2.3024853006571472e-06, "loss": 5.5028, "step": 19312 }, { "epoch": 0.95, "grad_norm": 1.9219045639038086, "learning_rate": 2.2926033894955284e-06, "loss": 5.4599, "step": 19316 }, { "epoch": 0.95, "grad_norm": 2.0754764080047607, "learning_rate": 2.2827214783339097e-06, "loss": 5.5103, "step": 19320 }, { "epoch": 0.95, "grad_norm": 1.8941829204559326, "learning_rate": 2.2728395671722913e-06, "loss": 5.4677, "step": 19324 }, { "epoch": 0.95, "grad_norm": 2.03706431388855, "learning_rate": 2.2629576560106725e-06, "loss": 5.3742, "step": 19328 }, { "epoch": 0.96, "grad_norm": 2.2529075145721436, "learning_rate": 2.253075744849054e-06, "loss": 5.4839, "step": 19332 }, { "epoch": 0.96, "grad_norm": 2.053737163543701, "learning_rate": 2.243193833687435e-06, "loss": 5.466, "step": 19336 }, { "epoch": 0.96, "grad_norm": 2.1574110984802246, "learning_rate": 2.2333119225258166e-06, "loss": 5.3834, "step": 19340 }, { "epoch": 0.96, "grad_norm": 2.3568503856658936, "learning_rate": 2.2234300113641982e-06, "loss": 5.4472, "step": 19344 }, { "epoch": 0.96, "grad_norm": 2.0544795989990234, "learning_rate": 2.2135481002025794e-06, "loss": 5.3323, "step": 19348 }, { "epoch": 0.96, "grad_norm": 2.053724527359009, "learning_rate": 2.2036661890409606e-06, "loss": 5.4349, "step": 19352 }, { "epoch": 0.96, "grad_norm": 2.132596015930176, "learning_rate": 
2.193784277879342e-06, "loss": 5.4897, "step": 19356 }, { "epoch": 0.96, "grad_norm": 2.1658577919006348, "learning_rate": 2.1839023667177235e-06, "loss": 5.4621, "step": 19360 }, { "epoch": 0.96, "grad_norm": 2.2812771797180176, "learning_rate": 2.1740204555561047e-06, "loss": 5.3762, "step": 19364 }, { "epoch": 0.96, "grad_norm": 2.056185483932495, "learning_rate": 2.164138544394486e-06, "loss": 5.4444, "step": 19368 }, { "epoch": 0.96, "grad_norm": 2.1531660556793213, "learning_rate": 2.154256633232867e-06, "loss": 5.5484, "step": 19372 }, { "epoch": 0.96, "grad_norm": 2.3134000301361084, "learning_rate": 2.1443747220712488e-06, "loss": 5.5518, "step": 19376 }, { "epoch": 0.96, "grad_norm": 2.110779047012329, "learning_rate": 2.13449281090963e-06, "loss": 5.5833, "step": 19380 }, { "epoch": 0.96, "grad_norm": 1.998799443244934, "learning_rate": 2.124610899748011e-06, "loss": 5.4789, "step": 19384 }, { "epoch": 0.96, "grad_norm": 1.902309536933899, "learning_rate": 2.114728988586393e-06, "loss": 5.4095, "step": 19388 }, { "epoch": 0.96, "grad_norm": 2.1755199432373047, "learning_rate": 2.104847077424774e-06, "loss": 5.5097, "step": 19392 }, { "epoch": 0.96, "grad_norm": 2.089999198913574, "learning_rate": 2.0949651662631553e-06, "loss": 5.4806, "step": 19396 }, { "epoch": 0.96, "grad_norm": 2.029437780380249, "learning_rate": 2.0850832551015365e-06, "loss": 5.406, "step": 19400 }, { "epoch": 0.96, "grad_norm": 2.0628230571746826, "learning_rate": 2.075201343939918e-06, "loss": 5.5123, "step": 19404 }, { "epoch": 0.96, "grad_norm": 2.2448441982269287, "learning_rate": 2.0653194327782997e-06, "loss": 5.5619, "step": 19408 }, { "epoch": 0.96, "grad_norm": 1.9651387929916382, "learning_rate": 2.0554375216166805e-06, "loss": 5.4864, "step": 19412 }, { "epoch": 0.96, "grad_norm": 2.1724853515625, "learning_rate": 2.045555610455062e-06, "loss": 5.4135, "step": 19416 }, { "epoch": 0.96, "grad_norm": 1.981934666633606, "learning_rate": 2.0356736992934434e-06, "loss": 
5.5062, "step": 19420 }, { "epoch": 0.96, "grad_norm": 2.0844616889953613, "learning_rate": 2.025791788131825e-06, "loss": 5.4076, "step": 19424 }, { "epoch": 0.96, "grad_norm": 2.2012956142425537, "learning_rate": 2.0159098769702062e-06, "loss": 5.4093, "step": 19428 }, { "epoch": 0.96, "grad_norm": 2.386598587036133, "learning_rate": 2.0060279658085874e-06, "loss": 5.4528, "step": 19432 }, { "epoch": 0.96, "grad_norm": 1.9763364791870117, "learning_rate": 1.9961460546469687e-06, "loss": 5.5794, "step": 19436 }, { "epoch": 0.96, "grad_norm": 1.9587762355804443, "learning_rate": 1.9862641434853503e-06, "loss": 5.4508, "step": 19440 }, { "epoch": 0.96, "grad_norm": 1.9256550073623657, "learning_rate": 1.9763822323237315e-06, "loss": 5.4952, "step": 19444 }, { "epoch": 0.96, "grad_norm": 2.1667470932006836, "learning_rate": 1.9665003211621127e-06, "loss": 5.4601, "step": 19448 }, { "epoch": 0.96, "grad_norm": 2.241722583770752, "learning_rate": 1.9566184100004944e-06, "loss": 5.4552, "step": 19452 }, { "epoch": 0.96, "grad_norm": 2.051391363143921, "learning_rate": 1.9467364988388756e-06, "loss": 5.4654, "step": 19456 }, { "epoch": 0.96, "grad_norm": 2.4016683101654053, "learning_rate": 1.9368545876772568e-06, "loss": 5.4888, "step": 19460 }, { "epoch": 0.96, "grad_norm": 2.049546480178833, "learning_rate": 1.926972676515638e-06, "loss": 5.4593, "step": 19464 }, { "epoch": 0.96, "grad_norm": 2.011448621749878, "learning_rate": 1.9170907653540196e-06, "loss": 5.4591, "step": 19468 }, { "epoch": 0.96, "grad_norm": 2.133927345275879, "learning_rate": 1.9072088541924008e-06, "loss": 5.5276, "step": 19472 }, { "epoch": 0.96, "grad_norm": 2.1823043823242188, "learning_rate": 1.8973269430307823e-06, "loss": 5.4671, "step": 19476 }, { "epoch": 0.96, "grad_norm": 2.2862277030944824, "learning_rate": 1.8874450318691637e-06, "loss": 5.3897, "step": 19480 }, { "epoch": 0.96, "grad_norm": 1.9178924560546875, "learning_rate": 1.877563120707545e-06, "loss": 5.4061, "step": 19484 }, 
{ "epoch": 0.96, "grad_norm": 2.043980836868286, "learning_rate": 1.8676812095459263e-06, "loss": 5.4469, "step": 19488 }, { "epoch": 0.96, "grad_norm": 2.0812246799468994, "learning_rate": 1.8577992983843076e-06, "loss": 5.4019, "step": 19492 }, { "epoch": 0.96, "grad_norm": 2.1293153762817383, "learning_rate": 1.847917387222689e-06, "loss": 5.5026, "step": 19496 }, { "epoch": 0.96, "grad_norm": 2.150707960128784, "learning_rate": 1.8380354760610702e-06, "loss": 5.5382, "step": 19500 }, { "epoch": 0.96, "grad_norm": 2.0206191539764404, "learning_rate": 1.8281535648994516e-06, "loss": 5.3712, "step": 19504 }, { "epoch": 0.96, "grad_norm": 2.0948994159698486, "learning_rate": 1.8182716537378328e-06, "loss": 5.3896, "step": 19508 }, { "epoch": 0.96, "grad_norm": 2.020963191986084, "learning_rate": 1.8083897425762143e-06, "loss": 5.3902, "step": 19512 }, { "epoch": 0.96, "grad_norm": 2.0603950023651123, "learning_rate": 1.7985078314145959e-06, "loss": 5.4755, "step": 19516 }, { "epoch": 0.96, "grad_norm": 1.9533694982528687, "learning_rate": 1.7886259202529769e-06, "loss": 5.3764, "step": 19520 }, { "epoch": 0.96, "grad_norm": 2.0614960193634033, "learning_rate": 1.7787440090913585e-06, "loss": 5.5436, "step": 19524 }, { "epoch": 0.96, "grad_norm": 1.9552912712097168, "learning_rate": 1.7688620979297395e-06, "loss": 5.4593, "step": 19528 }, { "epoch": 0.97, "grad_norm": 2.1261250972747803, "learning_rate": 1.7589801867681212e-06, "loss": 5.3616, "step": 19532 }, { "epoch": 0.97, "grad_norm": 1.9987773895263672, "learning_rate": 1.7490982756065022e-06, "loss": 5.4697, "step": 19536 }, { "epoch": 0.97, "grad_norm": 1.9402105808258057, "learning_rate": 1.7392163644448838e-06, "loss": 5.4345, "step": 19540 }, { "epoch": 0.97, "grad_norm": 1.9414345026016235, "learning_rate": 1.7293344532832648e-06, "loss": 5.3996, "step": 19544 }, { "epoch": 0.97, "grad_norm": 2.1511659622192383, "learning_rate": 1.7194525421216464e-06, "loss": 5.3444, "step": 19548 }, { "epoch": 0.97, 
"grad_norm": 2.011024236679077, "learning_rate": 1.7095706309600279e-06, "loss": 5.5161, "step": 19552 }, { "epoch": 0.97, "grad_norm": 2.05631422996521, "learning_rate": 1.699688719798409e-06, "loss": 5.451, "step": 19556 }, { "epoch": 0.97, "grad_norm": 2.112424373626709, "learning_rate": 1.6898068086367905e-06, "loss": 5.5912, "step": 19560 }, { "epoch": 0.97, "grad_norm": 2.210801839828491, "learning_rate": 1.6799248974751717e-06, "loss": 5.5403, "step": 19564 }, { "epoch": 0.97, "grad_norm": 2.0372581481933594, "learning_rate": 1.6700429863135531e-06, "loss": 5.3212, "step": 19568 }, { "epoch": 0.97, "grad_norm": 1.9437897205352783, "learning_rate": 1.6601610751519344e-06, "loss": 5.49, "step": 19572 }, { "epoch": 0.97, "grad_norm": 2.0939133167266846, "learning_rate": 1.6502791639903158e-06, "loss": 5.5276, "step": 19576 }, { "epoch": 0.97, "grad_norm": 2.0417065620422363, "learning_rate": 1.640397252828697e-06, "loss": 5.4277, "step": 19580 }, { "epoch": 0.97, "grad_norm": 1.9575787782669067, "learning_rate": 1.6305153416670784e-06, "loss": 5.4794, "step": 19584 }, { "epoch": 0.97, "grad_norm": 2.010903835296631, "learning_rate": 1.62063343050546e-06, "loss": 5.5096, "step": 19588 }, { "epoch": 0.97, "grad_norm": 1.9882851839065552, "learning_rate": 1.610751519343841e-06, "loss": 5.3729, "step": 19592 }, { "epoch": 0.97, "grad_norm": 2.00439190864563, "learning_rate": 1.6008696081822227e-06, "loss": 5.4072, "step": 19596 }, { "epoch": 0.97, "grad_norm": 2.154120445251465, "learning_rate": 1.5909876970206037e-06, "loss": 5.546, "step": 19600 }, { "epoch": 0.97, "grad_norm": 2.1139557361602783, "learning_rate": 1.5811057858589853e-06, "loss": 5.5374, "step": 19604 }, { "epoch": 0.97, "grad_norm": 2.38275408744812, "learning_rate": 1.5712238746973663e-06, "loss": 5.6017, "step": 19608 }, { "epoch": 0.97, "grad_norm": 2.0247836112976074, "learning_rate": 1.561341963535748e-06, "loss": 5.4871, "step": 19612 }, { "epoch": 0.97, "grad_norm": 1.97475004196167, 
"learning_rate": 1.5514600523741292e-06, "loss": 5.5351, "step": 19616 }, { "epoch": 0.97, "grad_norm": 2.0974037647247314, "learning_rate": 1.5415781412125106e-06, "loss": 5.5301, "step": 19620 }, { "epoch": 0.97, "grad_norm": 2.1904966831207275, "learning_rate": 1.5316962300508918e-06, "loss": 5.4878, "step": 19624 }, { "epoch": 0.97, "grad_norm": 2.0616374015808105, "learning_rate": 1.5218143188892732e-06, "loss": 5.4575, "step": 19628 }, { "epoch": 0.97, "grad_norm": 2.0660183429718018, "learning_rate": 1.5119324077276545e-06, "loss": 5.3544, "step": 19632 }, { "epoch": 0.97, "grad_norm": 1.9736340045928955, "learning_rate": 1.502050496566036e-06, "loss": 5.4619, "step": 19636 }, { "epoch": 0.97, "grad_norm": 2.014892339706421, "learning_rate": 1.4921685854044173e-06, "loss": 5.3977, "step": 19640 }, { "epoch": 0.97, "grad_norm": 1.8507190942764282, "learning_rate": 1.4822866742427987e-06, "loss": 5.4187, "step": 19644 }, { "epoch": 0.97, "grad_norm": 2.1220788955688477, "learning_rate": 1.47240476308118e-06, "loss": 5.4812, "step": 19648 }, { "epoch": 0.97, "grad_norm": 2.1677944660186768, "learning_rate": 1.4625228519195614e-06, "loss": 5.3252, "step": 19652 }, { "epoch": 0.97, "grad_norm": 2.054081439971924, "learning_rate": 1.4526409407579426e-06, "loss": 5.5602, "step": 19656 }, { "epoch": 0.97, "grad_norm": 2.1197407245635986, "learning_rate": 1.442759029596324e-06, "loss": 5.4585, "step": 19660 }, { "epoch": 0.97, "grad_norm": 2.005307674407959, "learning_rate": 1.4328771184347052e-06, "loss": 5.4986, "step": 19664 }, { "epoch": 0.97, "grad_norm": 2.428250312805176, "learning_rate": 1.4229952072730867e-06, "loss": 5.3953, "step": 19668 }, { "epoch": 0.97, "grad_norm": 2.051632881164551, "learning_rate": 1.413113296111468e-06, "loss": 5.4902, "step": 19672 }, { "epoch": 0.97, "grad_norm": 2.1171364784240723, "learning_rate": 1.4032313849498495e-06, "loss": 5.411, "step": 19676 }, { "epoch": 0.97, "grad_norm": 1.9378799200057983, "learning_rate": 
1.3933494737882307e-06, "loss": 5.401, "step": 19680 }, { "epoch": 0.97, "grad_norm": 2.151566743850708, "learning_rate": 1.3834675626266121e-06, "loss": 5.4693, "step": 19684 }, { "epoch": 0.97, "grad_norm": 1.948789358139038, "learning_rate": 1.3735856514649934e-06, "loss": 5.4927, "step": 19688 }, { "epoch": 0.97, "grad_norm": 2.4134631156921387, "learning_rate": 1.3637037403033748e-06, "loss": 5.5223, "step": 19692 }, { "epoch": 0.97, "grad_norm": 2.0687310695648193, "learning_rate": 1.353821829141756e-06, "loss": 5.6205, "step": 19696 }, { "epoch": 0.97, "grad_norm": 2.03088641166687, "learning_rate": 1.3439399179801374e-06, "loss": 5.4587, "step": 19700 }, { "epoch": 0.97, "grad_norm": 2.0663702487945557, "learning_rate": 1.3340580068185188e-06, "loss": 5.4831, "step": 19704 }, { "epoch": 0.97, "grad_norm": 1.904789686203003, "learning_rate": 1.3241760956569003e-06, "loss": 5.5511, "step": 19708 }, { "epoch": 0.97, "grad_norm": 2.119781970977783, "learning_rate": 1.3142941844952815e-06, "loss": 5.4318, "step": 19712 }, { "epoch": 0.97, "grad_norm": 1.9469795227050781, "learning_rate": 1.304412273333663e-06, "loss": 5.4406, "step": 19716 }, { "epoch": 0.97, "grad_norm": 2.30818247795105, "learning_rate": 1.2945303621720441e-06, "loss": 5.4206, "step": 19720 }, { "epoch": 0.97, "grad_norm": 1.9821704626083374, "learning_rate": 1.2846484510104255e-06, "loss": 5.5063, "step": 19724 }, { "epoch": 0.97, "grad_norm": 2.095536470413208, "learning_rate": 1.2747665398488068e-06, "loss": 5.545, "step": 19728 }, { "epoch": 0.97, "grad_norm": 2.048340320587158, "learning_rate": 1.2648846286871882e-06, "loss": 5.386, "step": 19732 }, { "epoch": 0.98, "grad_norm": 2.1303012371063232, "learning_rate": 1.2550027175255694e-06, "loss": 5.433, "step": 19736 }, { "epoch": 0.98, "grad_norm": 2.1081206798553467, "learning_rate": 1.2451208063639508e-06, "loss": 5.4296, "step": 19740 }, { "epoch": 0.98, "grad_norm": 2.0982227325439453, "learning_rate": 1.2352388952023322e-06, "loss": 
5.4267, "step": 19744 }, { "epoch": 0.98, "grad_norm": 1.9972717761993408, "learning_rate": 1.2253569840407137e-06, "loss": 5.55, "step": 19748 }, { "epoch": 0.98, "grad_norm": 2.1928958892822266, "learning_rate": 1.2154750728790949e-06, "loss": 5.5068, "step": 19752 }, { "epoch": 0.98, "grad_norm": 1.973193883895874, "learning_rate": 1.2055931617174763e-06, "loss": 5.4586, "step": 19756 }, { "epoch": 0.98, "grad_norm": 1.975972056388855, "learning_rate": 1.1957112505558575e-06, "loss": 5.2701, "step": 19760 }, { "epoch": 0.98, "grad_norm": 2.053972005844116, "learning_rate": 1.185829339394239e-06, "loss": 5.4591, "step": 19764 }, { "epoch": 0.98, "grad_norm": 2.108062267303467, "learning_rate": 1.1759474282326202e-06, "loss": 5.4726, "step": 19768 }, { "epoch": 0.98, "grad_norm": 2.1291093826293945, "learning_rate": 1.1660655170710016e-06, "loss": 5.459, "step": 19772 }, { "epoch": 0.98, "grad_norm": 2.2266008853912354, "learning_rate": 1.156183605909383e-06, "loss": 5.4267, "step": 19776 }, { "epoch": 0.98, "grad_norm": 2.031205892562866, "learning_rate": 1.1463016947477642e-06, "loss": 5.3383, "step": 19780 }, { "epoch": 0.98, "grad_norm": 2.1661624908447266, "learning_rate": 1.1364197835861456e-06, "loss": 5.4843, "step": 19784 }, { "epoch": 0.98, "grad_norm": 2.1752281188964844, "learning_rate": 1.126537872424527e-06, "loss": 5.4579, "step": 19788 }, { "epoch": 0.98, "grad_norm": 2.279290199279785, "learning_rate": 1.1166559612629083e-06, "loss": 5.4887, "step": 19792 }, { "epoch": 0.98, "grad_norm": 1.843815803527832, "learning_rate": 1.1067740501012897e-06, "loss": 5.4187, "step": 19796 }, { "epoch": 0.98, "grad_norm": 1.9832565784454346, "learning_rate": 1.096892138939671e-06, "loss": 5.5083, "step": 19800 }, { "epoch": 0.98, "grad_norm": 1.7962124347686768, "learning_rate": 1.0870102277780524e-06, "loss": 5.5071, "step": 19804 }, { "epoch": 0.98, "grad_norm": 2.0890607833862305, "learning_rate": 1.0771283166164336e-06, "loss": 5.56, "step": 19808 }, { 
"epoch": 0.98, "grad_norm": 1.972830057144165, "learning_rate": 1.067246405454815e-06, "loss": 5.4381, "step": 19812 }, { "epoch": 0.98, "grad_norm": 1.968068242073059, "learning_rate": 1.0573644942931964e-06, "loss": 5.5253, "step": 19816 }, { "epoch": 0.98, "grad_norm": 2.201663017272949, "learning_rate": 1.0474825831315776e-06, "loss": 5.5018, "step": 19820 }, { "epoch": 0.98, "grad_norm": 1.8504890203475952, "learning_rate": 1.037600671969959e-06, "loss": 5.4115, "step": 19824 }, { "epoch": 0.98, "grad_norm": 2.112267017364502, "learning_rate": 1.0277187608083403e-06, "loss": 5.3483, "step": 19828 }, { "epoch": 0.98, "grad_norm": 2.159766435623169, "learning_rate": 1.0178368496467217e-06, "loss": 5.381, "step": 19832 }, { "epoch": 0.98, "grad_norm": 2.1608057022094727, "learning_rate": 1.0079549384851031e-06, "loss": 5.4216, "step": 19836 }, { "epoch": 0.98, "grad_norm": 1.8269460201263428, "learning_rate": 9.980730273234843e-07, "loss": 5.4642, "step": 19840 }, { "epoch": 0.98, "grad_norm": 2.0793392658233643, "learning_rate": 9.881911161618658e-07, "loss": 5.4693, "step": 19844 }, { "epoch": 0.98, "grad_norm": 2.1695215702056885, "learning_rate": 9.783092050002472e-07, "loss": 5.5016, "step": 19848 }, { "epoch": 0.98, "grad_norm": 1.9932477474212646, "learning_rate": 9.684272938386284e-07, "loss": 5.4885, "step": 19852 }, { "epoch": 0.98, "grad_norm": 1.9905871152877808, "learning_rate": 9.585453826770098e-07, "loss": 5.4697, "step": 19856 }, { "epoch": 0.98, "grad_norm": 1.9750021696090698, "learning_rate": 9.486634715153911e-07, "loss": 5.3438, "step": 19860 }, { "epoch": 0.98, "grad_norm": 1.9781215190887451, "learning_rate": 9.387815603537725e-07, "loss": 5.4243, "step": 19864 }, { "epoch": 0.98, "grad_norm": 2.112760543823242, "learning_rate": 9.288996491921538e-07, "loss": 5.496, "step": 19868 }, { "epoch": 0.98, "grad_norm": 2.029419422149658, "learning_rate": 9.190177380305351e-07, "loss": 5.4413, "step": 19872 }, { "epoch": 0.98, "grad_norm": 
2.0092504024505615, "learning_rate": 9.091358268689164e-07, "loss": 5.3783, "step": 19876 }, { "epoch": 0.98, "grad_norm": 2.1386382579803467, "learning_rate": 8.992539157072979e-07, "loss": 5.4708, "step": 19880 }, { "epoch": 0.98, "grad_norm": 1.9855784177780151, "learning_rate": 8.893720045456793e-07, "loss": 5.5455, "step": 19884 }, { "epoch": 0.98, "grad_norm": 2.007969856262207, "learning_rate": 8.794900933840606e-07, "loss": 5.5531, "step": 19888 }, { "epoch": 0.98, "grad_norm": 1.9627690315246582, "learning_rate": 8.696081822224419e-07, "loss": 5.4983, "step": 19892 }, { "epoch": 0.98, "grad_norm": 2.0733351707458496, "learning_rate": 8.597262710608232e-07, "loss": 5.4475, "step": 19896 }, { "epoch": 0.98, "grad_norm": 2.1221306324005127, "learning_rate": 8.498443598992045e-07, "loss": 5.5056, "step": 19900 }, { "epoch": 0.98, "grad_norm": 1.9957540035247803, "learning_rate": 8.399624487375859e-07, "loss": 5.5603, "step": 19904 }, { "epoch": 0.98, "grad_norm": 2.1626851558685303, "learning_rate": 8.300805375759672e-07, "loss": 5.485, "step": 19908 }, { "epoch": 0.98, "grad_norm": 1.9834141731262207, "learning_rate": 8.201986264143485e-07, "loss": 5.3205, "step": 19912 }, { "epoch": 0.98, "grad_norm": 2.065784454345703, "learning_rate": 8.1031671525273e-07, "loss": 5.5108, "step": 19916 }, { "epoch": 0.98, "grad_norm": 2.299056053161621, "learning_rate": 8.004348040911113e-07, "loss": 5.5952, "step": 19920 }, { "epoch": 0.98, "grad_norm": 2.0861730575561523, "learning_rate": 7.905528929294927e-07, "loss": 5.4168, "step": 19924 }, { "epoch": 0.98, "grad_norm": 2.2378146648406982, "learning_rate": 7.80670981767874e-07, "loss": 5.4531, "step": 19928 }, { "epoch": 0.98, "grad_norm": 1.9151175022125244, "learning_rate": 7.707890706062553e-07, "loss": 5.4655, "step": 19932 }, { "epoch": 0.99, "grad_norm": 2.122283697128296, "learning_rate": 7.609071594446366e-07, "loss": 5.4724, "step": 19936 }, { "epoch": 0.99, "grad_norm": 2.1069324016571045, "learning_rate": 
7.51025248283018e-07, "loss": 5.5641, "step": 19940 }, { "epoch": 0.99, "grad_norm": 2.006833791732788, "learning_rate": 7.411433371213994e-07, "loss": 5.4192, "step": 19944 }, { "epoch": 0.99, "grad_norm": 2.0627007484436035, "learning_rate": 7.312614259597807e-07, "loss": 5.5582, "step": 19948 }, { "epoch": 0.99, "grad_norm": 1.9530029296875, "learning_rate": 7.21379514798162e-07, "loss": 5.5445, "step": 19952 }, { "epoch": 0.99, "grad_norm": 2.1795194149017334, "learning_rate": 7.114976036365433e-07, "loss": 5.5048, "step": 19956 }, { "epoch": 0.99, "grad_norm": 2.1359763145446777, "learning_rate": 7.016156924749248e-07, "loss": 5.432, "step": 19960 }, { "epoch": 0.99, "grad_norm": 2.1916072368621826, "learning_rate": 6.917337813133061e-07, "loss": 5.4901, "step": 19964 }, { "epoch": 0.99, "grad_norm": 2.025327205657959, "learning_rate": 6.818518701516874e-07, "loss": 5.3755, "step": 19968 }, { "epoch": 0.99, "grad_norm": 2.0348243713378906, "learning_rate": 6.719699589900687e-07, "loss": 5.5042, "step": 19972 }, { "epoch": 0.99, "grad_norm": 2.128100633621216, "learning_rate": 6.620880478284501e-07, "loss": 5.4892, "step": 19976 }, { "epoch": 0.99, "grad_norm": 2.0665409564971924, "learning_rate": 6.522061366668315e-07, "loss": 5.4291, "step": 19980 }, { "epoch": 0.99, "grad_norm": 2.2713510990142822, "learning_rate": 6.423242255052128e-07, "loss": 5.5485, "step": 19984 }, { "epoch": 0.99, "grad_norm": 1.9565412998199463, "learning_rate": 6.324423143435941e-07, "loss": 5.3597, "step": 19988 }, { "epoch": 0.99, "grad_norm": 2.122183084487915, "learning_rate": 6.225604031819754e-07, "loss": 5.5183, "step": 19992 }, { "epoch": 0.99, "grad_norm": 2.2601876258850098, "learning_rate": 6.126784920203568e-07, "loss": 5.3787, "step": 19996 }, { "epoch": 0.99, "grad_norm": 1.9778292179107666, "learning_rate": 6.027965808587382e-07, "loss": 5.3719, "step": 20000 }, { "epoch": 0.99, "grad_norm": 2.0290167331695557, "learning_rate": 5.929146696971195e-07, "loss": 5.4688, 
"step": 20004 }, { "epoch": 0.99, "grad_norm": 2.108403444290161, "learning_rate": 5.830327585355008e-07, "loss": 5.3245, "step": 20008 }, { "epoch": 0.99, "grad_norm": 2.3024027347564697, "learning_rate": 5.731508473738821e-07, "loss": 5.4963, "step": 20012 }, { "epoch": 0.99, "grad_norm": 2.154008150100708, "learning_rate": 5.632689362122635e-07, "loss": 5.4071, "step": 20016 }, { "epoch": 0.99, "grad_norm": 2.2379016876220703, "learning_rate": 5.533870250506449e-07, "loss": 5.4005, "step": 20020 }, { "epoch": 0.99, "grad_norm": 2.0857393741607666, "learning_rate": 5.435051138890262e-07, "loss": 5.5153, "step": 20024 }, { "epoch": 0.99, "grad_norm": 1.9951947927474976, "learning_rate": 5.336232027274075e-07, "loss": 5.4283, "step": 20028 }, { "epoch": 0.99, "grad_norm": 2.357630729675293, "learning_rate": 5.237412915657888e-07, "loss": 5.4484, "step": 20032 }, { "epoch": 0.99, "grad_norm": 2.1432507038116455, "learning_rate": 5.138593804041701e-07, "loss": 5.4331, "step": 20036 }, { "epoch": 0.99, "grad_norm": 1.8710029125213623, "learning_rate": 5.039774692425516e-07, "loss": 5.331, "step": 20040 }, { "epoch": 0.99, "grad_norm": 2.1204705238342285, "learning_rate": 4.940955580809329e-07, "loss": 5.3974, "step": 20044 }, { "epoch": 0.99, "grad_norm": 2.2792441844940186, "learning_rate": 4.842136469193142e-07, "loss": 5.3835, "step": 20048 }, { "epoch": 0.99, "grad_norm": 2.204084873199463, "learning_rate": 4.7433173575769557e-07, "loss": 5.457, "step": 20052 }, { "epoch": 0.99, "grad_norm": 2.0620133876800537, "learning_rate": 4.644498245960769e-07, "loss": 5.4719, "step": 20056 }, { "epoch": 0.99, "grad_norm": 2.0034713745117188, "learning_rate": 4.545679134344582e-07, "loss": 5.3951, "step": 20060 }, { "epoch": 0.99, "grad_norm": 1.9210227727890015, "learning_rate": 4.4468600227283963e-07, "loss": 5.5225, "step": 20064 }, { "epoch": 0.99, "grad_norm": 2.102591037750244, "learning_rate": 4.3480409111122095e-07, "loss": 5.5268, "step": 20068 }, { "epoch": 0.99, 
"grad_norm": 2.0443971157073975, "learning_rate": 4.2492217994960227e-07, "loss": 5.5364, "step": 20072 }, { "epoch": 0.99, "grad_norm": 2.135408878326416, "learning_rate": 4.150402687879836e-07, "loss": 5.4666, "step": 20076 }, { "epoch": 0.99, "grad_norm": 2.1176159381866455, "learning_rate": 4.05158357626365e-07, "loss": 5.4501, "step": 20080 }, { "epoch": 0.99, "grad_norm": 1.8199737071990967, "learning_rate": 3.9527644646474633e-07, "loss": 5.4657, "step": 20084 }, { "epoch": 0.99, "grad_norm": 2.200198173522949, "learning_rate": 3.8539453530312765e-07, "loss": 5.6004, "step": 20088 }, { "epoch": 0.99, "grad_norm": 1.9499452114105225, "learning_rate": 3.75512624141509e-07, "loss": 5.5219, "step": 20092 }, { "epoch": 0.99, "grad_norm": 2.084623098373413, "learning_rate": 3.6563071297989034e-07, "loss": 5.5269, "step": 20096 }, { "epoch": 0.99, "grad_norm": 2.1379587650299072, "learning_rate": 3.5574880181827166e-07, "loss": 5.2866, "step": 20100 }, { "epoch": 0.99, "grad_norm": 2.1811280250549316, "learning_rate": 3.4586689065665304e-07, "loss": 5.5607, "step": 20104 }, { "epoch": 0.99, "grad_norm": 2.0732221603393555, "learning_rate": 3.3598497949503435e-07, "loss": 5.5122, "step": 20108 }, { "epoch": 0.99, "grad_norm": 2.181933641433716, "learning_rate": 3.2610306833341573e-07, "loss": 5.5222, "step": 20112 }, { "epoch": 0.99, "grad_norm": 1.8368821144104004, "learning_rate": 3.1622115717179705e-07, "loss": 5.3693, "step": 20116 }, { "epoch": 0.99, "grad_norm": 2.0082316398620605, "learning_rate": 3.063392460101784e-07, "loss": 5.3343, "step": 20120 }, { "epoch": 0.99, "grad_norm": 1.952849268913269, "learning_rate": 2.9645733484855974e-07, "loss": 5.377, "step": 20124 }, { "epoch": 0.99, "grad_norm": 1.9544694423675537, "learning_rate": 2.8657542368694106e-07, "loss": 5.4439, "step": 20128 }, { "epoch": 0.99, "grad_norm": 2.0548641681671143, "learning_rate": 2.7669351252532243e-07, "loss": 5.5092, "step": 20132 }, { "epoch": 0.99, "grad_norm": 
2.101043939590454, "learning_rate": 2.6681160136370375e-07, "loss": 5.3574, "step": 20136 }, { "epoch": 1.0, "grad_norm": 2.1047868728637695, "learning_rate": 2.5692969020208507e-07, "loss": 5.513, "step": 20140 }, { "epoch": 1.0, "grad_norm": 1.9184880256652832, "learning_rate": 2.4704777904046644e-07, "loss": 5.3459, "step": 20144 }, { "epoch": 1.0, "grad_norm": 2.2628798484802246, "learning_rate": 2.3716586787884778e-07, "loss": 5.4285, "step": 20148 }, { "epoch": 1.0, "grad_norm": 2.3330535888671875, "learning_rate": 2.272839567172291e-07, "loss": 5.4673, "step": 20152 }, { "epoch": 1.0, "grad_norm": 2.0739777088165283, "learning_rate": 2.1740204555561048e-07, "loss": 5.4847, "step": 20156 }, { "epoch": 1.0, "grad_norm": 2.010349750518799, "learning_rate": 2.075201343939918e-07, "loss": 5.4364, "step": 20160 }, { "epoch": 1.0, "grad_norm": 1.8513480424880981, "learning_rate": 1.9763822323237317e-07, "loss": 5.528, "step": 20164 }, { "epoch": 1.0, "grad_norm": 2.0048437118530273, "learning_rate": 1.877563120707545e-07, "loss": 5.3088, "step": 20168 }, { "epoch": 1.0, "grad_norm": 1.953896403312683, "learning_rate": 1.7787440090913583e-07, "loss": 5.4566, "step": 20172 }, { "epoch": 1.0, "grad_norm": 1.9878010749816895, "learning_rate": 1.6799248974751718e-07, "loss": 5.3852, "step": 20176 }, { "epoch": 1.0, "grad_norm": 2.017378330230713, "learning_rate": 1.5811057858589852e-07, "loss": 5.334, "step": 20180 }, { "epoch": 1.0, "grad_norm": 1.9125686883926392, "learning_rate": 1.4822866742427987e-07, "loss": 5.4209, "step": 20184 }, { "epoch": 1.0, "grad_norm": 2.205244302749634, "learning_rate": 1.3834675626266121e-07, "loss": 5.443, "step": 20188 }, { "epoch": 1.0, "grad_norm": 1.9783563613891602, "learning_rate": 1.2846484510104253e-07, "loss": 5.5629, "step": 20192 }, { "epoch": 1.0, "grad_norm": 2.003634214401245, "learning_rate": 1.1858293393942389e-07, "loss": 5.4012, "step": 20196 }, { "epoch": 1.0, "grad_norm": 2.183382511138916, "learning_rate": 
1.0870102277780524e-07, "loss": 5.4401, "step": 20200 }, { "epoch": 1.0, "grad_norm": 2.161017656326294, "learning_rate": 9.881911161618658e-08, "loss": 5.5125, "step": 20204 }, { "epoch": 1.0, "grad_norm": 2.1500487327575684, "learning_rate": 8.893720045456792e-08, "loss": 5.5267, "step": 20208 }, { "epoch": 1.0, "grad_norm": 2.1386756896972656, "learning_rate": 7.905528929294926e-08, "loss": 5.4015, "step": 20212 }, { "epoch": 1.0, "grad_norm": 2.1625399589538574, "learning_rate": 6.917337813133061e-08, "loss": 5.467, "step": 20216 }, { "epoch": 1.0, "grad_norm": 2.1189723014831543, "learning_rate": 5.9291466969711946e-08, "loss": 5.4626, "step": 20220 }, { "epoch": 1.0, "grad_norm": 2.1535654067993164, "learning_rate": 4.940955580809329e-08, "loss": 5.507, "step": 20224 }, { "epoch": 1.0, "grad_norm": 1.8377280235290527, "learning_rate": 3.952764464647463e-08, "loss": 5.4731, "step": 20228 }, { "epoch": 1.0, "grad_norm": 1.746285080909729, "learning_rate": 2.9645733484855973e-08, "loss": 5.3422, "step": 20232 }, { "epoch": 1.0, "grad_norm": 2.335073947906494, "learning_rate": 1.9763822323237315e-08, "loss": 5.4435, "step": 20236 }, { "epoch": 1.0, "step": 20239, "total_flos": 8.525167704460493e+16, "train_loss": 5.640737008268616, "train_runtime": 3394.3955, "train_samples_per_second": 95.396, "train_steps_per_second": 5.962 } ], "logging_steps": 4, "max_steps": 20239, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 2024, "total_flos": 8.525167704460493e+16, "train_batch_size": 8, "trial_name": null, "trial_params": null }