{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 21016, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.000475827940616673, "grad_norm": 5.15625, "learning_rate": 9.997620860296918e-06, "loss": 4.7984, "step": 10 }, { "epoch": 0.000951655881233346, "grad_norm": 4.71875, "learning_rate": 9.995241720593835e-06, "loss": 4.7044, "step": 20 }, { "epoch": 0.001427483821850019, "grad_norm": 4.9375, "learning_rate": 9.992862580890752e-06, "loss": 4.7987, "step": 30 }, { "epoch": 0.001903311762466692, "grad_norm": 4.78125, "learning_rate": 9.990483441187667e-06, "loss": 4.7028, "step": 40 }, { "epoch": 0.002379139703083365, "grad_norm": 4.46875, "learning_rate": 9.988104301484584e-06, "loss": 4.7488, "step": 50 }, { "epoch": 0.002854967643700038, "grad_norm": 4.5, "learning_rate": 9.9857251617815e-06, "loss": 4.4619, "step": 60 }, { "epoch": 0.003330795584316711, "grad_norm": 4.875, "learning_rate": 9.983346022078418e-06, "loss": 4.6641, "step": 70 }, { "epoch": 0.003806623524933384, "grad_norm": 4.0625, "learning_rate": 9.980966882375334e-06, "loss": 4.6234, "step": 80 }, { "epoch": 0.004282451465550057, "grad_norm": 4.28125, "learning_rate": 9.978587742672251e-06, "loss": 4.5166, "step": 90 }, { "epoch": 0.00475827940616673, "grad_norm": 4.6875, "learning_rate": 9.976208602969167e-06, "loss": 4.4579, "step": 100 }, { "epoch": 0.005234107346783403, "grad_norm": 4.4375, "learning_rate": 9.973829463266083e-06, "loss": 4.6196, "step": 110 }, { "epoch": 0.005709935287400076, "grad_norm": 4.46875, "learning_rate": 9.971450323563e-06, "loss": 4.4935, "step": 120 }, { "epoch": 0.006185763228016749, "grad_norm": 4.53125, "learning_rate": 9.969071183859917e-06, "loss": 4.583, "step": 130 }, { "epoch": 0.006661591168633422, "grad_norm": 4.4375, "learning_rate": 9.966692044156834e-06, "loss": 4.5623, "step": 140 }, { "epoch": 0.007137419109250095, "grad_norm": 4.625, "learning_rate": 9.96431290445375e-06, "loss": 4.4606, "step": 150 }, { "epoch": 0.007613247049866768, "grad_norm": 4.59375, "learning_rate": 9.961933764750666e-06, "loss": 4.4361, "step": 160 }, { "epoch": 0.008089074990483442, "grad_norm": 4.5, "learning_rate": 9.959554625047583e-06, "loss": 4.4327, "step": 170 }, { "epoch": 0.008564902931100115, "grad_norm": 4.625, "learning_rate": 9.9571754853445e-06, "loss": 4.5662, "step": 180 }, { "epoch": 0.009040730871716788, "grad_norm": 4.375, "learning_rate": 9.954796345641417e-06, "loss": 4.6018, "step": 190 }, { "epoch": 0.00951655881233346, "grad_norm": 4.15625, "learning_rate": 9.952417205938334e-06, "loss": 4.4694, "step": 200 }, { "epoch": 0.009992386752950133, "grad_norm": 4.65625, "learning_rate": 9.950038066235251e-06, "loss": 4.5537, "step": 210 }, { "epoch": 0.010468214693566806, "grad_norm": 4.25, "learning_rate": 9.947658926532166e-06, "loss": 4.3419, "step": 220 }, { "epoch": 0.01094404263418348, "grad_norm": 4.65625, "learning_rate": 9.945279786829083e-06, "loss": 4.4068, "step": 230 }, { "epoch": 0.011419870574800152, "grad_norm": 4.375, "learning_rate": 9.942900647126e-06, "loss": 4.4127, "step": 240 }, { "epoch": 0.011895698515416825, "grad_norm": 4.21875, "learning_rate": 9.940521507422917e-06, "loss": 4.5509, "step": 250 }, { "epoch": 0.012371526456033498, "grad_norm": 4.46875, "learning_rate": 9.938142367719832e-06, "loss": 4.4809, "step": 260 }, { "epoch": 0.012847354396650171, "grad_norm": 4.125, "learning_rate": 9.93576322801675e-06, "loss": 4.4099, "step": 270 }, { "epoch": 0.013323182337266844, "grad_norm": 4.0, "learning_rate": 9.933384088313666e-06, "loss": 4.4928, "step": 280 }, { "epoch": 0.013799010277883517, "grad_norm": 4.46875, "learning_rate": 9.931004948610583e-06, "loss": 4.5191, "step": 290 }, { "epoch": 0.01427483821850019, "grad_norm": 4.5, "learning_rate": 9.9286258089075e-06, "loss": 4.437, "step": 300 }, { "epoch": 0.014750666159116863, "grad_norm": 4.1875, "learning_rate": 9.926246669204417e-06, "loss": 4.4929, "step": 310 }, { "epoch": 0.015226494099733536, "grad_norm": 4.125, "learning_rate": 9.923867529501334e-06, "loss": 4.5431, "step": 320 }, { "epoch": 0.01570232204035021, "grad_norm": 4.03125, "learning_rate": 9.921488389798249e-06, "loss": 4.4964, "step": 330 }, { "epoch": 0.016178149980966883, "grad_norm": 3.96875, "learning_rate": 9.919109250095167e-06, "loss": 4.6289, "step": 340 }, { "epoch": 0.016653977921583556, "grad_norm": 4.875, "learning_rate": 9.916730110392083e-06, "loss": 4.542, "step": 350 }, { "epoch": 0.01712980586220023, "grad_norm": 3.90625, "learning_rate": 9.914350970689e-06, "loss": 4.5346, "step": 360 }, { "epoch": 0.017605633802816902, "grad_norm": 4.0, "learning_rate": 9.911971830985917e-06, "loss": 4.4476, "step": 370 }, { "epoch": 0.018081461743433575, "grad_norm": 4.40625, "learning_rate": 9.909592691282833e-06, "loss": 4.4633, "step": 380 }, { "epoch": 0.018557289684050248, "grad_norm": 4.46875, "learning_rate": 9.90721355157975e-06, "loss": 4.4198, "step": 390 }, { "epoch": 0.01903311762466692, "grad_norm": 4.65625, "learning_rate": 9.904834411876666e-06, "loss": 4.4368, "step": 400 }, { "epoch": 0.019508945565283594, "grad_norm": 4.1875, "learning_rate": 9.902455272173584e-06, "loss": 4.1835, "step": 410 }, { "epoch": 0.019984773505900267, "grad_norm": 4.125, "learning_rate": 9.9000761324705e-06, "loss": 4.4864, "step": 420 }, { "epoch": 0.02046060144651694, "grad_norm": 4.375, "learning_rate": 9.897696992767416e-06, "loss": 4.469, "step": 430 }, { "epoch": 0.020936429387133613, "grad_norm": 3.953125, "learning_rate": 9.895317853064332e-06, "loss": 4.4666, "step": 440 }, { "epoch": 0.021412257327750286, "grad_norm": 4.25, "learning_rate": 9.89293871336125e-06, "loss": 4.4126, "step": 450 }, { "epoch": 0.02188808526836696, "grad_norm": 3.953125, "learning_rate": 9.890559573658165e-06, "loss": 4.2976, "step": 460 }, { "epoch": 0.02236391320898363, "grad_norm": 4.0625, "learning_rate": 9.888180433955082e-06, "loss": 4.3031, "step": 470 }, { "epoch": 0.022839741149600305, "grad_norm": 4.28125, "learning_rate": 9.885801294252e-06, "loss": 4.2988, "step": 480 }, { "epoch": 0.023315569090216978, "grad_norm": 4.4375, "learning_rate": 9.883422154548916e-06, "loss": 4.4427, "step": 490 }, { "epoch": 0.02379139703083365, "grad_norm": 4.15625, "learning_rate": 9.881043014845833e-06, "loss": 4.4411, "step": 500 }, { "epoch": 0.024267224971450323, "grad_norm": 4.125, "learning_rate": 9.878663875142748e-06, "loss": 4.2888, "step": 510 }, { "epoch": 0.024743052912066996, "grad_norm": 4.34375, "learning_rate": 9.876284735439667e-06, "loss": 4.432, "step": 520 }, { "epoch": 0.02521888085268367, "grad_norm": 4.15625, "learning_rate": 9.873905595736582e-06, "loss": 4.4826, "step": 530 }, { "epoch": 0.025694708793300342, "grad_norm": 4.46875, "learning_rate": 9.871526456033499e-06, "loss": 4.2504, "step": 540 }, { "epoch": 0.026170536733917015, "grad_norm": 4.34375, "learning_rate": 9.869147316330416e-06, "loss": 4.3611, "step": 550 }, { "epoch": 0.026646364674533688, "grad_norm": 3.84375, "learning_rate": 9.866768176627333e-06, "loss": 4.38, "step": 560 }, { "epoch": 0.02712219261515036, "grad_norm": 4.34375, "learning_rate": 9.86438903692425e-06, "loss": 4.2997, "step": 570 }, { "epoch": 0.027598020555767034, "grad_norm": 4.1875, "learning_rate": 9.862009897221165e-06, "loss": 4.3505, "step": 580 }, { "epoch": 0.028073848496383707, "grad_norm": 4.15625, "learning_rate": 9.859630757518084e-06, "loss": 4.3777, "step": 590 }, { "epoch": 0.02854967643700038, "grad_norm": 4.1875, "learning_rate": 9.857251617814999e-06, "loss": 4.3189, "step": 600 }, { "epoch": 0.029025504377617053, "grad_norm": 3.9375, "learning_rate": 9.854872478111916e-06, "loss": 4.4829, "step": 610 }, { "epoch": 0.029501332318233726, "grad_norm": 3.828125, "learning_rate": 9.852493338408831e-06, "loss": 4.3622, "step": 620 }, { "epoch": 0.0299771602588504, "grad_norm": 4.46875, "learning_rate": 9.85011419870575e-06, "loss": 4.3517, "step": 630 }, { "epoch": 0.03045298819946707, "grad_norm": 4.375, "learning_rate": 9.847735059002665e-06, "loss": 4.3749, "step": 640 }, { "epoch": 0.030928816140083745, "grad_norm": 4.34375, "learning_rate": 9.845355919299582e-06, "loss": 4.446, "step": 650 }, { "epoch": 0.03140464408070042, "grad_norm": 4.71875, "learning_rate": 9.842976779596499e-06, "loss": 4.2799, "step": 660 }, { "epoch": 0.031880472021317094, "grad_norm": 4.4375, "learning_rate": 9.840597639893415e-06, "loss": 4.2859, "step": 670 }, { "epoch": 0.03235629996193377, "grad_norm": 4.09375, "learning_rate": 9.838218500190332e-06, "loss": 4.3998, "step": 680 }, { "epoch": 0.03283212790255044, "grad_norm": 4.34375, "learning_rate": 9.835839360487248e-06, "loss": 4.4179, "step": 690 }, { "epoch": 0.03330795584316711, "grad_norm": 4.03125, "learning_rate": 9.833460220784166e-06, "loss": 4.3138, "step": 700 }, { "epoch": 0.033783783783783786, "grad_norm": 3.796875, "learning_rate": 9.831081081081081e-06, "loss": 4.363, "step": 710 }, { "epoch": 0.03425961172440046, "grad_norm": 4.15625, "learning_rate": 9.828701941377998e-06, "loss": 4.4096, "step": 720 }, { "epoch": 0.03473543966501713, "grad_norm": 4.09375, "learning_rate": 9.826322801674915e-06, "loss": 4.2898, "step": 730 }, { "epoch": 0.035211267605633804, "grad_norm": 3.859375, "learning_rate": 9.823943661971832e-06, "loss": 4.3576, "step": 740 }, { "epoch": 0.03568709554625048, "grad_norm": 4.15625, "learning_rate": 9.821564522268749e-06, "loss": 4.4508, "step": 750 }, { "epoch": 0.03616292348686715, "grad_norm": 4.03125, "learning_rate": 9.819185382565664e-06, "loss": 4.4256, "step": 760 }, { "epoch": 0.03663875142748382, "grad_norm": 4.21875, "learning_rate": 9.816806242862583e-06, "loss": 4.4419, "step": 770 }, { "epoch": 0.037114579368100496, "grad_norm": 4.21875, "learning_rate": 9.814427103159498e-06, "loss": 4.4367, "step": 780 }, { "epoch": 0.03759040730871717, "grad_norm": 4.375, "learning_rate": 9.812047963456415e-06, "loss": 4.3371, "step": 790 }, { "epoch": 0.03806623524933384, "grad_norm": 4.09375, "learning_rate": 9.80966882375333e-06, "loss": 4.3802, "step": 800 }, { "epoch": 0.038542063189950515, "grad_norm": 3.828125, "learning_rate": 9.807289684050249e-06, "loss": 4.3758, "step": 810 }, { "epoch": 0.03901789113056719, "grad_norm": 4.375, "learning_rate": 9.804910544347164e-06, "loss": 4.1429, "step": 820 }, { "epoch": 0.03949371907118386, "grad_norm": 3.921875, "learning_rate": 9.802531404644081e-06, "loss": 4.4294, "step": 830 }, { "epoch": 0.039969547011800534, "grad_norm": 4.25, "learning_rate": 9.800152264940998e-06, "loss": 4.4945, "step": 840 }, { "epoch": 0.04044537495241721, "grad_norm": 4.09375, "learning_rate": 9.797773125237915e-06, "loss": 4.3223, "step": 850 }, { "epoch": 0.04092120289303388, "grad_norm": 4.0625, "learning_rate": 9.795393985534832e-06, "loss": 4.196, "step": 860 }, { "epoch": 0.04139703083365055, "grad_norm": 4.09375, "learning_rate": 9.793014845831747e-06, "loss": 4.3027, "step": 870 }, { "epoch": 0.041872858774267226, "grad_norm": 4.25, "learning_rate": 9.790635706128666e-06, "loss": 4.2827, "step": 880 }, { "epoch": 0.0423486867148839, "grad_norm": 4.28125, "learning_rate": 9.78825656642558e-06, "loss": 4.3437, "step": 890 }, { "epoch": 0.04282451465550057, "grad_norm": 3.890625, "learning_rate": 9.785877426722498e-06, "loss": 4.3413, "step": 900 }, { "epoch": 0.043300342596117244, "grad_norm": 4.03125, "learning_rate": 9.783498287019415e-06, "loss": 4.4435, "step": 910 }, { "epoch": 0.04377617053673392, "grad_norm": 4.5625, "learning_rate": 9.781119147316332e-06, "loss": 4.444, "step": 920 }, { "epoch": 0.04425199847735059, "grad_norm": 4.46875, "learning_rate": 9.778740007613249e-06, "loss": 4.1948, "step": 930 }, { "epoch": 0.04472782641796726, "grad_norm": 4.28125, "learning_rate": 9.776360867910164e-06, "loss": 4.3349, "step": 940 }, { "epoch": 0.045203654358583936, "grad_norm": 4.03125, "learning_rate": 9.77398172820708e-06, "loss": 4.3492, "step": 950 }, { "epoch": 0.04567948229920061, "grad_norm": 4.40625, "learning_rate": 9.771602588503998e-06, "loss": 4.3594, "step": 960 }, { "epoch": 0.04615531023981728, "grad_norm": 4.4375, "learning_rate": 9.769223448800914e-06, "loss": 4.4065, "step": 970 }, { "epoch": 0.046631138180433955, "grad_norm": 4.21875, "learning_rate": 9.766844309097831e-06, "loss": 4.199, "step": 980 }, { "epoch": 0.04710696612105063, "grad_norm": 4.03125, "learning_rate": 9.764465169394748e-06, "loss": 4.2399, "step": 990 }, { "epoch": 0.0475827940616673, "grad_norm": 4.3125, "learning_rate": 9.762086029691664e-06, "loss": 4.1906, "step": 1000 }, { "epoch": 0.048058622002283974, "grad_norm": 4.4375, "learning_rate": 9.75970688998858e-06, "loss": 4.1772, "step": 1010 }, { "epoch": 0.04853444994290065, "grad_norm": 4.53125, "learning_rate": 9.757327750285497e-06, "loss": 4.2576, "step": 1020 }, { "epoch": 0.04901027788351732, "grad_norm": 4.625, "learning_rate": 9.754948610582414e-06, "loss": 4.2638, "step": 1030 }, { "epoch": 0.04948610582413399, "grad_norm": 4.0, "learning_rate": 9.752569470879331e-06, "loss": 4.259, "step": 1040 }, { "epoch": 0.049961933764750666, "grad_norm": 4.34375, "learning_rate": 9.750190331176246e-06, "loss": 4.2842, "step": 1050 }, { "epoch": 0.05043776170536734, "grad_norm": 4.375, "learning_rate": 9.747811191473165e-06, "loss": 4.3745, "step": 1060 }, { "epoch": 0.05091358964598401, "grad_norm": 4.125, "learning_rate": 9.74543205177008e-06, "loss": 4.2792, "step": 1070 }, { "epoch": 0.051389417586600684, "grad_norm": 4.03125, "learning_rate": 9.743052912066997e-06, "loss": 4.2665, "step": 1080 }, { "epoch": 0.05186524552721736, "grad_norm": 3.984375, "learning_rate": 9.740673772363914e-06, "loss": 4.1578, "step": 1090 }, { "epoch": 0.05234107346783403, "grad_norm": 4.09375, "learning_rate": 9.738294632660831e-06, "loss": 4.3799, "step": 1100 }, { "epoch": 0.0528169014084507, "grad_norm": 4.375, "learning_rate": 9.735915492957748e-06, "loss": 4.3657, "step": 1110 }, { "epoch": 0.053292729349067376, "grad_norm": 4.40625, "learning_rate": 9.733536353254663e-06, "loss": 4.1869, "step": 1120 }, { "epoch": 0.05376855728968405, "grad_norm": 4.25, "learning_rate": 9.73115721355158e-06, "loss": 4.3098, "step": 1130 }, { "epoch": 0.05424438523030072, "grad_norm": 4.46875, "learning_rate": 9.728778073848497e-06, "loss": 4.2059, "step": 1140 }, { "epoch": 0.054720213170917395, "grad_norm": 4.125, "learning_rate": 9.726398934145414e-06, "loss": 4.1601, "step": 1150 }, { "epoch": 0.05519604111153407, "grad_norm": 4.125, "learning_rate": 9.72401979444233e-06, "loss": 4.3633, "step": 1160 }, { "epoch": 0.05567186905215074, "grad_norm": 4.25, "learning_rate": 9.721640654739248e-06, "loss": 4.2736, "step": 1170 }, { "epoch": 0.056147696992767414, "grad_norm": 4.46875, "learning_rate": 9.719261515036163e-06, "loss": 4.3279, "step": 1180 }, { "epoch": 0.05662352493338409, "grad_norm": 4.21875, "learning_rate": 9.71688237533308e-06, "loss": 4.1913, "step": 1190 }, { "epoch": 0.05709935287400076, "grad_norm": 4.125, "learning_rate": 9.714503235629997e-06, "loss": 4.271, "step": 1200 }, { "epoch": 0.05757518081461743, "grad_norm": 4.4375, "learning_rate": 9.712124095926914e-06, "loss": 4.3767, "step": 1210 }, { "epoch": 0.058051008755234106, "grad_norm": 4.75, "learning_rate": 9.70974495622383e-06, "loss": 4.3491, "step": 1220 }, { "epoch": 0.05852683669585078, "grad_norm": 4.40625, "learning_rate": 9.707365816520747e-06, "loss": 4.2536, "step": 1230 }, { "epoch": 0.05900266463646745, "grad_norm": 4.09375, "learning_rate": 9.704986676817664e-06, "loss": 4.1224, "step": 1240 }, { "epoch": 0.059478492577084124, "grad_norm": 4.34375, "learning_rate": 9.70260753711458e-06, "loss": 4.2798, "step": 1250 }, { "epoch": 0.0599543205177008, "grad_norm": 4.40625, "learning_rate": 9.700228397411497e-06, "loss": 4.2649, "step": 1260 }, { "epoch": 0.06043014845831747, "grad_norm": 4.375, "learning_rate": 9.697849257708413e-06, "loss": 4.3123, "step": 1270 }, { "epoch": 0.06090597639893414, "grad_norm": 4.5625, "learning_rate": 9.69547011800533e-06, "loss": 4.2308, "step": 1280 }, { "epoch": 0.061381804339550816, "grad_norm": 4.3125, "learning_rate": 9.693090978302247e-06, "loss": 4.2924, "step": 1290 }, { "epoch": 0.06185763228016749, "grad_norm": 4.34375, "learning_rate": 9.690711838599164e-06, "loss": 4.2212, "step": 1300 }, { "epoch": 0.06233346022078416, "grad_norm": 4.28125, "learning_rate": 9.68833269889608e-06, "loss": 4.2576, "step": 1310 }, { "epoch": 0.06280928816140084, "grad_norm": 4.125, "learning_rate": 9.685953559192996e-06, "loss": 4.2384, "step": 1320 }, { "epoch": 0.06328511610201751, "grad_norm": 5.03125, "learning_rate": 9.683574419489913e-06, "loss": 4.2406, "step": 1330 }, { "epoch": 0.06376094404263419, "grad_norm": 4.6875, "learning_rate": 9.68119527978683e-06, "loss": 4.1344, "step": 1340 }, { "epoch": 0.06423677198325085, "grad_norm": 4.03125, "learning_rate": 9.678816140083747e-06, "loss": 4.2252, "step": 1350 }, { "epoch": 0.06471259992386753, "grad_norm": 4.28125, "learning_rate": 9.676437000380662e-06, "loss": 4.4172, "step": 1360 }, { "epoch": 0.0651884278644842, "grad_norm": 4.34375, "learning_rate": 9.67405786067758e-06, "loss": 4.2644, "step": 1370 }, { "epoch": 0.06566425580510088, "grad_norm": 4.09375, "learning_rate": 9.671678720974496e-06, "loss": 4.1978, "step": 1380 }, { "epoch": 0.06614008374571755, "grad_norm": 4.375, "learning_rate": 9.669299581271413e-06, "loss": 4.3845, "step": 1390 }, { "epoch": 0.06661591168633423, "grad_norm": 4.21875, "learning_rate": 9.66692044156833e-06, "loss": 4.2033, "step": 1400 }, { "epoch": 0.06709173962695089, "grad_norm": 4.25, "learning_rate": 9.664541301865247e-06, "loss": 4.211, "step": 1410 }, { "epoch": 0.06756756756756757, "grad_norm": 4.0625, "learning_rate": 9.662162162162164e-06, "loss": 4.3212, "step": 1420 }, { "epoch": 0.06804339550818424, "grad_norm": 4.25, "learning_rate": 9.659783022459079e-06, "loss": 4.2142, "step": 1430 }, { "epoch": 0.06851922344880092, "grad_norm": 4.40625, "learning_rate": 9.657403882755996e-06, "loss": 4.2464, "step": 1440 }, { "epoch": 0.06899505138941758, "grad_norm": 4.3125, "learning_rate": 9.655024743052913e-06, "loss": 4.1577, "step": 1450 }, { "epoch": 0.06947087933003426, "grad_norm": 5.75, "learning_rate": 9.65264560334983e-06, "loss": 4.197, "step": 1460 }, { "epoch": 0.06994670727065093, "grad_norm": 4.46875, "learning_rate": 9.650266463646745e-06, "loss": 4.324, "step": 1470 }, { "epoch": 0.07042253521126761, "grad_norm": 4.09375, "learning_rate": 9.647887323943664e-06, "loss": 4.3239, "step": 1480 }, { "epoch": 0.07089836315188427, "grad_norm": 4.0, "learning_rate": 9.645508184240579e-06, "loss": 4.2438, "step": 1490 }, { "epoch": 0.07137419109250095, "grad_norm": 4.1875, "learning_rate": 9.643129044537496e-06, "loss": 4.2604, "step": 1500 }, { "epoch": 0.07185001903311762, "grad_norm": 4.0625, "learning_rate": 9.640749904834413e-06, "loss": 4.2468, "step": 1510 }, { "epoch": 0.0723258469737343, "grad_norm": 3.921875, "learning_rate": 9.63837076513133e-06, "loss": 4.3617, "step": 1520 }, { "epoch": 0.07280167491435097, "grad_norm": 4.3125, "learning_rate": 9.635991625428246e-06, "loss": 4.2239, "step": 1530 }, { "epoch": 0.07327750285496765, "grad_norm": 4.0625, "learning_rate": 9.633612485725162e-06, "loss": 4.3433, "step": 1540 }, { "epoch": 0.07375333079558431, "grad_norm": 4.03125, "learning_rate": 9.63123334602208e-06, "loss": 4.3165, "step": 1550 }, { "epoch": 0.07422915873620099, "grad_norm": 3.609375, "learning_rate": 9.628854206318996e-06, "loss": 4.2016, "step": 1560 }, { "epoch": 0.07470498667681766, "grad_norm": 4.1875, "learning_rate": 9.626475066615912e-06, "loss": 4.2796, "step": 1570 }, { "epoch": 0.07518081461743434, "grad_norm": 4.1875, "learning_rate": 9.62409592691283e-06, "loss": 4.3674, "step": 1580 }, { "epoch": 0.075656642558051, "grad_norm": 4.21875, "learning_rate": 9.621716787209746e-06, "loss": 4.2644, "step": 1590 }, { "epoch": 0.07613247049866768, "grad_norm": 4.40625, "learning_rate": 9.619337647506663e-06, "loss": 4.1968, "step": 1600 }, { "epoch": 0.07660829843928435, "grad_norm": 4.40625, "learning_rate": 9.616958507803578e-06, "loss": 4.0996, "step": 1610 }, { "epoch": 0.07708412637990103, "grad_norm": 4.375, "learning_rate": 9.614579368100497e-06, "loss": 4.1176, "step": 1620 }, { "epoch": 0.0775599543205177, "grad_norm": 4.5, "learning_rate": 9.612200228397412e-06, "loss": 4.2307, "step": 1630 }, { "epoch": 0.07803578226113438, "grad_norm": 4.6875, "learning_rate": 9.609821088694329e-06, "loss": 4.3369, "step": 1640 }, { "epoch": 0.07851161020175104, "grad_norm": 4.53125, "learning_rate": 9.607441948991244e-06, "loss": 4.2244, "step": 1650 }, { "epoch": 0.07898743814236772, "grad_norm": 4.3125, "learning_rate": 9.605062809288163e-06, "loss": 3.9973, "step": 1660 }, { "epoch": 0.07946326608298439, "grad_norm": 4.46875, "learning_rate": 9.602683669585078e-06, "loss": 4.357, "step": 1670 }, { "epoch": 0.07993909402360107, "grad_norm": 4.09375, "learning_rate": 9.600304529881995e-06, "loss": 4.2249, "step": 1680 }, { "epoch": 0.08041492196421773, "grad_norm": 4.84375, "learning_rate": 9.597925390178912e-06, "loss": 4.2489, "step": 1690 }, { "epoch": 0.08089074990483441, "grad_norm": 4.4375, "learning_rate": 9.595546250475829e-06, "loss": 4.2846, "step": 1700 }, { "epoch": 0.08136657784545108, "grad_norm": 4.3125, "learning_rate": 9.593167110772746e-06, "loss": 4.2162, "step": 1710 }, { "epoch": 0.08184240578606776, "grad_norm": 4.6875, "learning_rate": 9.590787971069661e-06, "loss": 4.3717, "step": 1720 }, { "epoch": 0.08231823372668443, "grad_norm": 3.890625, "learning_rate": 9.58840883136658e-06, "loss": 4.1397, "step": 1730 }, { "epoch": 0.0827940616673011, "grad_norm": 4.15625, "learning_rate": 9.586029691663495e-06, "loss": 4.1894, "step": 1740 }, { "epoch": 0.08326988960791777, "grad_norm": 4.375, "learning_rate": 9.583650551960412e-06, "loss": 4.3444, "step": 1750 }, { "epoch": 0.08374571754853445, "grad_norm": 3.984375, "learning_rate": 9.581271412257329e-06, "loss": 4.1382, "step": 1760 }, { "epoch": 0.08422154548915112, "grad_norm": 4.1875, "learning_rate": 9.578892272554246e-06, "loss": 4.0297, "step": 1770 }, { "epoch": 0.0846973734297678, "grad_norm": 4.1875, "learning_rate": 9.576513132851163e-06, "loss": 4.2987, "step": 1780 }, { "epoch": 0.08517320137038446, "grad_norm": 4.1875, "learning_rate": 9.574133993148078e-06, "loss": 4.2703, "step": 1790 }, { "epoch": 0.08564902931100114, "grad_norm": 4.25, "learning_rate": 9.571754853444996e-06, "loss": 4.3204, "step": 1800 }, { "epoch": 0.08612485725161781, "grad_norm": 4.3125, "learning_rate": 9.569375713741912e-06, "loss": 4.078, "step": 1810 }, { "epoch": 0.08660068519223449, "grad_norm": 4.78125, "learning_rate": 9.566996574038829e-06, "loss": 4.1415, "step": 1820 }, { "epoch": 0.08707651313285115, "grad_norm": 4.28125, "learning_rate": 9.564617434335744e-06, "loss": 4.2659, "step": 1830 }, { "epoch": 0.08755234107346783, "grad_norm": 4.1875, "learning_rate": 9.562238294632662e-06, "loss": 4.0825, "step": 1840 }, { "epoch": 0.0880281690140845, "grad_norm": 4.53125, "learning_rate": 9.559859154929578e-06, "loss": 4.2083, "step": 1850 }, { "epoch": 0.08850399695470118, "grad_norm": 4.6875, "learning_rate": 9.557480015226494e-06, "loss": 4.1795, "step": 1860 }, { "epoch": 0.08897982489531785, "grad_norm": 4.15625, "learning_rate": 9.555100875523411e-06, "loss": 4.1713, "step": 1870 }, { "epoch": 0.08945565283593453, "grad_norm": 4.09375, "learning_rate": 9.552721735820328e-06, "loss": 4.2574, "step": 1880 }, { "epoch": 0.08993148077655119, "grad_norm": 4.34375, "learning_rate": 9.550342596117245e-06, "loss": 4.2583, "step": 1890 }, { "epoch": 0.09040730871716787, "grad_norm": 4.3125, "learning_rate": 9.54796345641416e-06, "loss": 4.0167, "step": 1900 }, { "epoch": 0.09088313665778454, "grad_norm": 4.40625, "learning_rate": 9.545584316711079e-06, "loss": 4.1996, "step": 1910 }, { "epoch": 0.09135896459840122, "grad_norm": 4.65625, "learning_rate": 9.543205177007994e-06, "loss": 4.439, "step": 1920 }, { "epoch": 0.09183479253901788, "grad_norm": 4.21875, "learning_rate": 9.540826037304911e-06, "loss": 4.3167, "step": 1930 }, { "epoch": 0.09231062047963456, "grad_norm": 4.0, "learning_rate": 9.538446897601828e-06, "loss": 4.2007, "step": 1940 }, { "epoch": 0.09278644842025124, "grad_norm": 4.5625, "learning_rate": 9.536067757898745e-06, "loss": 4.1617, "step": 1950 }, { "epoch": 0.09326227636086791, "grad_norm": 4.03125, "learning_rate": 9.533688618195662e-06, "loss": 4.1586, "step": 1960 }, { "epoch": 0.09373810430148459, "grad_norm": 4.625, "learning_rate": 9.531309478492577e-06, "loss": 4.0431, "step": 1970 }, { "epoch": 0.09421393224210126, "grad_norm": 4.5, "learning_rate": 9.528930338789494e-06, "loss": 4.204, "step": 1980 }, { "epoch": 0.09468976018271794, "grad_norm": 4.3125, "learning_rate": 9.526551199086411e-06, "loss": 4.2457, "step": 1990 }, { "epoch": 0.0951655881233346, "grad_norm": 4.1875, "learning_rate": 9.524172059383328e-06, "loss": 4.0895, "step": 2000 }, { "epoch": 0.09564141606395128, "grad_norm": 4.25, "learning_rate": 9.521792919680243e-06, "loss": 4.2836, "step": 2010 }, { "epoch": 0.09611724400456795, "grad_norm": 4.65625, "learning_rate": 9.519413779977162e-06, "loss": 4.084, "step": 2020 }, { "epoch": 0.09659307194518463, "grad_norm": 4.3125, "learning_rate": 9.517034640274077e-06, "loss": 4.089, "step": 2030 }, { "epoch": 0.0970688998858013, "grad_norm": 4.34375, "learning_rate": 9.514655500570994e-06, "loss": 4.2711, "step": 2040 }, { "epoch": 0.09754472782641797, "grad_norm": 4.125, "learning_rate": 9.51227636086791e-06, "loss": 4.3037, "step": 2050 }, { "epoch": 0.09802055576703464, "grad_norm": 4.53125, "learning_rate": 9.509897221164828e-06, "loss": 4.2263, "step": 2060 }, { "epoch": 0.09849638370765132, "grad_norm": 4.625, "learning_rate": 9.507518081461745e-06, "loss": 4.1684, "step": 2070 }, { "epoch": 0.09897221164826799, "grad_norm": 4.40625, "learning_rate": 9.50513894175866e-06, "loss": 4.2453, "step": 2080 }, { "epoch": 0.09944803958888467, "grad_norm": 4.21875, "learning_rate": 9.502759802055578e-06, "loss": 4.2576, "step": 2090 }, { "epoch": 0.09992386752950133, "grad_norm": 4.03125, "learning_rate": 9.500380662352494e-06, "loss": 4.2989, "step": 2100 }, { "epoch": 0.10039969547011801, "grad_norm": 4.28125, "learning_rate": 9.49800152264941e-06, "loss": 4.2821, "step": 2110 }, { "epoch": 0.10087552341073468, "grad_norm": 4.53125, "learning_rate": 9.495622382946328e-06, "loss": 4.1646, "step": 2120 }, { "epoch": 0.10135135135135136, "grad_norm": 4.40625, "learning_rate": 9.493243243243244e-06, "loss": 4.2104, "step": 2130 }, { "epoch": 0.10182717929196802, "grad_norm": 4.28125, "learning_rate": 9.490864103540161e-06, "loss": 4.0943, "step": 2140 }, { "epoch": 0.1023030072325847, "grad_norm": 4.40625, "learning_rate": 9.488484963837077e-06, "loss": 4.2685, "step": 2150 }, { "epoch": 0.10277883517320137, "grad_norm": 4.375, "learning_rate": 9.486105824133993e-06, "loss": 4.1238, "step": 2160 }, { "epoch": 0.10325466311381805, "grad_norm": 4.625, "learning_rate": 9.48372668443091e-06, "loss": 4.1915, "step": 2170 }, { "epoch": 0.10373049105443471, "grad_norm": 4.15625, "learning_rate": 9.481347544727827e-06, "loss": 4.2247, "step": 2180 }, { "epoch": 0.1042063189950514, "grad_norm": 4.25, "learning_rate": 9.478968405024744e-06, "loss": 4.0967, "step": 2190 }, { "epoch": 0.10468214693566806, "grad_norm": 4.4375, "learning_rate": 9.476589265321661e-06, "loss": 4.1366, "step": 2200 }, { "epoch": 0.10515797487628474, "grad_norm": 4.375, "learning_rate": 9.474210125618576e-06, "loss": 4.1403, "step": 2210 }, { "epoch": 0.1056338028169014, "grad_norm": 4.1875, "learning_rate": 9.471830985915493e-06, "loss": 4.1623, "step": 2220 }, { "epoch": 0.10610963075751809, "grad_norm": 4.40625, "learning_rate": 9.46945184621241e-06, "loss": 4.2635, "step": 2230 }, { "epoch": 0.10658545869813475, "grad_norm": 4.53125, "learning_rate": 9.467072706509327e-06, "loss": 4.2755, "step": 2240 }, { "epoch": 0.10706128663875143, "grad_norm": 4.46875, "learning_rate": 9.464693566806244e-06, "loss": 4.1833, "step": 2250 }, { "epoch": 0.1075371145793681, "grad_norm": 4.4375, "learning_rate": 9.46231442710316e-06, "loss": 4.1004, "step": 2260 }, { "epoch": 0.10801294251998478, "grad_norm": 4.21875, "learning_rate": 9.459935287400078e-06, "loss": 4.1858, "step": 2270 }, { "epoch": 0.10848877046060144, "grad_norm": 4.28125, "learning_rate": 9.457556147696993e-06, "loss": 4.2448, "step": 2280 }, { "epoch": 0.10896459840121812, "grad_norm": 4.125, "learning_rate": 9.45517700799391e-06, "loss": 4.1015, "step": 2290 }, { "epoch": 0.10944042634183479, "grad_norm": 4.125, "learning_rate": 9.452797868290827e-06, "loss": 4.2425, "step": 2300 }, { "epoch": 0.10991625428245147, "grad_norm": 4.21875, "learning_rate": 9.450418728587744e-06, "loss": 4.2297, "step": 2310 }, { "epoch": 0.11039208222306814, "grad_norm": 3.984375, "learning_rate": 9.44803958888466e-06, "loss": 4.2152, "step": 2320 }, { "epoch": 0.11086791016368482, "grad_norm": 4.09375, "learning_rate": 9.445660449181576e-06, "loss": 4.095, "step": 2330 }, { "epoch": 0.11134373810430148, "grad_norm": 4.28125, "learning_rate": 9.443281309478493e-06, "loss": 4.0762, "step": 2340 }, { "epoch": 0.11181956604491816, "grad_norm": 4.375, "learning_rate": 9.44090216977541e-06, "loss": 4.1238, "step": 2350 }, { "epoch": 0.11229539398553483, "grad_norm": 4.4375, "learning_rate": 9.438523030072327e-06, "loss": 4.1722, "step": 2360 }, { "epoch": 0.11277122192615151, "grad_norm": 4.3125, "learning_rate": 9.436143890369244e-06, "loss": 4.1964, "step": 2370 }, { "epoch": 0.11324704986676817, "grad_norm": 4.09375, "learning_rate": 9.43376475066616e-06, "loss": 3.9835, "step": 2380 }, { "epoch": 0.11372287780738485, "grad_norm": 4.21875, "learning_rate": 9.431385610963076e-06, "loss": 4.2603, "step": 2390 }, { "epoch": 0.11419870574800152, "grad_norm": 4.625, "learning_rate": 9.429006471259993e-06, "loss": 4.2746, "step": 2400 }, { "epoch": 0.1146745336886182, "grad_norm": 4.40625, "learning_rate": 9.42662733155691e-06, "loss": 4.2664, "step": 2410 }, { "epoch": 0.11515036162923487, "grad_norm": 4.21875, "learning_rate": 9.424248191853826e-06, "loss": 4.2261, "step": 2420 }, { "epoch": 0.11562618956985155, "grad_norm": 4.65625, "learning_rate": 9.421869052150743e-06, "loss": 4.0674, "step": 2430 }, { "epoch": 0.11610201751046821, "grad_norm": 4.3125, "learning_rate": 9.41948991244766e-06, "loss": 4.1884, "step": 2440 }, { "epoch": 0.11657784545108489, "grad_norm": 4.53125, "learning_rate": 9.417110772744577e-06, "loss": 4.1652, "step": 2450 }, { "epoch": 0.11705367339170156, "grad_norm": 4.03125, "learning_rate": 9.414731633041492e-06, "loss": 4.0409, "step": 2460 }, { "epoch": 0.11752950133231824, "grad_norm": 4.28125, "learning_rate": 9.41235249333841e-06, "loss": 4.1724, "step": 2470 }, { "epoch": 0.1180053292729349, "grad_norm": 3.8125, "learning_rate": 9.409973353635326e-06, "loss": 4.1828, "step": 2480 }, { "epoch": 0.11848115721355158, "grad_norm": 4.375, "learning_rate": 9.407594213932243e-06, "loss": 4.2305, "step": 2490 }, { "epoch": 0.11895698515416825, "grad_norm": 4.875, "learning_rate": 9.405215074229158e-06, "loss": 4.1689, "step": 2500 }, { "epoch": 0.11943281309478493, "grad_norm": 4.34375, "learning_rate": 9.402835934526077e-06, "loss": 4.2455, "step": 2510 }, { "epoch": 0.1199086410354016, "grad_norm": 4.28125, "learning_rate": 9.400456794822992e-06, "loss": 4.1024, "step": 2520 }, { "epoch": 0.12038446897601827, "grad_norm": 4.21875, "learning_rate": 9.39807765511991e-06, "loss": 4.121, "step": 2530 }, { "epoch": 0.12086029691663494, "grad_norm": 4.5625, "learning_rate": 9.395698515416826e-06, "loss": 4.2474, "step": 2540 }, { "epoch": 0.12133612485725162, "grad_norm": 4.03125, "learning_rate": 9.393319375713743e-06, "loss": 3.9112, "step": 2550 }, { "epoch": 0.12181195279786829, "grad_norm": 4.03125, "learning_rate": 9.39094023601066e-06, "loss": 3.9818, "step": 2560 }, { "epoch": 0.12228778073848497, "grad_norm": 4.375, "learning_rate": 9.388561096307575e-06, "loss": 4.2175, "step": 2570 }, { "epoch": 0.12276360867910163, "grad_norm": 4.375, "learning_rate": 9.386181956604492e-06, "loss": 4.0873, "step": 2580 }, { "epoch": 0.12323943661971831, "grad_norm": 4.09375, "learning_rate": 9.383802816901409e-06, "loss": 4.1014, "step": 2590 }, { "epoch": 0.12371526456033498, "grad_norm": 4.53125, "learning_rate": 9.381423677198326e-06, "loss": 4.1899, "step": 2600 }, { "epoch": 0.12419109250095166, "grad_norm": 3.96875, "learning_rate": 9.379044537495243e-06, "loss": 4.2734, "step": 2610 }, { "epoch": 0.12466692044156832, "grad_norm": 4.4375, "learning_rate": 9.37666539779216e-06, "loss": 4.1136, "step": 2620 }, { "epoch": 0.125142748382185, "grad_norm": 4.0625, "learning_rate": 9.374286258089077e-06, "loss": 4.1539, "step": 2630 }, { "epoch": 0.12561857632280168, "grad_norm": 4.34375, "learning_rate": 9.371907118385992e-06, "loss": 4.1747, "step": 2640 }, { "epoch": 0.12609440426341834, "grad_norm": 4.53125, "learning_rate": 9.369527978682909e-06, "loss": 4.0296, "step": 2650 }, { "epoch": 0.12657023220403502, "grad_norm": 4.0625, "learning_rate": 9.367148838979826e-06, "loss": 4.1731, "step": 2660 }, { "epoch": 0.1270460601446517, "grad_norm": 4.5, "learning_rate": 9.364769699276743e-06, "loss": 4.1789, "step": 2670 }, { "epoch": 0.12752188808526838, "grad_norm": 4.0625, "learning_rate": 9.362390559573658e-06, "loss": 4.0947, "step": 2680 }, { "epoch": 0.12799771602588503, "grad_norm": 4.25, "learning_rate": 9.360011419870576e-06, "loss": 4.178, "step": 2690 }, { "epoch": 0.1284735439665017, "grad_norm": 4.4375, "learning_rate": 9.357632280167492e-06, "loss": 4.2662, "step": 2700 }, { "epoch": 0.1289493719071184, "grad_norm": 4.15625, "learning_rate": 9.355253140464409e-06, "loss": 4.0956, "step": 2710 }, { "epoch": 0.12942519984773507, "grad_norm": 4.625, "learning_rate": 9.352874000761325e-06, "loss": 4.1425, "step": 2720 }, { "epoch": 0.12990102778835172, "grad_norm": 4.21875, "learning_rate": 9.350494861058242e-06, "loss": 4.2371, "step": 2730 }, { "epoch": 0.1303768557289684, "grad_norm": 4.625, "learning_rate": 9.34811572135516e-06, "loss": 4.1974, "step": 2740 }, { "epoch": 0.13085268366958508, "grad_norm": 4.625, "learning_rate": 9.345736581652075e-06, "loss": 4.1269, "step": 2750 }, { "epoch": 0.13132851161020176, "grad_norm": 4.375, "learning_rate": 9.343357441948993e-06, "loss": 4.1207, "step": 2760 }, { "epoch": 0.1318043395508184, "grad_norm": 4.21875, "learning_rate": 9.340978302245908e-06, "loss": 4.1916, "step": 2770 }, { "epoch": 0.1322801674914351, "grad_norm": 4.65625, "learning_rate": 9.338599162542825e-06, "loss": 4.2229, "step": 2780 }, { "epoch": 0.13275599543205177, "grad_norm": 3.859375, "learning_rate": 9.336220022839742e-06, "loss": 4.2247, "step": 2790 }, { "epoch": 0.13323182337266845, "grad_norm": 4.34375, "learning_rate": 9.333840883136659e-06, "loss": 4.0776, "step": 2800 }, { "epoch": 0.1337076513132851, "grad_norm": 4.96875, "learning_rate": 9.331461743433576e-06, "loss": 4.206, "step": 2810 }, { "epoch": 0.13418347925390178, "grad_norm": 4.625, "learning_rate": 9.329082603730491e-06, "loss": 4.139, "step": 2820 }, { "epoch": 0.13465930719451846, "grad_norm": 4.15625, "learning_rate": 9.32670346402741e-06, "loss": 4.1194, "step": 2830 }, { "epoch": 0.13513513513513514, "grad_norm": 4.53125, "learning_rate": 9.324324324324325e-06, "loss": 4.1028, "step": 2840 }, { "epoch": 0.1356109630757518, "grad_norm": 4.53125, "learning_rate": 9.321945184621242e-06, "loss": 4.0715, "step": 2850 }, { "epoch": 0.13608679101636847, "grad_norm": 4.71875, "learning_rate": 9.319566044918157e-06, "loss": 4.0622, "step": 2860 }, { "epoch": 0.13656261895698515, "grad_norm": 4.65625, "learning_rate": 9.317186905215076e-06, "loss": 4.2715, "step": 2870 }, { "epoch": 0.13703844689760183, "grad_norm": 4.0625, "learning_rate": 9.314807765511991e-06, "loss": 4.178, "step": 2880 }, { "epoch": 0.1375142748382185, "grad_norm": 4.5625, "learning_rate": 9.312428625808908e-06, "loss": 4.0675, "step": 2890 }, { "epoch": 0.13799010277883517, "grad_norm": 4.5, "learning_rate": 9.310049486105825e-06, "loss": 4.2048, "step": 2900 }, { "epoch": 0.13846593071945185, "grad_norm": 4.28125, "learning_rate": 9.307670346402742e-06, "loss": 4.0821, "step": 2910 }, { "epoch": 0.13894175866006853, "grad_norm": 4.375, "learning_rate": 9.305291206699659e-06, "loss": 4.2887, "step": 2920 }, { "epoch": 0.1394175866006852, "grad_norm": 4.375, "learning_rate": 9.302912066996574e-06, "loss": 4.2213, "step": 2930 }, { "epoch": 0.13989341454130186, "grad_norm": 4.28125, "learning_rate": 9.300532927293493e-06, "loss": 4.2231, "step": 2940 }, { "epoch": 0.14036924248191854, "grad_norm": 4.53125, "learning_rate": 9.298153787590408e-06, "loss": 4.0923, "step": 2950 }, { "epoch": 0.14084507042253522, "grad_norm": 4.375, "learning_rate": 9.295774647887325e-06, "loss": 4.1716, "step": 2960 }, { "epoch": 0.1413208983631519, "grad_norm": 4.4375, "learning_rate": 9.293395508184242e-06, "loss": 4.3189, "step": 2970 }, { "epoch": 0.14179672630376855, "grad_norm": 4.59375, "learning_rate": 9.291016368481158e-06, "loss": 4.1607, "step": 2980 }, { "epoch": 0.14227255424438523, "grad_norm": 4.125, "learning_rate": 9.288637228778075e-06, "loss": 4.202, "step": 2990 }, { "epoch": 0.1427483821850019, "grad_norm": 4.40625, "learning_rate": 9.28625808907499e-06, "loss": 4.1842, "step": 3000 }, { "epoch": 0.1432242101256186, "grad_norm": 4.4375, "learning_rate": 9.283878949371908e-06, "loss": 3.993, "step": 3010 }, { "epoch": 0.14370003806623524, "grad_norm": 4.28125, "learning_rate": 9.281499809668824e-06, "loss": 4.3401, "step": 3020 }, { "epoch": 0.14417586600685192, "grad_norm": 4.375, "learning_rate": 9.279120669965741e-06, "loss": 4.2357, "step": 3030 }, { "epoch": 0.1446516939474686, "grad_norm": 4.3125, "learning_rate": 9.276741530262657e-06, "loss": 4.142, "step": 3040 }, { "epoch": 0.14512752188808528, "grad_norm": 4.40625, "learning_rate": 9.274362390559575e-06, "loss": 4.2717, "step": 3050 }, { "epoch": 0.14560334982870193, "grad_norm": 4.34375, "learning_rate": 9.27198325085649e-06, "loss": 4.0452, "step": 3060 }, { "epoch": 0.1460791777693186, "grad_norm": 4.5, "learning_rate": 9.269604111153407e-06, "loss": 4.285, "step": 3070 }, { "epoch": 0.1465550057099353, "grad_norm": 4.21875, "learning_rate": 9.267224971450324e-06, "loss": 4.088, "step": 3080 }, { "epoch": 0.14703083365055197, "grad_norm": 4.40625, "learning_rate": 9.264845831747241e-06, "loss": 4.3187, "step": 3090 }, { "epoch": 0.14750666159116863, "grad_norm": 4.25, "learning_rate": 9.262466692044158e-06, "loss": 4.2071, "step": 3100 }, { "epoch": 0.1479824895317853, "grad_norm": 4.03125, "learning_rate": 9.260087552341073e-06, "loss": 4.1892, "step": 3110 }, { "epoch": 0.14845831747240198, "grad_norm": 4.0625, "learning_rate": 9.257708412637992e-06, "loss": 4.041, "step": 3120 }, { "epoch": 0.14893414541301866, "grad_norm": 4.40625, "learning_rate": 9.255329272934907e-06, "loss": 4.027, "step": 3130 }, { "epoch": 0.14940997335363532, "grad_norm": 4.5, "learning_rate": 9.252950133231824e-06, "loss": 4.3238, "step": 3140 }, { "epoch": 0.149885801294252, "grad_norm": 4.4375, "learning_rate": 9.250570993528741e-06, "loss": 4.0881, "step": 3150 }, { "epoch": 0.15036162923486868, "grad_norm": 4.6875, "learning_rate": 9.248191853825658e-06, "loss": 4.2109, "step": 3160 }, { "epoch": 0.15083745717548536, "grad_norm": 5.09375, "learning_rate": 9.245812714122575e-06, "loss": 4.2989, "step": 3170 }, { "epoch": 0.151313285116102, "grad_norm": 4.53125, "learning_rate": 9.24343357441949e-06, "loss": 3.9884, "step": 3180 }, { "epoch": 0.1517891130567187, "grad_norm": 5.09375, "learning_rate": 9.241054434716407e-06, "loss": 4.2166, "step": 3190 }, { "epoch": 0.15226494099733537, "grad_norm": 4.65625, "learning_rate": 9.238675295013324e-06, "loss": 4.0483, "step": 3200 }, { "epoch": 0.15274076893795205, "grad_norm": 4.59375, "learning_rate": 9.23629615531024e-06, "loss": 4.0661, "step": 3210 }, { "epoch": 0.1532165968785687, "grad_norm": 4.3125, "learning_rate": 9.233917015607156e-06, "loss": 4.1935, "step": 3220 }, { "epoch": 0.15369242481918538, "grad_norm": 4.1875, "learning_rate": 9.231537875904075e-06, "loss": 4.1766, "step": 3230 }, { "epoch": 0.15416825275980206, "grad_norm": 4.59375, "learning_rate": 9.22915873620099e-06, "loss": 4.1103, "step": 3240 }, { "epoch": 0.15464408070041874, "grad_norm": 4.28125, "learning_rate": 9.226779596497907e-06, "loss": 4.1766, "step": 3250 }, { "epoch": 0.1551199086410354, "grad_norm": 4.59375, "learning_rate": 9.224400456794824e-06, "loss": 4.3148, "step": 3260 }, { "epoch": 0.15559573658165207, "grad_norm": 4.5, "learning_rate": 9.22202131709174e-06, "loss": 4.228, "step": 3270 }, { "epoch": 0.15607156452226875, "grad_norm": 4.59375, "learning_rate": 9.219642177388657e-06, "loss": 3.9987, "step": 3280 }, { "epoch": 0.15654739246288543, "grad_norm": 4.375, "learning_rate": 9.217263037685573e-06, "loss": 4.0563, "step": 3290 }, { "epoch": 0.15702322040350208, "grad_norm": 4.4375, "learning_rate": 9.214883897982491e-06, "loss": 4.3537, "step": 3300 }, { "epoch": 0.15749904834411876, "grad_norm": 4.28125, "learning_rate": 9.212504758279407e-06, "loss": 4.1466, "step": 3310 }, { "epoch": 0.15797487628473544, "grad_norm": 4.6875, "learning_rate": 9.210125618576323e-06, "loss": 4.0335, "step": 3320 }, { "epoch": 0.15845070422535212, "grad_norm": 4.75, "learning_rate": 9.20774647887324e-06, "loss": 4.1065, "step": 3330 }, { "epoch": 0.15892653216596878, "grad_norm": 4.59375, "learning_rate": 9.205367339170157e-06, "loss": 4.1503, "step": 3340 }, { "epoch": 0.15940236010658546, "grad_norm": 4.46875, "learning_rate": 9.202988199467074e-06, "loss": 4.1657, "step": 3350 }, { "epoch": 0.15987818804720214, "grad_norm": 4.3125, "learning_rate": 9.20060905976399e-06, "loss": 4.1193, "step": 3360 }, { "epoch": 0.16035401598781882, "grad_norm": 4.59375, "learning_rate": 9.198229920060906e-06, "loss": 4.2502, "step": 3370 }, { "epoch": 0.16082984392843547, "grad_norm": 4.28125, "learning_rate": 9.195850780357823e-06, "loss": 4.2415, "step": 3380 }, { "epoch": 0.16130567186905215, "grad_norm": 4.28125, "learning_rate": 9.19347164065474e-06, "loss": 3.9747, "step": 3390 }, { "epoch": 0.16178149980966883, "grad_norm": 4.375, "learning_rate": 9.191092500951657e-06, "loss": 4.0792, "step": 3400 }, { "epoch": 0.1622573277502855, "grad_norm": 4.40625, "learning_rate": 9.188713361248574e-06, "loss": 4.0455, "step": 3410 }, { "epoch": 0.16273315569090216, "grad_norm": 4.84375, "learning_rate": 9.18633422154549e-06, "loss": 4.1212, "step": 3420 }, { "epoch": 0.16320898363151884, "grad_norm": 4.4375, "learning_rate": 9.183955081842406e-06, "loss": 4.2003, "step": 3430 }, { "epoch": 0.16368481157213552, "grad_norm": 4.625, "learning_rate": 9.181575942139323e-06, "loss": 4.1628, "step": 3440 }, { "epoch": 0.1641606395127522, "grad_norm": 4.21875, "learning_rate": 9.17919680243624e-06, "loss": 4.0427, "step": 3450 }, { "epoch": 0.16463646745336885, "grad_norm": 4.71875, "learning_rate": 9.176817662733157e-06, "loss": 4.177, "step": 3460 }, { "epoch": 0.16511229539398553, "grad_norm": 3.984375, "learning_rate": 9.174438523030072e-06, "loss": 4.1297, "step": 3470 }, { "epoch": 0.1655881233346022, "grad_norm": 4.25, "learning_rate": 9.17205938332699e-06, "loss": 4.2166, "step": 3480 }, { "epoch": 0.1660639512752189, "grad_norm": 4.25, "learning_rate": 9.169680243623906e-06, "loss": 4.1994, "step": 3490 }, { "epoch": 0.16653977921583554, "grad_norm": 4.21875, "learning_rate": 9.167301103920823e-06, "loss": 4.1354, "step": 3500 }, { "epoch": 0.16701560715645222, "grad_norm": 4.125, "learning_rate": 9.16492196421774e-06, "loss": 4.0115, "step": 3510 }, { "epoch": 0.1674914350970689, "grad_norm": 4.53125, "learning_rate": 9.162542824514657e-06, "loss": 4.0431, "step": 3520 }, { "epoch": 0.16796726303768558, "grad_norm": 4.5625, "learning_rate": 9.160163684811572e-06, "loss": 4.1884, "step": 3530 }, { "epoch": 0.16844309097830223, "grad_norm": 4.15625, "learning_rate": 9.157784545108489e-06, "loss": 4.0855, "step": 3540 }, { "epoch": 0.16891891891891891, "grad_norm": 4.6875, "learning_rate": 9.155405405405406e-06, "loss": 4.014, "step": 3550 }, { "epoch": 0.1693947468595356, "grad_norm": 4.5, "learning_rate": 9.153026265702323e-06, "loss": 4.0568, "step": 3560 }, { "epoch": 0.16987057480015227, "grad_norm": 4.28125, "learning_rate": 9.15064712599924e-06, "loss": 4.1918, "step": 3570 }, { "epoch": 0.17034640274076893, "grad_norm": 4.625, "learning_rate": 9.148267986296156e-06, "loss": 4.2386, "step": 3580 }, { "epoch": 0.1708222306813856, "grad_norm": 4.59375, "learning_rate": 9.145888846593073e-06, "loss": 3.9592, "step": 3590 }, { "epoch": 0.17129805862200229, "grad_norm": 4.25, "learning_rate": 9.143509706889989e-06, "loss": 4.1108, "step": 3600 }, { "epoch": 0.17177388656261897, "grad_norm": 4.625, "learning_rate": 9.141130567186905e-06, "loss": 4.0876, "step": 3610 }, { "epoch": 0.17224971450323562, "grad_norm": 4.46875, "learning_rate": 9.138751427483822e-06, "loss": 3.9827, "step": 3620 }, { "epoch": 0.1727255424438523, "grad_norm": 4.15625, "learning_rate": 9.13637228778074e-06, "loss": 4.1071, "step": 3630 }, { "epoch": 0.17320137038446898, "grad_norm": 4.5, "learning_rate": 9.133993148077656e-06, "loss": 4.3443, "step": 3640 }, { "epoch": 0.17367719832508566, "grad_norm": 4.34375, "learning_rate": 9.131614008374573e-06, "loss": 4.0899, "step": 3650 }, { "epoch": 0.1741530262657023, "grad_norm": 4.4375, "learning_rate": 9.12923486867149e-06, "loss": 4.0653, "step": 3660 }, { "epoch": 0.174628854206319, "grad_norm": 4.71875, "learning_rate": 9.126855728968405e-06, "loss": 4.0997, "step": 3670 }, { "epoch": 0.17510468214693567, "grad_norm": 4.3125, "learning_rate": 9.124476589265322e-06, "loss": 4.1414, "step": 3680 }, { "epoch": 0.17558051008755235, "grad_norm": 12.0625, "learning_rate": 9.122097449562239e-06, "loss": 4.1036, "step": 3690 }, { "epoch": 0.176056338028169, "grad_norm": 4.375, "learning_rate": 9.119718309859156e-06, "loss": 4.1137, "step": 3700 }, { "epoch": 0.17653216596878568, "grad_norm": 4.34375, "learning_rate": 9.117339170156071e-06, "loss": 3.9828, "step": 3710 }, { "epoch": 0.17700799390940236, "grad_norm": 4.15625, "learning_rate": 9.11496003045299e-06, "loss": 4.1668, "step": 3720 }, { "epoch": 0.17748382185001904, "grad_norm": 4.53125, "learning_rate": 9.112580890749905e-06, "loss": 3.9616, "step": 3730 }, { "epoch": 0.1779596497906357, "grad_norm": 4.375, "learning_rate": 9.110201751046822e-06, "loss": 4.0062, "step": 3740 }, { "epoch": 0.17843547773125237, "grad_norm": 4.5, "learning_rate": 9.107822611343739e-06, "loss": 4.0277, "step": 3750 }, { "epoch": 0.17891130567186905, "grad_norm": 4.78125, "learning_rate": 9.105443471640656e-06, "loss": 4.2088, "step": 3760 }, { "epoch": 0.17938713361248573, "grad_norm": 4.40625, "learning_rate": 9.103064331937573e-06, "loss": 4.1252, "step": 3770 }, { "epoch": 0.17986296155310239, "grad_norm": 4.1875, "learning_rate": 9.100685192234488e-06, "loss": 4.2697, "step": 3780 }, { "epoch": 0.18033878949371906, "grad_norm": 4.3125, "learning_rate": 9.098306052531405e-06, "loss": 4.0482, "step": 3790 }, { "epoch": 0.18081461743433574, "grad_norm": 4.8125, "learning_rate": 9.095926912828322e-06, "loss": 4.1797, "step": 3800 }, { "epoch": 0.18129044537495242, "grad_norm": 4.65625, "learning_rate": 9.093547773125239e-06, "loss": 4.1134, "step": 3810 }, { "epoch": 0.18176627331556908, "grad_norm": 4.34375, "learning_rate": 9.091168633422156e-06, "loss": 4.1565, "step": 3820 }, { "epoch": 0.18224210125618576, "grad_norm": 3.859375, "learning_rate": 9.088789493719073e-06, "loss": 3.9475, "step": 3830 }, { "epoch": 0.18271792919680244, "grad_norm": 4.53125, "learning_rate": 9.08641035401599e-06, "loss": 4.0679, "step": 3840 }, { "epoch": 0.18319375713741912, "grad_norm": 4.4375, "learning_rate": 9.084031214312905e-06, "loss": 4.1552, "step": 3850 }, { "epoch": 0.18366958507803577, "grad_norm": 4.8125, "learning_rate": 9.081652074609822e-06, "loss": 4.1413, "step": 3860 }, { "epoch": 0.18414541301865245, "grad_norm": 4.40625, "learning_rate": 9.079272934906739e-06, "loss": 4.2443, "step": 3870 }, { "epoch": 0.18462124095926913, "grad_norm": 4.3125, "learning_rate": 9.076893795203655e-06, "loss": 4.1507, "step": 3880 }, { "epoch": 0.1850970688998858, "grad_norm": 4.34375, "learning_rate": 9.07451465550057e-06, "loss": 4.2882, "step": 3890 }, { "epoch": 0.1855728968405025, "grad_norm": 6.8125, "learning_rate": 9.07213551579749e-06, "loss": 4.1221, "step": 3900 }, { "epoch": 0.18604872478111914, "grad_norm": 4.5625, "learning_rate": 9.069756376094404e-06, "loss": 4.1105, "step": 3910 }, { "epoch": 0.18652455272173582, "grad_norm": 4.09375, "learning_rate": 9.067377236391321e-06, "loss": 4.2618, "step": 3920 }, { "epoch": 0.1870003806623525, "grad_norm": 4.5, "learning_rate": 9.064998096688238e-06, "loss": 4.1832, "step": 3930 }, { "epoch": 0.18747620860296918, "grad_norm": 4.46875, "learning_rate": 9.062618956985155e-06, "loss": 4.0323, "step": 3940 }, { "epoch": 0.18795203654358583, "grad_norm": 4.90625, "learning_rate": 9.060239817282072e-06, "loss": 4.2229, "step": 3950 }, { "epoch": 0.1884278644842025, "grad_norm": 4.5, "learning_rate": 9.057860677578987e-06, "loss": 4.0911, "step": 3960 }, { "epoch": 0.1889036924248192, "grad_norm": 4.34375, "learning_rate": 9.055481537875906e-06, "loss": 3.9964, "step": 3970 }, { "epoch": 0.18937952036543587, "grad_norm": 4.78125, "learning_rate": 9.053102398172821e-06, "loss": 4.0779, "step": 3980 }, { "epoch": 0.18985534830605252, "grad_norm": 4.5, "learning_rate": 9.050723258469738e-06, "loss": 4.1432, "step": 3990 }, { "epoch": 0.1903311762466692, "grad_norm": 9.375, "learning_rate": 9.048344118766655e-06, "loss": 3.977, "step": 4000 }, { "epoch": 0.19080700418728588, "grad_norm": 4.875, "learning_rate": 9.045964979063572e-06, "loss": 4.0699, "step": 4010 }, { "epoch": 0.19128283212790256, "grad_norm": 4.25, "learning_rate": 9.043585839360489e-06, "loss": 4.1791, "step": 4020 }, { "epoch": 0.19175866006851922, "grad_norm": 4.9375, "learning_rate": 9.041206699657404e-06, "loss": 4.1312, "step": 4030 }, { "epoch": 0.1922344880091359, "grad_norm": 4.4375, "learning_rate": 9.038827559954321e-06, "loss": 4.1361, "step": 4040 }, { "epoch": 0.19271031594975258, "grad_norm": 4.1875, "learning_rate": 9.036448420251238e-06, "loss": 3.9474, "step": 4050 }, { "epoch": 0.19318614389036926, "grad_norm": 4.34375, "learning_rate": 9.034069280548155e-06, "loss": 4.0059, "step": 4060 }, { "epoch": 0.1936619718309859, "grad_norm": 4.65625, "learning_rate": 9.03169014084507e-06, "loss": 3.9457, "step": 4070 }, { "epoch": 0.1941377997716026, "grad_norm": 4.1875, "learning_rate": 9.029311001141989e-06, "loss": 4.0565, "step": 4080 }, { "epoch": 0.19461362771221927, "grad_norm": 4.28125, "learning_rate": 9.026931861438904e-06, "loss": 4.0561, "step": 4090 }, { "epoch": 0.19508945565283595, "grad_norm": 4.59375, "learning_rate": 9.02455272173582e-06, "loss": 4.1572, "step": 4100 }, { "epoch": 0.1955652835934526, "grad_norm": 4.34375, "learning_rate": 9.022173582032738e-06, "loss": 4.1929, "step": 4110 }, { "epoch": 0.19604111153406928, "grad_norm": 4.53125, "learning_rate": 9.019794442329655e-06, "loss": 4.043, "step": 4120 }, { "epoch": 0.19651693947468596, "grad_norm": 4.15625, "learning_rate": 9.017415302626572e-06, "loss": 4.0222, "step": 4130 }, { "epoch": 0.19699276741530264, "grad_norm": 4.34375, "learning_rate": 9.015036162923487e-06, "loss": 4.1197, "step": 4140 }, { "epoch": 0.1974685953559193, "grad_norm": 4.53125, "learning_rate": 9.012657023220405e-06, "loss": 4.1609, "step": 4150 }, { "epoch": 0.19794442329653597, "grad_norm": 4.46875, "learning_rate": 9.01027788351732e-06, "loss": 4.1256, "step": 4160 }, { "epoch": 0.19842025123715265, "grad_norm": 4.5, "learning_rate": 9.007898743814237e-06, "loss": 4.1588, "step": 4170 }, { "epoch": 0.19889607917776933, "grad_norm": 4.4375, "learning_rate": 9.005519604111154e-06, "loss": 4.166, "step": 4180 }, { "epoch": 0.19937190711838598, "grad_norm": 4.53125, "learning_rate": 9.003140464408071e-06, "loss": 4.3228, "step": 4190 }, { "epoch": 0.19984773505900266, "grad_norm": 4.28125, "learning_rate": 9.000761324704988e-06, "loss": 3.9919, "step": 4200 }, { "epoch": 0.20032356299961934, "grad_norm": 4.15625, "learning_rate": 8.998382185001903e-06, "loss": 4.0275, "step": 4210 }, { "epoch": 0.20079939094023602, "grad_norm": 4.71875, "learning_rate": 8.99600304529882e-06, "loss": 4.2195, "step": 4220 }, { "epoch": 0.20127521888085267, "grad_norm": 4.28125, "learning_rate": 8.993623905595737e-06, "loss": 3.9919, "step": 4230 }, { "epoch": 0.20175104682146935, "grad_norm": 4.3125, "learning_rate": 8.991244765892654e-06, "loss": 4.221, "step": 4240 }, { "epoch": 0.20222687476208603, "grad_norm": 4.5, "learning_rate": 8.98886562618957e-06, "loss": 4.0229, "step": 4250 }, { "epoch": 0.20270270270270271, "grad_norm": 4.28125, "learning_rate": 8.986486486486488e-06, "loss": 4.0602, "step": 4260 }, { "epoch": 0.20317853064331937, "grad_norm": 4.5625, "learning_rate": 8.984107346783403e-06, "loss": 3.9639, "step": 4270 }, { "epoch": 0.20365435858393605, "grad_norm": 4.3125, "learning_rate": 8.98172820708032e-06, "loss": 4.1917, "step": 4280 }, { "epoch": 0.20413018652455273, "grad_norm": 4.5, "learning_rate": 8.979349067377237e-06, "loss": 4.0393, "step": 4290 }, { "epoch": 0.2046060144651694, "grad_norm": 4.40625, "learning_rate": 8.976969927674154e-06, "loss": 4.2257, "step": 4300 }, { "epoch": 0.20508184240578606, "grad_norm": 4.375, "learning_rate": 8.974590787971071e-06, "loss": 4.0432, "step": 4310 }, { "epoch": 0.20555767034640274, "grad_norm": 4.6875, "learning_rate": 8.972211648267986e-06, "loss": 4.1771, "step": 4320 }, { "epoch": 0.20603349828701942, "grad_norm": 4.40625, "learning_rate": 8.969832508564905e-06, "loss": 4.1292, "step": 4330 }, { "epoch": 0.2065093262276361, "grad_norm": 4.65625, "learning_rate": 8.96745336886182e-06, "loss": 4.0716, "step": 4340 }, { "epoch": 0.20698515416825275, "grad_norm": 4.4375, "learning_rate": 8.965074229158737e-06, "loss": 4.0873, "step": 4350 }, { "epoch": 0.20746098210886943, "grad_norm": 4.34375, "learning_rate": 8.962695089455654e-06, "loss": 4.1851, "step": 4360 }, { "epoch": 0.2079368100494861, "grad_norm": 4.4375, "learning_rate": 8.96031594975257e-06, "loss": 4.1102, "step": 4370 }, { "epoch": 0.2084126379901028, "grad_norm": 4.1875, "learning_rate": 8.957936810049488e-06, "loss": 3.9685, "step": 4380 }, { "epoch": 0.20888846593071944, "grad_norm": 4.40625, "learning_rate": 8.955557670346403e-06, "loss": 4.0001, "step": 4390 }, { "epoch": 0.20936429387133612, "grad_norm": 4.65625, "learning_rate": 8.95317853064332e-06, "loss": 4.1629, "step": 4400 }, { "epoch": 0.2098401218119528, "grad_norm": 4.78125, "learning_rate": 8.950799390940237e-06, "loss": 4.07, "step": 4410 }, { "epoch": 0.21031594975256948, "grad_norm": 4.40625, "learning_rate": 8.948420251237154e-06, "loss": 4.0415, "step": 4420 }, { "epoch": 0.21079177769318613, "grad_norm": 7.6875, "learning_rate": 8.946041111534069e-06, "loss": 4.1533, "step": 4430 }, { "epoch": 0.2112676056338028, "grad_norm": 4.5, "learning_rate": 8.943661971830987e-06, "loss": 4.0548, "step": 4440 }, { "epoch": 0.2117434335744195, "grad_norm": 4.4375, "learning_rate": 8.941282832127903e-06, "loss": 3.9344, "step": 4450 }, { "epoch": 0.21221926151503617, "grad_norm": 4.8125, "learning_rate": 8.93890369242482e-06, "loss": 4.1426, "step": 4460 }, { "epoch": 0.21269508945565282, "grad_norm": 4.25, "learning_rate": 8.936524552721736e-06, "loss": 4.0514, "step": 4470 }, { "epoch": 0.2131709173962695, "grad_norm": 4.5625, "learning_rate": 8.934145413018653e-06, "loss": 4.1646, "step": 4480 }, { "epoch": 0.21364674533688618, "grad_norm": 4.3125, "learning_rate": 8.93176627331557e-06, "loss": 4.2407, "step": 4490 }, { "epoch": 0.21412257327750286, "grad_norm": 4.4375, "learning_rate": 8.929387133612486e-06, "loss": 4.1675, "step": 4500 }, { "epoch": 0.21459840121811952, "grad_norm": 4.40625, "learning_rate": 8.927007993909404e-06, "loss": 4.0266, "step": 4510 }, { "epoch": 0.2150742291587362, "grad_norm": 4.09375, "learning_rate": 8.92462885420632e-06, "loss": 4.1883, "step": 4520 }, { "epoch": 0.21555005709935288, "grad_norm": 4.46875, "learning_rate": 8.922249714503236e-06, "loss": 4.1879, "step": 4530 }, { "epoch": 0.21602588503996956, "grad_norm": 4.78125, "learning_rate": 8.919870574800153e-06, "loss": 4.1005, "step": 4540 }, { "epoch": 0.2165017129805862, "grad_norm": 4.65625, "learning_rate": 8.91749143509707e-06, "loss": 4.2302, "step": 4550 }, { "epoch": 0.2169775409212029, "grad_norm": 4.4375, "learning_rate": 8.915112295393985e-06, "loss": 3.9854, "step": 4560 }, { "epoch": 0.21745336886181957, "grad_norm": 4.03125, "learning_rate": 8.912733155690902e-06, "loss": 4.039, "step": 4570 }, { "epoch": 0.21792919680243625, "grad_norm": 4.4375, "learning_rate": 8.910354015987819e-06, "loss": 4.0332, "step": 4580 }, { "epoch": 0.2184050247430529, "grad_norm": 4.34375, "learning_rate": 8.907974876284736e-06, "loss": 4.0565, "step": 4590 }, { "epoch": 0.21888085268366958, "grad_norm": 4.5, "learning_rate": 8.905595736581653e-06, "loss": 4.0016, "step": 4600 }, { "epoch": 0.21935668062428626, "grad_norm": 4.6875, "learning_rate": 8.90321659687857e-06, "loss": 3.9981, "step": 4610 }, { "epoch": 0.21983250856490294, "grad_norm": 5.53125, "learning_rate": 8.900837457175487e-06, "loss": 4.1127, "step": 4620 }, { "epoch": 0.2203083365055196, "grad_norm": 4.65625, "learning_rate": 8.898458317472402e-06, "loss": 3.8738, "step": 4630 }, { "epoch": 0.22078416444613627, "grad_norm": 4.3125, "learning_rate": 8.896079177769319e-06, "loss": 4.0746, "step": 4640 }, { "epoch": 0.22125999238675295, "grad_norm": 4.8125, "learning_rate": 8.893700038066236e-06, "loss": 4.201, "step": 4650 }, { "epoch": 0.22173582032736963, "grad_norm": 4.28125, "learning_rate": 8.891320898363153e-06, "loss": 4.099, "step": 4660 }, { "epoch": 0.22221164826798628, "grad_norm": 3.953125, "learning_rate": 8.88894175866007e-06, "loss": 4.0299, "step": 4670 }, { "epoch": 0.22268747620860296, "grad_norm": 4.65625, "learning_rate": 8.886562618956985e-06, "loss": 4.2018, "step": 4680 }, { "epoch": 0.22316330414921964, "grad_norm": 4.84375, "learning_rate": 8.884183479253904e-06, "loss": 4.1384, "step": 4690 }, { "epoch": 0.22363913208983632, "grad_norm": 4.5, "learning_rate": 8.881804339550819e-06, "loss": 4.2489, "step": 4700 }, { "epoch": 0.22411496003045298, "grad_norm": 4.21875, "learning_rate": 8.879425199847736e-06, "loss": 4.3085, "step": 4710 }, { "epoch": 0.22459078797106966, "grad_norm": 4.625, "learning_rate": 8.877046060144653e-06, "loss": 4.1457, "step": 4720 }, { "epoch": 0.22506661591168634, "grad_norm": 4.5625, "learning_rate": 8.87466692044157e-06, "loss": 4.1303, "step": 4730 }, { "epoch": 0.22554244385230302, "grad_norm": 4.46875, "learning_rate": 8.872287780738485e-06, "loss": 4.2847, "step": 4740 }, { "epoch": 0.22601827179291967, "grad_norm": 4.46875, "learning_rate": 8.869908641035402e-06, "loss": 4.1697, "step": 4750 }, { "epoch": 0.22649409973353635, "grad_norm": 4.1875, "learning_rate": 8.867529501332319e-06, "loss": 4.1714, "step": 4760 }, { "epoch": 0.22696992767415303, "grad_norm": 4.59375, "learning_rate": 8.865150361629235e-06, "loss": 4.0514, "step": 4770 }, { "epoch": 0.2274457556147697, "grad_norm": 4.46875, "learning_rate": 8.862771221926152e-06, "loss": 3.9517, "step": 4780 }, { "epoch": 0.22792158355538636, "grad_norm": 4.65625, "learning_rate": 8.86039208222307e-06, "loss": 3.9799, "step": 4790 }, { "epoch": 0.22839741149600304, "grad_norm": 4.4375, "learning_rate": 8.858012942519986e-06, "loss": 4.0086, "step": 4800 }, { "epoch": 0.22887323943661972, "grad_norm": 4.6875, "learning_rate": 8.855633802816901e-06, "loss": 4.0984, "step": 4810 }, { "epoch": 0.2293490673772364, "grad_norm": 4.34375, "learning_rate": 8.853254663113818e-06, "loss": 4.0395, "step": 4820 }, { "epoch": 0.22982489531785305, "grad_norm": 5.34375, "learning_rate": 8.850875523410735e-06, "loss": 4.1219, "step": 4830 }, { "epoch": 0.23030072325846973, "grad_norm": 4.71875, "learning_rate": 8.848496383707652e-06, "loss": 4.1857, "step": 4840 }, { "epoch": 0.2307765511990864, "grad_norm": 4.3125, "learning_rate": 8.846117244004569e-06, "loss": 4.1326, "step": 4850 }, { "epoch": 0.2312523791397031, "grad_norm": 4.78125, "learning_rate": 8.843738104301486e-06, "loss": 4.0886, "step": 4860 }, { "epoch": 0.23172820708031977, "grad_norm": 4.5625, "learning_rate": 8.841358964598403e-06, "loss": 3.9277, "step": 4870 }, { "epoch": 0.23220403502093642, "grad_norm": 4.84375, "learning_rate": 8.838979824895318e-06, "loss": 4.1393, "step": 4880 }, { "epoch": 0.2326798629615531, "grad_norm": 4.375, "learning_rate": 8.836600685192235e-06, "loss": 4.1588, "step": 4890 }, { "epoch": 0.23315569090216978, "grad_norm": 4.625, "learning_rate": 8.834221545489152e-06, "loss": 4.2485, "step": 4900 }, { "epoch": 0.23363151884278646, "grad_norm": 4.59375, "learning_rate": 8.831842405786069e-06, "loss": 4.072, "step": 4910 }, { "epoch": 0.23410734678340311, "grad_norm": 4.8125, "learning_rate": 8.829463266082984e-06, "loss": 4.0892, "step": 4920 }, { "epoch": 0.2345831747240198, "grad_norm": 4.375, "learning_rate": 8.827084126379901e-06, "loss": 4.1413, "step": 4930 }, { "epoch": 0.23505900266463647, "grad_norm": 4.4375, "learning_rate": 8.824704986676818e-06, "loss": 4.0727, "step": 4940 }, { "epoch": 0.23553483060525315, "grad_norm": 4.4375, "learning_rate": 8.822325846973735e-06, "loss": 3.9788, "step": 4950 }, { "epoch": 0.2360106585458698, "grad_norm": 4.59375, "learning_rate": 8.819946707270652e-06, "loss": 4.203, "step": 4960 }, { "epoch": 0.23648648648648649, "grad_norm": 4.125, "learning_rate": 8.817567567567569e-06, "loss": 4.0701, "step": 4970 }, { "epoch": 0.23696231442710317, "grad_norm": 4.71875, "learning_rate": 8.815188427864486e-06, "loss": 4.1479, "step": 4980 }, { "epoch": 0.23743814236771985, "grad_norm": 4.46875, "learning_rate": 8.8128092881614e-06, "loss": 4.1856, "step": 4990 }, { "epoch": 0.2379139703083365, "grad_norm": 4.25, "learning_rate": 8.810430148458318e-06, "loss": 4.0174, "step": 5000 }, { "epoch": 0.23838979824895318, "grad_norm": 4.3125, "learning_rate": 8.808051008755235e-06, "loss": 4.1041, "step": 5010 }, { "epoch": 0.23886562618956986, "grad_norm": 4.4375, "learning_rate": 8.805671869052152e-06, "loss": 4.0777, "step": 5020 }, { "epoch": 0.23934145413018654, "grad_norm": 4.71875, "learning_rate": 8.803292729349068e-06, "loss": 4.1021, "step": 5030 }, { "epoch": 0.2398172820708032, "grad_norm": 4.40625, "learning_rate": 8.800913589645985e-06, "loss": 3.9755, "step": 5040 }, { "epoch": 0.24029311001141987, "grad_norm": 4.3125, "learning_rate": 8.798534449942902e-06, "loss": 3.9681, "step": 5050 }, { "epoch": 0.24076893795203655, "grad_norm": 4.90625, "learning_rate": 8.796155310239818e-06, "loss": 4.08, "step": 5060 }, { "epoch": 0.24124476589265323, "grad_norm": 4.625, "learning_rate": 8.793776170536734e-06, "loss": 3.9981, "step": 5070 }, { "epoch": 0.24172059383326988, "grad_norm": 4.34375, "learning_rate": 8.791397030833651e-06, "loss": 4.1835, "step": 5080 }, { "epoch": 0.24219642177388656, "grad_norm": 4.71875, "learning_rate": 8.789017891130568e-06, "loss": 4.1707, "step": 5090 }, { "epoch": 0.24267224971450324, "grad_norm": 4.34375, "learning_rate": 8.786638751427483e-06, "loss": 4.1127, "step": 5100 }, { "epoch": 0.24314807765511992, "grad_norm": 4.59375, "learning_rate": 8.784259611724402e-06, "loss": 4.1094, "step": 5110 }, { "epoch": 0.24362390559573657, "grad_norm": 4.71875, "learning_rate": 8.781880472021317e-06, "loss": 4.0505, "step": 5120 }, { "epoch": 0.24409973353635325, "grad_norm": 4.46875, "learning_rate": 8.779501332318234e-06, "loss": 4.11, "step": 5130 }, { "epoch": 0.24457556147696993, "grad_norm": 4.78125, "learning_rate": 8.777122192615151e-06, "loss": 4.1371, "step": 5140 }, { "epoch": 0.2450513894175866, "grad_norm": 4.1875, "learning_rate": 8.774743052912068e-06, "loss": 4.1018, "step": 5150 }, { "epoch": 0.24552721735820326, "grad_norm": 4.34375, "learning_rate": 8.772363913208985e-06, "loss": 4.2735, "step": 5160 }, { "epoch": 0.24600304529881994, "grad_norm": 4.28125, "learning_rate": 8.7699847735059e-06, "loss": 4.1887, "step": 5170 }, { "epoch": 0.24647887323943662, "grad_norm": 4.34375, "learning_rate": 8.767605633802819e-06, "loss": 4.1659, "step": 5180 }, { "epoch": 0.2469547011800533, "grad_norm": 4.4375, "learning_rate": 8.765226494099734e-06, "loss": 4.1768, "step": 5190 }, { "epoch": 0.24743052912066996, "grad_norm": 4.46875, "learning_rate": 8.762847354396651e-06, "loss": 4.0428, "step": 5200 }, { "epoch": 0.24790635706128664, "grad_norm": 4.34375, "learning_rate": 8.760468214693568e-06, "loss": 4.1032, "step": 5210 }, { "epoch": 0.24838218500190332, "grad_norm": 4.28125, "learning_rate": 8.758089074990485e-06, "loss": 4.0263, "step": 5220 }, { "epoch": 0.24885801294252, "grad_norm": 4.40625, "learning_rate": 8.755709935287402e-06, "loss": 4.0336, "step": 5230 }, { "epoch": 0.24933384088313665, "grad_norm": 4.65625, "learning_rate": 8.753330795584317e-06, "loss": 4.0243, "step": 5240 }, { "epoch": 0.24980966882375333, "grad_norm": 4.5625, "learning_rate": 8.750951655881234e-06, "loss": 4.2253, "step": 5250 }, { "epoch": 0.25028549676437, "grad_norm": 4.53125, "learning_rate": 8.74857251617815e-06, "loss": 4.0178, "step": 5260 }, { "epoch": 0.2507613247049867, "grad_norm": 4.34375, "learning_rate": 8.746193376475068e-06, "loss": 3.9313, "step": 5270 }, { "epoch": 0.25123715264560337, "grad_norm": 4.8125, "learning_rate": 8.743814236771983e-06, "loss": 4.0955, "step": 5280 }, { "epoch": 0.25171298058622005, "grad_norm": 4.53125, "learning_rate": 8.741435097068901e-06, "loss": 4.1099, "step": 5290 }, { "epoch": 0.25218880852683667, "grad_norm": 4.0625, "learning_rate": 8.739055957365817e-06, "loss": 4.0528, "step": 5300 }, { "epoch": 0.25266463646745335, "grad_norm": 4.21875, "learning_rate": 8.736676817662734e-06, "loss": 4.0575, "step": 5310 }, { "epoch": 0.25314046440807003, "grad_norm": 4.59375, "learning_rate": 8.73429767795965e-06, "loss": 4.0546, "step": 5320 }, { "epoch": 0.2536162923486867, "grad_norm": 4.59375, "learning_rate": 8.731918538256567e-06, "loss": 4.2696, "step": 5330 }, { "epoch": 0.2540921202893034, "grad_norm": 4.28125, "learning_rate": 8.729539398553484e-06, "loss": 4.134, "step": 5340 }, { "epoch": 0.25456794822992007, "grad_norm": 4.4375, "learning_rate": 8.7271602588504e-06, "loss": 4.0941, "step": 5350 }, { "epoch": 0.25504377617053675, "grad_norm": 3.90625, "learning_rate": 8.724781119147318e-06, "loss": 4.3053, "step": 5360 }, { "epoch": 0.25551960411115343, "grad_norm": 4.4375, "learning_rate": 8.722401979444233e-06, "loss": 4.2408, "step": 5370 }, { "epoch": 0.25599543205177006, "grad_norm": 4.03125, "learning_rate": 8.72002283974115e-06, "loss": 4.1579, "step": 5380 }, { "epoch": 0.25647125999238674, "grad_norm": 4.59375, "learning_rate": 8.717643700038067e-06, "loss": 4.109, "step": 5390 }, { "epoch": 0.2569470879330034, "grad_norm": 4.75, "learning_rate": 8.715264560334984e-06, "loss": 4.0685, "step": 5400 }, { "epoch": 0.2574229158736201, "grad_norm": 4.59375, "learning_rate": 8.712885420631901e-06, "loss": 4.1139, "step": 5410 }, { "epoch": 0.2578987438142368, "grad_norm": 4.28125, "learning_rate": 8.710506280928816e-06, "loss": 4.0187, "step": 5420 }, { "epoch": 0.25837457175485345, "grad_norm": 4.03125, "learning_rate": 8.708127141225733e-06, "loss": 4.1273, "step": 5430 }, { "epoch": 0.25885039969547013, "grad_norm": 3.9375, "learning_rate": 8.70574800152265e-06, "loss": 4.2265, "step": 5440 }, { "epoch": 0.2593262276360868, "grad_norm": 4.59375, "learning_rate": 8.703368861819567e-06, "loss": 3.9793, "step": 5450 }, { "epoch": 0.25980205557670344, "grad_norm": 5.0, "learning_rate": 8.700989722116482e-06, "loss": 4.0223, "step": 5460 }, { "epoch": 0.2602778835173201, "grad_norm": 4.65625, "learning_rate": 8.698610582413401e-06, "loss": 4.1453, "step": 5470 }, { "epoch": 0.2607537114579368, "grad_norm": 4.5625, "learning_rate": 8.696231442710316e-06, "loss": 3.9461, "step": 5480 }, { "epoch": 0.2612295393985535, "grad_norm": 4.4375, "learning_rate": 8.693852303007233e-06, "loss": 4.1937, "step": 5490 }, { "epoch": 0.26170536733917016, "grad_norm": 4.34375, "learning_rate": 8.69147316330415e-06, "loss": 4.2928, "step": 5500 }, { "epoch": 0.26218119527978684, "grad_norm": 4.25, "learning_rate": 8.689094023601067e-06, "loss": 4.0684, "step": 5510 }, { "epoch": 0.2626570232204035, "grad_norm": 4.6875, "learning_rate": 8.686714883897984e-06, "loss": 4.257, "step": 5520 }, { "epoch": 0.2631328511610202, "grad_norm": 4.28125, "learning_rate": 8.684335744194899e-06, "loss": 4.0779, "step": 5530 }, { "epoch": 0.2636086791016368, "grad_norm": 4.375, "learning_rate": 8.681956604491818e-06, "loss": 4.0512, "step": 5540 }, { "epoch": 0.2640845070422535, "grad_norm": 4.46875, "learning_rate": 8.679577464788733e-06, "loss": 4.1395, "step": 5550 }, { "epoch": 0.2645603349828702, "grad_norm": 4.6875, "learning_rate": 8.67719832508565e-06, "loss": 4.0707, "step": 5560 }, { "epoch": 0.26503616292348686, "grad_norm": 4.4375, "learning_rate": 8.674819185382567e-06, "loss": 4.2878, "step": 5570 }, { "epoch": 0.26551199086410354, "grad_norm": 4.34375, "learning_rate": 8.672440045679484e-06, "loss": 4.0355, "step": 5580 }, { "epoch": 0.2659878188047202, "grad_norm": 4.53125, "learning_rate": 8.670060905976399e-06, "loss": 4.0561, "step": 5590 }, { "epoch": 0.2664636467453369, "grad_norm": 4.84375, "learning_rate": 8.667681766273316e-06, "loss": 4.1909, "step": 5600 }, { "epoch": 0.2669394746859536, "grad_norm": 4.46875, "learning_rate": 8.665302626570233e-06, "loss": 4.126, "step": 5610 }, { "epoch": 0.2674153026265702, "grad_norm": 4.09375, "learning_rate": 8.66292348686715e-06, "loss": 4.1033, "step": 5620 }, { "epoch": 0.2678911305671869, "grad_norm": 4.75, "learning_rate": 8.660544347164066e-06, "loss": 4.137, "step": 5630 }, { "epoch": 0.26836695850780357, "grad_norm": 4.90625, "learning_rate": 8.658165207460982e-06, "loss": 4.0266, "step": 5640 }, { "epoch": 0.26884278644842025, "grad_norm": 6.09375, "learning_rate": 8.6557860677579e-06, "loss": 4.1615, "step": 5650 }, { "epoch": 0.2693186143890369, "grad_norm": 4.53125, "learning_rate": 8.653406928054815e-06, "loss": 4.0524, "step": 5660 }, { "epoch": 0.2697944423296536, "grad_norm": 4.53125, "learning_rate": 8.651027788351732e-06, "loss": 3.9766, "step": 5670 }, { "epoch": 0.2702702702702703, "grad_norm": 4.34375, "learning_rate": 8.64864864864865e-06, "loss": 3.9695, "step": 5680 }, { "epoch": 0.27074609821088697, "grad_norm": 4.4375, "learning_rate": 8.646269508945566e-06, "loss": 4.0698, "step": 5690 }, { "epoch": 0.2712219261515036, "grad_norm": 4.84375, "learning_rate": 8.643890369242483e-06, "loss": 4.0105, "step": 5700 }, { "epoch": 0.27169775409212027, "grad_norm": 4.75, "learning_rate": 8.641511229539398e-06, "loss": 3.913, "step": 5710 }, { "epoch": 0.27217358203273695, "grad_norm": 3.84375, "learning_rate": 8.639132089836317e-06, "loss": 4.0829, "step": 5720 }, { "epoch": 0.27264940997335363, "grad_norm": 4.4375, "learning_rate": 8.636752950133232e-06, "loss": 3.9916, "step": 5730 }, { "epoch": 0.2731252379139703, "grad_norm": 4.40625, "learning_rate": 8.634373810430149e-06, "loss": 4.099, "step": 5740 }, { "epoch": 0.273601065854587, "grad_norm": 4.5, "learning_rate": 8.631994670727066e-06, "loss": 4.186, "step": 5750 }, { "epoch": 0.27407689379520367, "grad_norm": 4.59375, "learning_rate": 8.629615531023983e-06, "loss": 4.091, "step": 5760 }, { "epoch": 0.27455272173582035, "grad_norm": 4.40625, "learning_rate": 8.627236391320898e-06, "loss": 4.1782, "step": 5770 }, { "epoch": 0.275028549676437, "grad_norm": 4.5625, "learning_rate": 8.624857251617815e-06, "loss": 4.0917, "step": 5780 }, { "epoch": 0.27550437761705365, "grad_norm": 5.21875, "learning_rate": 8.622478111914732e-06, "loss": 4.0848, "step": 5790 }, { "epoch": 0.27598020555767033, "grad_norm": 4.1875, "learning_rate": 8.620098972211649e-06, "loss": 3.9458, "step": 5800 }, { "epoch": 0.276456033498287, "grad_norm": 4.0, "learning_rate": 8.617719832508566e-06, "loss": 4.0517, "step": 5810 }, { "epoch": 0.2769318614389037, "grad_norm": 4.09375, "learning_rate": 8.615340692805483e-06, "loss": 4.0085, "step": 5820 }, { "epoch": 0.2774076893795204, "grad_norm": 5.03125, "learning_rate": 8.6129615531024e-06, "loss": 4.0754, "step": 5830 }, { "epoch": 0.27788351732013705, "grad_norm": 4.46875, "learning_rate": 8.610582413399315e-06, "loss": 4.078, "step": 5840 }, { "epoch": 0.27835934526075373, "grad_norm": 4.5625, "learning_rate": 8.608203273696232e-06, "loss": 4.1811, "step": 5850 }, { "epoch": 0.2788351732013704, "grad_norm": 4.5, "learning_rate": 8.605824133993149e-06, "loss": 4.2573, "step": 5860 }, { "epoch": 0.27931100114198704, "grad_norm": 4.5625, "learning_rate": 8.603444994290066e-06, "loss": 4.1446, "step": 5870 }, { "epoch": 0.2797868290826037, "grad_norm": 5.875, "learning_rate": 8.601065854586983e-06, "loss": 3.9695, "step": 5880 }, { "epoch": 0.2802626570232204, "grad_norm": 4.375, "learning_rate": 8.598686714883898e-06, "loss": 4.2669, "step": 5890 }, { "epoch": 0.2807384849638371, "grad_norm": 4.4375, "learning_rate": 8.596307575180816e-06, "loss": 4.1147, "step": 5900 }, { "epoch": 0.28121431290445376, "grad_norm": 4.53125, "learning_rate": 8.593928435477732e-06, "loss": 4.1568, "step": 5910 }, { "epoch": 0.28169014084507044, "grad_norm": 4.625, "learning_rate": 8.591549295774648e-06, "loss": 4.4093, "step": 5920 }, { "epoch": 0.2821659687856871, "grad_norm": 4.5625, "learning_rate": 8.589170156071565e-06, "loss": 4.0032, "step": 5930 }, { "epoch": 0.2826417967263038, "grad_norm": 4.71875, "learning_rate": 8.586791016368482e-06, "loss": 3.9237, "step": 5940 }, { "epoch": 0.2831176246669204, "grad_norm": 4.71875, "learning_rate": 8.584411876665398e-06, "loss": 3.9339, "step": 5950 }, { "epoch": 0.2835934526075371, "grad_norm": 4.1875, "learning_rate": 8.582032736962314e-06, "loss": 4.2154, "step": 5960 }, { "epoch": 0.2840692805481538, "grad_norm": 4.4375, "learning_rate": 8.579653597259231e-06, "loss": 3.8988, "step": 5970 }, { "epoch": 0.28454510848877046, "grad_norm": 4.75, "learning_rate": 8.577274457556148e-06, "loss": 3.9965, "step": 5980 }, { "epoch": 0.28502093642938714, "grad_norm": 4.4375, "learning_rate": 8.574895317853065e-06, "loss": 4.0246, "step": 5990 }, { "epoch": 0.2854967643700038, "grad_norm": 4.53125, "learning_rate": 8.572516178149982e-06, "loss": 4.0368, "step": 6000 }, { "epoch": 0.2859725923106205, "grad_norm": 4.46875, "learning_rate": 8.570137038446899e-06, "loss": 4.1306, "step": 6010 }, { "epoch": 0.2864484202512372, "grad_norm": 5.46875, "learning_rate": 8.567757898743814e-06, "loss": 4.2228, "step": 6020 }, { "epoch": 0.2869242481918538, "grad_norm": 4.3125, "learning_rate": 8.565378759040731e-06, "loss": 4.0598, "step": 6030 }, { "epoch": 0.2874000761324705, "grad_norm": 4.5625, "learning_rate": 8.562999619337648e-06, "loss": 4.0766, "step": 6040 }, { "epoch": 0.28787590407308716, "grad_norm": 4.15625, "learning_rate": 8.560620479634565e-06, "loss": 3.9653, "step": 6050 }, { "epoch": 0.28835173201370384, "grad_norm": 4.84375, "learning_rate": 8.558241339931482e-06, "loss": 4.0109, "step": 6060 }, { "epoch": 0.2888275599543205, "grad_norm": 4.25, "learning_rate": 8.555862200228399e-06, "loss": 4.0328, "step": 6070 }, { "epoch": 0.2893033878949372, "grad_norm": 4.40625, "learning_rate": 8.553483060525316e-06, "loss": 4.0755, "step": 6080 }, { "epoch": 0.2897792158355539, "grad_norm": 4.6875, "learning_rate": 8.551103920822231e-06, "loss": 4.0586, "step": 6090 }, { "epoch": 0.29025504377617056, "grad_norm": 4.625, "learning_rate": 8.548724781119148e-06, "loss": 4.1922, "step": 6100 }, { "epoch": 0.2907308717167872, "grad_norm": 4.8125, "learning_rate": 8.546345641416065e-06, "loss": 4.1791, "step": 6110 }, { "epoch": 0.29120669965740387, "grad_norm": 4.46875, "learning_rate": 8.543966501712982e-06, "loss": 4.2365, "step": 6120 }, { "epoch": 0.29168252759802055, "grad_norm": 4.375, "learning_rate": 8.541587362009897e-06, "loss": 4.1174, "step": 6130 }, { "epoch": 0.2921583555386372, "grad_norm": 4.3125, "learning_rate": 8.539208222306814e-06, "loss": 3.9897, "step": 6140 }, { "epoch": 0.2926341834792539, "grad_norm": 4.46875, "learning_rate": 8.53682908260373e-06, "loss": 4.1795, "step": 6150 }, { "epoch": 0.2931100114198706, "grad_norm": 4.90625, "learning_rate": 8.534449942900648e-06, "loss": 3.9994, "step": 6160 }, { "epoch": 0.29358583936048727, "grad_norm": 4.8125, "learning_rate": 8.532070803197565e-06, "loss": 4.1164, "step": 6170 }, { "epoch": 0.29406166730110395, "grad_norm": 4.5625, "learning_rate": 8.529691663494482e-06, "loss": 4.1432, "step": 6180 }, { "epoch": 0.29453749524172057, "grad_norm": 4.8125, "learning_rate": 8.527312523791398e-06, "loss": 4.1384, "step": 6190 }, { "epoch": 0.29501332318233725, "grad_norm": 4.59375, "learning_rate": 8.524933384088314e-06, "loss": 4.0145, "step": 6200 }, { "epoch": 0.29548915112295393, "grad_norm": 4.28125, "learning_rate": 8.52255424438523e-06, "loss": 4.1267, "step": 6210 }, { "epoch": 0.2959649790635706, "grad_norm": 4.3125, "learning_rate": 8.520175104682147e-06, "loss": 4.1031, "step": 6220 }, { "epoch": 0.2964408070041873, "grad_norm": 4.875, "learning_rate": 8.517795964979064e-06, "loss": 4.1252, "step": 6230 }, { "epoch": 0.29691663494480397, "grad_norm": 4.71875, "learning_rate": 8.515416825275981e-06, "loss": 4.0393, "step": 6240 }, { "epoch": 0.29739246288542065, "grad_norm": 4.8125, "learning_rate": 8.513037685572898e-06, "loss": 4.0323, "step": 6250 }, { "epoch": 0.29786829082603733, "grad_norm": 4.15625, "learning_rate": 8.510658545869815e-06, "loss": 4.0521, "step": 6260 }, { "epoch": 0.29834411876665395, "grad_norm": 4.4375, "learning_rate": 8.50827940616673e-06, "loss": 4.157, "step": 6270 }, { "epoch": 0.29881994670727063, "grad_norm": 4.5, "learning_rate": 8.505900266463647e-06, "loss": 3.9163, "step": 6280 }, { "epoch": 0.2992957746478873, "grad_norm": 4.5, "learning_rate": 8.503521126760564e-06, "loss": 4.0811, "step": 6290 }, { "epoch": 0.299771602588504, "grad_norm": 4.5625, "learning_rate": 8.501141987057481e-06, "loss": 4.105, "step": 6300 }, { "epoch": 0.3002474305291207, "grad_norm": 5.125, "learning_rate": 8.498762847354396e-06, "loss": 4.0623, "step": 6310 }, { "epoch": 0.30072325846973735, "grad_norm": 4.09375, "learning_rate": 8.496383707651315e-06, "loss": 4.1383, "step": 6320 }, { "epoch": 0.30119908641035403, "grad_norm": 4.46875, "learning_rate": 8.49400456794823e-06, "loss": 4.0565, "step": 6330 }, { "epoch": 0.3016749143509707, "grad_norm": 4.59375, "learning_rate": 8.491625428245147e-06, "loss": 4.1379, "step": 6340 }, { "epoch": 0.30215074229158734, "grad_norm": 4.75, "learning_rate": 8.489246288542064e-06, "loss": 4.1822, "step": 6350 }, { "epoch": 0.302626570232204, "grad_norm": 4.5625, "learning_rate": 8.486867148838981e-06, "loss": 4.109, "step": 6360 }, { "epoch": 0.3031023981728207, "grad_norm": 4.25, "learning_rate": 8.484488009135898e-06, "loss": 4.073, "step": 6370 }, { "epoch": 0.3035782261134374, "grad_norm": 4.6875, "learning_rate": 8.482108869432813e-06, "loss": 3.9786, "step": 6380 }, { "epoch": 0.30405405405405406, "grad_norm": 4.625, "learning_rate": 8.479729729729732e-06, "loss": 4.0492, "step": 6390 }, { "epoch": 0.30452988199467074, "grad_norm": 4.4375, "learning_rate": 8.477350590026647e-06, "loss": 4.0358, "step": 6400 }, { "epoch": 0.3050057099352874, "grad_norm": 4.8125, "learning_rate": 8.474971450323564e-06, "loss": 4.0075, "step": 6410 }, { "epoch": 0.3054815378759041, "grad_norm": 4.90625, "learning_rate": 8.47259231062048e-06, "loss": 3.8845, "step": 6420 }, { "epoch": 0.3059573658165207, "grad_norm": 4.6875, "learning_rate": 8.470213170917398e-06, "loss": 4.0142, "step": 6430 }, { "epoch": 0.3064331937571374, "grad_norm": 4.71875, "learning_rate": 8.467834031214315e-06, "loss": 4.059, "step": 6440 }, { "epoch": 0.3069090216977541, "grad_norm": 4.625, "learning_rate": 8.46545489151123e-06, "loss": 4.1055, "step": 6450 }, { "epoch": 0.30738484963837076, "grad_norm": 4.09375, "learning_rate": 8.463075751808147e-06, "loss": 4.1565, "step": 6460 }, { "epoch": 0.30786067757898744, "grad_norm": 4.9375, "learning_rate": 8.460696612105064e-06, "loss": 4.0734, "step": 6470 }, { "epoch": 0.3083365055196041, "grad_norm": 4.375, "learning_rate": 8.45831747240198e-06, "loss": 3.9731, "step": 6480 }, { "epoch": 0.3088123334602208, "grad_norm": 4.40625, "learning_rate": 8.455938332698896e-06, "loss": 4.1382, "step": 6490 }, { "epoch": 0.3092881614008375, "grad_norm": 4.75, "learning_rate": 8.453559192995814e-06, "loss": 4.0897, "step": 6500 }, { "epoch": 0.3097639893414541, "grad_norm": 4.625, "learning_rate": 8.45118005329273e-06, "loss": 3.9089, "step": 6510 }, { "epoch": 0.3102398172820708, "grad_norm": 4.65625, "learning_rate": 8.448800913589646e-06, "loss": 4.1107, "step": 6520 }, { "epoch": 0.31071564522268746, "grad_norm": 4.875, "learning_rate": 8.446421773886563e-06, "loss": 3.9056, "step": 6530 }, { "epoch": 0.31119147316330414, "grad_norm": 4.25, "learning_rate": 8.44404263418348e-06, "loss": 4.0372, "step": 6540 }, { "epoch": 0.3116673011039208, "grad_norm": 4.75, "learning_rate": 8.441663494480397e-06, "loss": 4.0732, "step": 6550 }, { "epoch": 0.3121431290445375, "grad_norm": 4.75, "learning_rate": 8.439284354777312e-06, "loss": 4.0018, "step": 6560 }, { "epoch": 0.3126189569851542, "grad_norm": 4.71875, "learning_rate": 8.436905215074231e-06, "loss": 4.0957, "step": 6570 }, { "epoch": 0.31309478492577086, "grad_norm": 4.59375, "learning_rate": 8.434526075371146e-06, "loss": 4.1039, "step": 6580 }, { "epoch": 0.3135706128663875, "grad_norm": 4.53125, "learning_rate": 8.432146935668063e-06, "loss": 4.0916, "step": 6590 }, { "epoch": 0.31404644080700417, "grad_norm": 4.3125, "learning_rate": 8.42976779596498e-06, "loss": 4.0841, "step": 6600 }, { "epoch": 0.31452226874762085, "grad_norm": 4.75, "learning_rate": 8.427388656261897e-06, "loss": 4.0429, "step": 6610 }, { "epoch": 0.3149980966882375, "grad_norm": 4.65625, "learning_rate": 8.425009516558812e-06, "loss": 4.1607, "step": 6620 }, { "epoch": 0.3154739246288542, "grad_norm": 4.1875, "learning_rate": 8.422630376855729e-06, "loss": 3.8124, "step": 6630 }, { "epoch": 0.3159497525694709, "grad_norm": 4.4375, "learning_rate": 8.420251237152646e-06, "loss": 3.9561, "step": 6640 }, { "epoch": 0.31642558051008757, "grad_norm": 4.34375, "learning_rate": 8.417872097449563e-06, "loss": 4.0697, "step": 6650 }, { "epoch": 0.31690140845070425, "grad_norm": 4.375, "learning_rate": 8.41549295774648e-06, "loss": 4.2736, "step": 6660 }, { "epoch": 0.31737723639132087, "grad_norm": 4.71875, "learning_rate": 8.413113818043395e-06, "loss": 4.0979, "step": 6670 }, { "epoch": 0.31785306433193755, "grad_norm": 4.625, "learning_rate": 8.410734678340314e-06, "loss": 4.0976, "step": 6680 }, { "epoch": 0.31832889227255423, "grad_norm": 4.5, "learning_rate": 8.408355538637229e-06, "loss": 3.9868, "step": 6690 }, { "epoch": 0.3188047202131709, "grad_norm": 5.0, "learning_rate": 8.405976398934146e-06, "loss": 4.0956, "step": 6700 }, { "epoch": 0.3192805481537876, "grad_norm": 4.6875, "learning_rate": 8.403597259231063e-06, "loss": 4.0959, "step": 6710 }, { "epoch": 0.31975637609440427, "grad_norm": 4.71875, "learning_rate": 8.40121811952798e-06, "loss": 4.0641, "step": 6720 }, { "epoch": 0.32023220403502095, "grad_norm": 4.28125, "learning_rate": 8.398838979824897e-06, "loss": 4.0665, "step": 6730 }, { "epoch": 0.32070803197563763, "grad_norm": 4.5625, "learning_rate": 8.396459840121812e-06, "loss": 4.0797, "step": 6740 }, { "epoch": 0.32118385991625426, "grad_norm": 4.4375, "learning_rate": 8.39408070041873e-06, "loss": 3.9241, "step": 6750 }, { "epoch": 0.32165968785687094, "grad_norm": 4.21875, "learning_rate": 8.391701560715646e-06, "loss": 4.0011, "step": 6760 }, { "epoch": 0.3221355157974876, "grad_norm": 5.09375, "learning_rate": 8.389322421012563e-06, "loss": 3.9355, "step": 6770 }, { "epoch": 0.3226113437381043, "grad_norm": 4.59375, "learning_rate": 8.38694328130948e-06, "loss": 4.1294, "step": 6780 }, { "epoch": 0.323087171678721, "grad_norm": 4.4375, "learning_rate": 8.384564141606396e-06, "loss": 4.1997, "step": 6790 }, { "epoch": 0.32356299961933765, "grad_norm": 4.75, "learning_rate": 8.382185001903312e-06, "loss": 3.9322, "step": 6800 }, { "epoch": 0.32403882755995433, "grad_norm": 4.75, "learning_rate": 8.379805862200229e-06, "loss": 4.0946, "step": 6810 }, { "epoch": 0.324514655500571, "grad_norm": 4.40625, "learning_rate": 8.377426722497145e-06, "loss": 3.866, "step": 6820 }, { "epoch": 0.3249904834411877, "grad_norm": 4.71875, "learning_rate": 8.375047582794062e-06, "loss": 4.1377, "step": 6830 }, { "epoch": 0.3254663113818043, "grad_norm": 4.90625, "learning_rate": 8.37266844309098e-06, "loss": 4.1182, "step": 6840 }, { "epoch": 0.325942139322421, "grad_norm": 5.0625, "learning_rate": 8.370289303387894e-06, "loss": 4.0385, "step": 6850 }, { "epoch": 0.3264179672630377, "grad_norm": 4.46875, "learning_rate": 8.367910163684813e-06, "loss": 4.1026, "step": 6860 }, { "epoch": 0.32689379520365436, "grad_norm": 4.375, "learning_rate": 8.365531023981728e-06, "loss": 3.9589, "step": 6870 }, { "epoch": 0.32736962314427104, "grad_norm": 4.21875, "learning_rate": 8.363151884278645e-06, "loss": 4.2882, "step": 6880 }, { "epoch": 0.3278454510848877, "grad_norm": 4.21875, "learning_rate": 8.360772744575562e-06, "loss": 3.9769, "step": 6890 }, { "epoch": 0.3283212790255044, "grad_norm": 4.625, "learning_rate": 8.358393604872479e-06, "loss": 4.1959, "step": 6900 }, { "epoch": 0.3287971069661211, "grad_norm": 4.59375, "learning_rate": 8.356014465169396e-06, "loss": 3.9489, "step": 6910 }, { "epoch": 0.3292729349067377, "grad_norm": 4.40625, "learning_rate": 8.353635325466311e-06, "loss": 3.9267, "step": 6920 }, { "epoch": 0.3297487628473544, "grad_norm": 4.46875, "learning_rate": 8.35125618576323e-06, "loss": 3.9775, "step": 6930 }, { "epoch": 0.33022459078797106, "grad_norm": 4.46875, "learning_rate": 8.348877046060145e-06, "loss": 4.1197, "step": 6940 }, { "epoch": 0.33070041872858774, "grad_norm": 4.3125, "learning_rate": 8.346497906357062e-06, "loss": 4.0682, "step": 6950 }, { "epoch": 0.3311762466692044, "grad_norm": 4.6875, "learning_rate": 8.344118766653979e-06, "loss": 4.1532, "step": 6960 }, { "epoch": 0.3316520746098211, "grad_norm": 4.34375, "learning_rate": 8.341739626950896e-06, "loss": 4.0216, "step": 6970 }, { "epoch": 0.3321279025504378, "grad_norm": 4.5625, "learning_rate": 8.339360487247811e-06, "loss": 4.0546, "step": 6980 }, { "epoch": 0.33260373049105446, "grad_norm": 4.375, "learning_rate": 8.336981347544728e-06, "loss": 4.1252, "step": 6990 }, { "epoch": 0.3330795584316711, "grad_norm": 4.5625, "learning_rate": 8.334602207841645e-06, "loss": 4.141, "step": 7000 }, { "epoch": 0.33355538637228777, "grad_norm": 4.8125, "learning_rate": 8.332223068138562e-06, "loss": 4.1397, "step": 7010 }, { "epoch": 0.33403121431290445, "grad_norm": 4.71875, "learning_rate": 8.329843928435479e-06, "loss": 4.0746, "step": 7020 }, { "epoch": 0.3345070422535211, "grad_norm": 4.3125, "learning_rate": 8.327464788732394e-06, "loss": 4.0482, "step": 7030 }, { "epoch": 0.3349828701941378, "grad_norm": 4.5625, "learning_rate": 8.325085649029312e-06, "loss": 3.9917, "step": 7040 }, { "epoch": 0.3354586981347545, "grad_norm": 4.1875, "learning_rate": 8.322706509326228e-06, "loss": 4.0372, "step": 7050 }, { "epoch": 0.33593452607537116, "grad_norm": 4.96875, "learning_rate": 8.320327369623145e-06, "loss": 4.1584, "step": 7060 }, { "epoch": 0.33641035401598784, "grad_norm": 4.375, "learning_rate": 8.317948229920062e-06, "loss": 3.9167, "step": 7070 }, { "epoch": 0.33688618195660447, "grad_norm": 4.3125, "learning_rate": 8.315569090216978e-06, "loss": 4.0369, "step": 7080 }, { "epoch": 0.33736200989722115, "grad_norm": 4.15625, "learning_rate": 8.313189950513895e-06, "loss": 4.1303, "step": 7090 }, { "epoch": 0.33783783783783783, "grad_norm": 4.4375, "learning_rate": 8.31081081081081e-06, "loss": 4.029, "step": 7100 }, { "epoch": 0.3383136657784545, "grad_norm": 4.8125, "learning_rate": 8.30843167110773e-06, "loss": 4.1146, "step": 7110 }, { "epoch": 0.3387894937190712, "grad_norm": 5.0, "learning_rate": 8.306052531404644e-06, "loss": 4.0401, "step": 7120 }, { "epoch": 0.33926532165968787, "grad_norm": 4.34375, "learning_rate": 8.303673391701561e-06, "loss": 4.0997, "step": 7130 }, { "epoch": 0.33974114960030455, "grad_norm": 4.03125, "learning_rate": 8.301294251998478e-06, "loss": 4.1168, "step": 7140 }, { "epoch": 0.34021697754092123, "grad_norm": 4.625, "learning_rate": 8.298915112295395e-06, "loss": 3.9334, "step": 7150 }, { "epoch": 0.34069280548153785, "grad_norm": 4.5625, "learning_rate": 8.29653597259231e-06, "loss": 3.9863, "step": 7160 }, { "epoch": 0.34116863342215453, "grad_norm": 4.78125, "learning_rate": 8.294156832889227e-06, "loss": 4.0166, "step": 7170 }, { "epoch": 0.3416444613627712, "grad_norm": 4.75, "learning_rate": 8.291777693186144e-06, "loss": 3.9568, "step": 7180 }, { "epoch": 0.3421202893033879, "grad_norm": 4.4375, "learning_rate": 8.289398553483061e-06, "loss": 4.0727, "step": 7190 }, { "epoch": 0.34259611724400457, "grad_norm": 4.53125, "learning_rate": 8.287019413779978e-06, "loss": 3.985, "step": 7200 }, { "epoch": 0.34307194518462125, "grad_norm": 4.625, "learning_rate": 8.284640274076895e-06, "loss": 4.1523, "step": 7210 }, { "epoch": 0.34354777312523793, "grad_norm": 4.375, "learning_rate": 8.282261134373812e-06, "loss": 3.9931, "step": 7220 }, { "epoch": 0.3440236010658546, "grad_norm": 4.75, "learning_rate": 8.279881994670727e-06, "loss": 4.0843, "step": 7230 }, { "epoch": 0.34449942900647124, "grad_norm": 4.59375, "learning_rate": 8.277502854967644e-06, "loss": 4.0668, "step": 7240 }, { "epoch": 0.3449752569470879, "grad_norm": 4.5, "learning_rate": 8.275123715264561e-06, "loss": 4.141, "step": 7250 }, { "epoch": 0.3454510848877046, "grad_norm": 4.71875, "learning_rate": 8.272744575561478e-06, "loss": 4.0883, "step": 7260 }, { "epoch": 0.3459269128283213, "grad_norm": 4.53125, "learning_rate": 8.270365435858395e-06, "loss": 4.1547, "step": 7270 }, { "epoch": 0.34640274076893796, "grad_norm": 4.65625, "learning_rate": 8.267986296155312e-06, "loss": 4.0569, "step": 7280 }, { "epoch": 0.34687856870955464, "grad_norm": 4.34375, "learning_rate": 8.265607156452229e-06, "loss": 4.1292, "step": 7290 }, { "epoch": 0.3473543966501713, "grad_norm": 4.4375, "learning_rate": 8.263228016749144e-06, "loss": 4.0743, "step": 7300 }, { "epoch": 0.347830224590788, "grad_norm": 4.65625, "learning_rate": 8.26084887704606e-06, "loss": 3.9363, "step": 7310 }, { "epoch": 0.3483060525314046, "grad_norm": 4.46875, "learning_rate": 8.258469737342978e-06, "loss": 3.9454, "step": 7320 }, { "epoch": 0.3487818804720213, "grad_norm": 4.625, "learning_rate": 8.256090597639895e-06, "loss": 3.9581, "step": 7330 }, { "epoch": 0.349257708412638, "grad_norm": 4.90625, "learning_rate": 8.25371145793681e-06, "loss": 4.0928, "step": 7340 }, { "epoch": 0.34973353635325466, "grad_norm": 4.53125, "learning_rate": 8.251332318233727e-06, "loss": 3.9903, "step": 7350 }, { "epoch": 0.35020936429387134, "grad_norm": 5.0, "learning_rate": 8.248953178530644e-06, "loss": 4.1331, "step": 7360 }, { "epoch": 0.350685192234488, "grad_norm": 4.28125, "learning_rate": 8.24657403882756e-06, "loss": 3.9584, "step": 7370 }, { "epoch": 0.3511610201751047, "grad_norm": 6.21875, "learning_rate": 8.244194899124477e-06, "loss": 4.104, "step": 7380 }, { "epoch": 0.3516368481157214, "grad_norm": 4.5, "learning_rate": 8.241815759421394e-06, "loss": 4.0209, "step": 7390 }, { "epoch": 0.352112676056338, "grad_norm": 4.6875, "learning_rate": 8.239436619718311e-06, "loss": 4.0969, "step": 7400 }, { "epoch": 0.3525885039969547, "grad_norm": 4.1875, "learning_rate": 8.237057480015226e-06, "loss": 4.0389, "step": 7410 }, { "epoch": 0.35306433193757136, "grad_norm": 4.875, "learning_rate": 8.234678340312143e-06, "loss": 4.0889, "step": 7420 }, { "epoch": 0.35354015987818804, "grad_norm": 4.53125, "learning_rate": 8.23229920060906e-06, "loss": 4.0457, "step": 7430 }, { "epoch": 0.3540159878188047, "grad_norm": 4.875, "learning_rate": 8.229920060905977e-06, "loss": 4.079, "step": 7440 }, { "epoch": 0.3544918157594214, "grad_norm": 4.375, "learning_rate": 8.227540921202894e-06, "loss": 4.0067, "step": 7450 }, { "epoch": 0.3549676437000381, "grad_norm": 4.90625, "learning_rate": 8.225161781499811e-06, "loss": 3.9536, "step": 7460 }, { "epoch": 0.35544347164065476, "grad_norm": 4.5, "learning_rate": 8.222782641796728e-06, "loss": 4.0844, "step": 7470 }, { "epoch": 0.3559192995812714, "grad_norm": 4.5625, "learning_rate": 8.220403502093643e-06, "loss": 4.1825, "step": 7480 }, { "epoch": 0.35639512752188807, "grad_norm": 4.46875, "learning_rate": 8.21802436239056e-06, "loss": 4.0353, "step": 7490 }, { "epoch": 0.35687095546250475, "grad_norm": 4.21875, "learning_rate": 8.215645222687477e-06, "loss": 3.9066, "step": 7500 }, { "epoch": 0.3573467834031214, "grad_norm": 4.5625, "learning_rate": 8.213266082984394e-06, "loss": 4.072, "step": 7510 }, { "epoch": 0.3578226113437381, "grad_norm": 4.625, "learning_rate": 8.21088694328131e-06, "loss": 3.8571, "step": 7520 }, { "epoch": 0.3582984392843548, "grad_norm": 4.71875, "learning_rate": 8.208507803578228e-06, "loss": 4.07, "step": 7530 }, { "epoch": 0.35877426722497147, "grad_norm": 4.59375, "learning_rate": 8.206128663875143e-06, "loss": 4.1133, "step": 7540 }, { "epoch": 0.35925009516558815, "grad_norm": 4.875, "learning_rate": 8.20374952417206e-06, "loss": 3.8898, "step": 7550 }, { "epoch": 0.35972592310620477, "grad_norm": 4.65625, "learning_rate": 8.201370384468977e-06, "loss": 4.0222, "step": 7560 }, { "epoch": 0.36020175104682145, "grad_norm": 4.46875, "learning_rate": 8.198991244765894e-06, "loss": 3.8332, "step": 7570 }, { "epoch": 0.36067757898743813, "grad_norm": 4.71875, "learning_rate": 8.19661210506281e-06, "loss": 4.141, "step": 7580 }, { "epoch": 0.3611534069280548, "grad_norm": 4.625, "learning_rate": 8.194232965359726e-06, "loss": 4.2563, "step": 7590 }, { "epoch": 0.3616292348686715, "grad_norm": 4.15625, "learning_rate": 8.191853825656644e-06, "loss": 4.0097, "step": 7600 }, { "epoch": 0.36210506280928817, "grad_norm": 4.5625, "learning_rate": 8.18947468595356e-06, "loss": 4.1175, "step": 7610 }, { "epoch": 0.36258089074990485, "grad_norm": 4.5625, "learning_rate": 8.187095546250477e-06, "loss": 4.127, "step": 7620 }, { "epoch": 0.36305671869052153, "grad_norm": 4.59375, "learning_rate": 8.184716406547394e-06, "loss": 4.2144, "step": 7630 }, { "epoch": 0.36353254663113815, "grad_norm": 4.78125, "learning_rate": 8.18233726684431e-06, "loss": 4.1498, "step": 7640 }, { "epoch": 0.36400837457175483, "grad_norm": 4.78125, "learning_rate": 8.179958127141226e-06, "loss": 4.0922, "step": 7650 }, { "epoch": 0.3644842025123715, "grad_norm": 4.1875, "learning_rate": 8.177578987438143e-06, "loss": 3.9884, "step": 7660 }, { "epoch": 0.3649600304529882, "grad_norm": 4.34375, "learning_rate": 8.17519984773506e-06, "loss": 4.0436, "step": 7670 }, { "epoch": 0.3654358583936049, "grad_norm": 4.84375, "learning_rate": 8.172820708031976e-06, "loss": 4.0218, "step": 7680 }, { "epoch": 0.36591168633422155, "grad_norm": 5.1875, "learning_rate": 8.170441568328893e-06, "loss": 3.979, "step": 7690 }, { "epoch": 0.36638751427483823, "grad_norm": 4.46875, "learning_rate": 8.168062428625809e-06, "loss": 4.0663, "step": 7700 }, { "epoch": 0.3668633422154549, "grad_norm": 4.3125, "learning_rate": 8.165683288922727e-06, "loss": 4.002, "step": 7710 }, { "epoch": 0.36733917015607154, "grad_norm": 4.34375, "learning_rate": 8.163304149219642e-06, "loss": 4.1015, "step": 7720 }, { "epoch": 0.3678149980966882, "grad_norm": 4.3125, "learning_rate": 8.16092500951656e-06, "loss": 3.9887, "step": 7730 }, { "epoch": 0.3682908260373049, "grad_norm": 4.59375, "learning_rate": 8.158545869813476e-06, "loss": 4.0764, "step": 7740 }, { "epoch": 0.3687666539779216, "grad_norm": 4.46875, "learning_rate": 8.156166730110393e-06, "loss": 4.0404, "step": 7750 }, { "epoch": 0.36924248191853826, "grad_norm": 4.28125, "learning_rate": 8.15378759040731e-06, "loss": 4.1453, "step": 7760 }, { "epoch": 0.36971830985915494, "grad_norm": 4.5, "learning_rate": 8.151408450704225e-06, "loss": 4.0953, "step": 7770 }, { "epoch": 0.3701941377997716, "grad_norm": 4.78125, "learning_rate": 8.149029311001144e-06, "loss": 4.123, "step": 7780 }, { "epoch": 0.3706699657403883, "grad_norm": 4.40625, "learning_rate": 8.146650171298059e-06, "loss": 4.1948, "step": 7790 }, { "epoch": 0.371145793681005, "grad_norm": 4.5625, "learning_rate": 8.144271031594976e-06, "loss": 4.1035, "step": 7800 }, { "epoch": 0.3716216216216216, "grad_norm": 4.53125, "learning_rate": 8.141891891891893e-06, "loss": 4.1463, "step": 7810 }, { "epoch": 0.3720974495622383, "grad_norm": 4.78125, "learning_rate": 8.13951275218881e-06, "loss": 4.0655, "step": 7820 }, { "epoch": 0.37257327750285496, "grad_norm": 4.65625, "learning_rate": 8.137133612485725e-06, "loss": 4.0313, "step": 7830 }, { "epoch": 0.37304910544347164, "grad_norm": 4.34375, "learning_rate": 8.134754472782642e-06, "loss": 4.0067, "step": 7840 }, { "epoch": 0.3735249333840883, "grad_norm": 4.46875, "learning_rate": 8.132375333079559e-06, "loss": 4.1338, "step": 7850 }, { "epoch": 0.374000761324705, "grad_norm": 4.71875, "learning_rate": 8.129996193376476e-06, "loss": 3.9961, "step": 7860 }, { "epoch": 0.3744765892653217, "grad_norm": 4.53125, "learning_rate": 8.127617053673393e-06, "loss": 3.9175, "step": 7870 }, { "epoch": 0.37495241720593836, "grad_norm": 4.4375, "learning_rate": 8.125237913970308e-06, "loss": 4.089, "step": 7880 }, { "epoch": 0.375428245146555, "grad_norm": 4.5, "learning_rate": 8.122858774267227e-06, "loss": 4.1382, "step": 7890 }, { "epoch": 0.37590407308717166, "grad_norm": 4.5, "learning_rate": 8.120479634564142e-06, "loss": 3.9129, "step": 7900 }, { "epoch": 0.37637990102778834, "grad_norm": 4.375, "learning_rate": 8.118100494861059e-06, "loss": 4.1006, "step": 7910 }, { "epoch": 0.376855728968405, "grad_norm": 4.21875, "learning_rate": 8.115721355157976e-06, "loss": 4.0742, "step": 7920 }, { "epoch": 0.3773315569090217, "grad_norm": 4.40625, "learning_rate": 8.113342215454893e-06, "loss": 4.1029, "step": 7930 }, { "epoch": 0.3778073848496384, "grad_norm": 4.5, "learning_rate": 8.11096307575181e-06, "loss": 4.0353, "step": 7940 }, { "epoch": 0.37828321279025506, "grad_norm": 4.53125, "learning_rate": 8.108583936048725e-06, "loss": 4.123, "step": 7950 }, { "epoch": 0.37875904073087174, "grad_norm": 4.875, "learning_rate": 8.106204796345643e-06, "loss": 3.9719, "step": 7960 }, { "epoch": 0.37923486867148837, "grad_norm": 4.6875, "learning_rate": 8.103825656642558e-06, "loss": 4.1216, "step": 7970 }, { "epoch": 0.37971069661210505, "grad_norm": 4.6875, "learning_rate": 8.101446516939475e-06, "loss": 3.9455, "step": 7980 }, { "epoch": 0.3801865245527217, "grad_norm": 4.34375, "learning_rate": 8.099067377236392e-06, "loss": 4.0831, "step": 7990 }, { "epoch": 0.3806623524933384, "grad_norm": 4.375, "learning_rate": 8.09668823753331e-06, "loss": 3.9635, "step": 8000 }, { "epoch": 0.3811381804339551, "grad_norm": 4.78125, "learning_rate": 8.094309097830224e-06, "loss": 4.2112, "step": 8010 }, { "epoch": 0.38161400837457177, "grad_norm": 4.5625, "learning_rate": 8.091929958127141e-06, "loss": 4.1111, "step": 8020 }, { "epoch": 0.38208983631518845, "grad_norm": 4.59375, "learning_rate": 8.089550818424058e-06, "loss": 4.0415, "step": 8030 }, { "epoch": 0.3825656642558051, "grad_norm": 4.46875, "learning_rate": 8.087171678720975e-06, "loss": 4.0799, "step": 8040 }, { "epoch": 0.38304149219642175, "grad_norm": 4.8125, "learning_rate": 8.084792539017892e-06, "loss": 3.9689, "step": 8050 }, { "epoch": 0.38351732013703843, "grad_norm": 5.28125, "learning_rate": 8.082413399314807e-06, "loss": 4.0447, "step": 8060 }, { "epoch": 0.3839931480776551, "grad_norm": 4.46875, "learning_rate": 8.080034259611726e-06, "loss": 4.0374, "step": 8070 }, { "epoch": 0.3844689760182718, "grad_norm": 4.5625, "learning_rate": 8.077655119908641e-06, "loss": 3.9036, "step": 8080 }, { "epoch": 0.38494480395888847, "grad_norm": 4.6875, "learning_rate": 8.075275980205558e-06, "loss": 3.971, "step": 8090 }, { "epoch": 0.38542063189950515, "grad_norm": 5.0, "learning_rate": 8.072896840502475e-06, "loss": 4.1058, "step": 8100 }, { "epoch": 0.38589645984012183, "grad_norm": 4.75, "learning_rate": 8.070517700799392e-06, "loss": 4.1088, "step": 8110 }, { "epoch": 0.3863722877807385, "grad_norm": 4.65625, "learning_rate": 8.068138561096309e-06, "loss": 4.0677, "step": 8120 }, { "epoch": 0.38684811572135513, "grad_norm": 4.78125, "learning_rate": 8.065759421393224e-06, "loss": 4.0771, "step": 8130 }, { "epoch": 0.3873239436619718, "grad_norm": 4.21875, "learning_rate": 8.063380281690143e-06, "loss": 3.9418, "step": 8140 }, { "epoch": 0.3877997716025885, "grad_norm": 4.59375, "learning_rate": 8.061001141987058e-06, "loss": 4.2185, "step": 8150 }, { "epoch": 0.3882755995432052, "grad_norm": 4.90625, "learning_rate": 8.058622002283975e-06, "loss": 4.1158, "step": 8160 }, { "epoch": 0.38875142748382185, "grad_norm": 4.65625, "learning_rate": 8.056242862580892e-06, "loss": 4.0029, "step": 8170 }, { "epoch": 0.38922725542443853, "grad_norm": 4.71875, "learning_rate": 8.053863722877809e-06, "loss": 4.0373, "step": 8180 }, { "epoch": 0.3897030833650552, "grad_norm": 4.625, "learning_rate": 8.051484583174724e-06, "loss": 4.047, "step": 8190 }, { "epoch": 0.3901789113056719, "grad_norm": 4.65625, "learning_rate": 8.04910544347164e-06, "loss": 3.9548, "step": 8200 }, { "epoch": 0.3906547392462885, "grad_norm": 4.46875, "learning_rate": 8.046726303768558e-06, "loss": 4.146, "step": 8210 }, { "epoch": 0.3911305671869052, "grad_norm": 4.65625, "learning_rate": 8.044347164065475e-06, "loss": 3.9495, "step": 8220 }, { "epoch": 0.3916063951275219, "grad_norm": 5.96875, "learning_rate": 8.041968024362391e-06, "loss": 4.064, "step": 8230 }, { "epoch": 0.39208222306813856, "grad_norm": 4.59375, "learning_rate": 8.039588884659307e-06, "loss": 4.1755, "step": 8240 }, { "epoch": 0.39255805100875524, "grad_norm": 4.8125, "learning_rate": 8.037209744956225e-06, "loss": 4.1421, "step": 8250 }, { "epoch": 0.3930338789493719, "grad_norm": 4.375, "learning_rate": 8.03483060525314e-06, "loss": 3.984, "step": 8260 }, { "epoch": 0.3935097068899886, "grad_norm": 4.625, "learning_rate": 8.032451465550057e-06, "loss": 4.0727, "step": 8270 }, { "epoch": 0.3939855348306053, "grad_norm": 4.4375, "learning_rate": 8.030072325846974e-06, "loss": 4.0066, "step": 8280 }, { "epoch": 0.3944613627712219, "grad_norm": 4.09375, "learning_rate": 8.027693186143891e-06, "loss": 4.0276, "step": 8290 }, { "epoch": 0.3949371907118386, "grad_norm": 4.59375, "learning_rate": 8.025314046440808e-06, "loss": 4.2555, "step": 8300 }, { "epoch": 0.39541301865245526, "grad_norm": 4.875, "learning_rate": 8.022934906737723e-06, "loss": 4.0677, "step": 8310 }, { "epoch": 0.39588884659307194, "grad_norm": 4.90625, "learning_rate": 8.020555767034642e-06, "loss": 3.9935, "step": 8320 }, { "epoch": 0.3963646745336886, "grad_norm": 4.59375, "learning_rate": 8.018176627331557e-06, "loss": 4.1275, "step": 8330 }, { "epoch": 0.3968405024743053, "grad_norm": 4.34375, "learning_rate": 8.015797487628474e-06, "loss": 4.0598, "step": 8340 }, { "epoch": 0.397316330414922, "grad_norm": 4.5625, "learning_rate": 8.013418347925391e-06, "loss": 4.1323, "step": 8350 }, { "epoch": 0.39779215835553866, "grad_norm": 4.40625, "learning_rate": 8.011039208222308e-06, "loss": 4.0345, "step": 8360 }, { "epoch": 0.3982679862961553, "grad_norm": 5.1875, "learning_rate": 8.008660068519223e-06, "loss": 3.9533, "step": 8370 }, { "epoch": 0.39874381423677197, "grad_norm": 4.625, "learning_rate": 8.00628092881614e-06, "loss": 4.0228, "step": 8380 }, { "epoch": 0.39921964217738864, "grad_norm": 4.5, "learning_rate": 8.003901789113057e-06, "loss": 4.1696, "step": 8390 }, { "epoch": 0.3996954701180053, "grad_norm": 5.0, "learning_rate": 8.001522649409974e-06, "loss": 4.0009, "step": 8400 }, { "epoch": 0.400171298058622, "grad_norm": 4.75, "learning_rate": 7.999143509706891e-06, "loss": 4.1707, "step": 8410 }, { "epoch": 0.4006471259992387, "grad_norm": 4.28125, "learning_rate": 7.996764370003808e-06, "loss": 4.026, "step": 8420 }, { "epoch": 0.40112295393985536, "grad_norm": 5.21875, "learning_rate": 7.994385230300725e-06, "loss": 4.0771, "step": 8430 }, { "epoch": 0.40159878188047204, "grad_norm": 4.90625, "learning_rate": 7.99200609059764e-06, "loss": 4.0767, "step": 8440 }, { "epoch": 0.40207460982108867, "grad_norm": 4.4375, "learning_rate": 7.989626950894557e-06, "loss": 3.84, "step": 8450 }, { "epoch": 0.40255043776170535, "grad_norm": 4.75, "learning_rate": 7.987247811191474e-06, "loss": 3.9286, "step": 8460 }, { "epoch": 0.40302626570232203, "grad_norm": 4.5, "learning_rate": 7.98486867148839e-06, "loss": 3.8393, "step": 8470 }, { "epoch": 0.4035020936429387, "grad_norm": 4.46875, "learning_rate": 7.982489531785308e-06, "loss": 4.0338, "step": 8480 }, { "epoch": 0.4039779215835554, "grad_norm": 4.875, "learning_rate": 7.980110392082225e-06, "loss": 4.2028, "step": 8490 }, { "epoch": 0.40445374952417207, "grad_norm": 4.84375, "learning_rate": 7.977731252379141e-06, "loss": 4.0863, "step": 8500 }, { "epoch": 0.40492957746478875, "grad_norm": 4.4375, "learning_rate": 7.975352112676057e-06, "loss": 4.1193, "step": 8510 }, { "epoch": 0.40540540540540543, "grad_norm": 4.90625, "learning_rate": 7.972972972972974e-06, "loss": 4.0864, "step": 8520 }, { "epoch": 0.40588123334602205, "grad_norm": 4.65625, "learning_rate": 7.97059383326989e-06, "loss": 4.1448, "step": 8530 }, { "epoch": 0.40635706128663873, "grad_norm": 4.59375, "learning_rate": 7.968214693566807e-06, "loss": 4.0682, "step": 8540 }, { "epoch": 0.4068328892272554, "grad_norm": 4.59375, "learning_rate": 7.965835553863723e-06, "loss": 4.0731, "step": 8550 }, { "epoch": 0.4073087171678721, "grad_norm": 4.6875, "learning_rate": 7.96345641416064e-06, "loss": 4.1422, "step": 8560 }, { "epoch": 0.40778454510848877, "grad_norm": 4.5625, "learning_rate": 7.961077274457556e-06, "loss": 4.1015, "step": 8570 }, { "epoch": 0.40826037304910545, "grad_norm": 4.75, "learning_rate": 7.958698134754473e-06, "loss": 4.1523, "step": 8580 }, { "epoch": 0.40873620098972213, "grad_norm": 4.9375, "learning_rate": 7.95631899505139e-06, "loss": 4.2264, "step": 8590 }, { "epoch": 0.4092120289303388, "grad_norm": 4.46875, "learning_rate": 7.953939855348307e-06, "loss": 4.0196, "step": 8600 }, { "epoch": 0.40968785687095544, "grad_norm": 5.09375, "learning_rate": 7.951560715645224e-06, "loss": 3.8756, "step": 8610 }, { "epoch": 0.4101636848115721, "grad_norm": 4.96875, "learning_rate": 7.94918157594214e-06, "loss": 4.0834, "step": 8620 }, { "epoch": 0.4106395127521888, "grad_norm": 4.5625, "learning_rate": 7.946802436239056e-06, "loss": 3.9515, "step": 8630 }, { "epoch": 0.4111153406928055, "grad_norm": 7.03125, "learning_rate": 7.944423296535973e-06, "loss": 4.0364, "step": 8640 }, { "epoch": 0.41159116863342216, "grad_norm": 4.6875, "learning_rate": 7.94204415683289e-06, "loss": 4.0389, "step": 8650 }, { "epoch": 0.41206699657403884, "grad_norm": 4.65625, "learning_rate": 7.939665017129807e-06, "loss": 4.1478, "step": 8660 }, { "epoch": 0.4125428245146555, "grad_norm": 4.625, "learning_rate": 7.937285877426724e-06, "loss": 4.0136, "step": 8670 }, { "epoch": 0.4130186524552722, "grad_norm": 4.96875, "learning_rate": 7.934906737723639e-06, "loss": 4.012, "step": 8680 }, { "epoch": 0.4134944803958888, "grad_norm": 4.71875, "learning_rate": 7.932527598020556e-06, "loss": 4.0531, "step": 8690 }, { "epoch": 0.4139703083365055, "grad_norm": 4.75, "learning_rate": 7.930148458317473e-06, "loss": 3.8953, "step": 8700 }, { "epoch": 0.4144461362771222, "grad_norm": 4.9375, "learning_rate": 7.92776931861439e-06, "loss": 4.1939, "step": 8710 }, { "epoch": 0.41492196421773886, "grad_norm": 4.625, "learning_rate": 7.925390178911307e-06, "loss": 3.9285, "step": 8720 }, { "epoch": 0.41539779215835554, "grad_norm": 4.625, "learning_rate": 7.923011039208222e-06, "loss": 4.2629, "step": 8730 }, { "epoch": 0.4158736200989722, "grad_norm": 4.53125, "learning_rate": 7.92063189950514e-06, "loss": 4.0599, "step": 8740 }, { "epoch": 0.4163494480395889, "grad_norm": 4.8125, "learning_rate": 7.918252759802056e-06, "loss": 3.9922, "step": 8750 }, { "epoch": 0.4168252759802056, "grad_norm": 4.84375, "learning_rate": 7.915873620098973e-06, "loss": 4.0063, "step": 8760 }, { "epoch": 0.41730110392082226, "grad_norm": 4.8125, "learning_rate": 7.91349448039589e-06, "loss": 4.0765, "step": 8770 }, { "epoch": 0.4177769318614389, "grad_norm": 4.65625, "learning_rate": 7.911115340692807e-06, "loss": 4.0268, "step": 8780 }, { "epoch": 0.41825275980205556, "grad_norm": 4.71875, "learning_rate": 7.908736200989723e-06, "loss": 4.1132, "step": 8790 }, { "epoch": 0.41872858774267224, "grad_norm": 4.5, "learning_rate": 7.906357061286639e-06, "loss": 4.1542, "step": 8800 }, { "epoch": 0.4192044156832889, "grad_norm": 4.5, "learning_rate": 7.903977921583556e-06, "loss": 3.8899, "step": 8810 }, { "epoch": 0.4196802436239056, "grad_norm": 4.5625, "learning_rate": 7.901598781880473e-06, "loss": 4.0266, "step": 8820 }, { "epoch": 0.4201560715645223, "grad_norm": 4.65625, "learning_rate": 7.89921964217739e-06, "loss": 4.0735, "step": 8830 }, { "epoch": 0.42063189950513896, "grad_norm": 5.0625, "learning_rate": 7.896840502474306e-06, "loss": 4.2432, "step": 8840 }, { "epoch": 0.42110772744575564, "grad_norm": 4.90625, "learning_rate": 7.894461362771223e-06, "loss": 4.0563, "step": 8850 }, { "epoch": 0.42158355538637227, "grad_norm": 4.84375, "learning_rate": 7.892082223068139e-06, "loss": 3.9096, "step": 8860 }, { "epoch": 0.42205938332698895, "grad_norm": 5.0, "learning_rate": 7.889703083365055e-06, "loss": 3.9045, "step": 8870 }, { "epoch": 0.4225352112676056, "grad_norm": 4.90625, "learning_rate": 7.887323943661972e-06, "loss": 4.1233, "step": 8880 }, { "epoch": 0.4230110392082223, "grad_norm": 4.65625, "learning_rate": 7.88494480395889e-06, "loss": 4.0719, "step": 8890 }, { "epoch": 0.423486867148839, "grad_norm": 4.8125, "learning_rate": 7.882565664255806e-06, "loss": 4.0798, "step": 8900 }, { "epoch": 0.42396269508945567, "grad_norm": 4.9375, "learning_rate": 7.880186524552721e-06, "loss": 4.1731, "step": 8910 }, { "epoch": 0.42443852303007235, "grad_norm": 4.53125, "learning_rate": 7.87780738484964e-06, "loss": 4.0267, "step": 8920 }, { "epoch": 0.424914350970689, "grad_norm": 4.53125, "learning_rate": 7.875428245146555e-06, "loss": 4.0209, "step": 8930 }, { "epoch": 0.42539017891130565, "grad_norm": 4.96875, "learning_rate": 7.873049105443472e-06, "loss": 3.9994, "step": 8940 }, { "epoch": 0.42586600685192233, "grad_norm": 4.6875, "learning_rate": 7.870669965740389e-06, "loss": 4.1071, "step": 8950 }, { "epoch": 0.426341834792539, "grad_norm": 4.53125, "learning_rate": 7.868290826037306e-06, "loss": 4.1658, "step": 8960 }, { "epoch": 0.4268176627331557, "grad_norm": 4.40625, "learning_rate": 7.865911686334223e-06, "loss": 4.0561, "step": 8970 }, { "epoch": 0.42729349067377237, "grad_norm": 4.53125, "learning_rate": 7.863532546631138e-06, "loss": 3.9119, "step": 8980 }, { "epoch": 0.42776931861438905, "grad_norm": 4.3125, "learning_rate": 7.861153406928057e-06, "loss": 4.0949, "step": 8990 }, { "epoch": 0.42824514655500573, "grad_norm": 4.46875, "learning_rate": 7.858774267224972e-06, "loss": 3.8909, "step": 9000 }, { "epoch": 0.4287209744956224, "grad_norm": 5.0625, "learning_rate": 7.856395127521889e-06, "loss": 4.0228, "step": 9010 }, { "epoch": 0.42919680243623903, "grad_norm": 4.59375, "learning_rate": 7.854015987818806e-06, "loss": 4.1163, "step": 9020 }, { "epoch": 0.4296726303768557, "grad_norm": 4.84375, "learning_rate": 7.851636848115723e-06, "loss": 4.1152, "step": 9030 }, { "epoch": 0.4301484583174724, "grad_norm": 4.6875, "learning_rate": 7.849257708412638e-06, "loss": 4.0903, "step": 9040 }, { "epoch": 0.4306242862580891, "grad_norm": 5.0, "learning_rate": 7.846878568709555e-06, "loss": 4.131, "step": 9050 }, { "epoch": 0.43110011419870575, "grad_norm": 4.53125, "learning_rate": 7.844499429006472e-06, "loss": 3.8553, "step": 9060 }, { "epoch": 0.43157594213932243, "grad_norm": 4.71875, "learning_rate": 7.842120289303389e-06, "loss": 3.997, "step": 9070 }, { "epoch": 0.4320517700799391, "grad_norm": 4.71875, "learning_rate": 7.839741149600306e-06, "loss": 4.0341, "step": 9080 }, { "epoch": 0.4325275980205558, "grad_norm": 4.5625, "learning_rate": 7.83736200989722e-06, "loss": 4.1243, "step": 9090 }, { "epoch": 0.4330034259611724, "grad_norm": 5.0, "learning_rate": 7.83498287019414e-06, "loss": 4.029, "step": 9100 }, { "epoch": 0.4334792539017891, "grad_norm": 4.3125, "learning_rate": 7.832603730491055e-06, "loss": 4.2541, "step": 9110 }, { "epoch": 0.4339550818424058, "grad_norm": 4.6875, "learning_rate": 7.830224590787972e-06, "loss": 4.0478, "step": 9120 }, { "epoch": 0.43443090978302246, "grad_norm": 4.8125, "learning_rate": 7.827845451084888e-06, "loss": 4.0643, "step": 9130 }, { "epoch": 0.43490673772363914, "grad_norm": 4.59375, "learning_rate": 7.825466311381805e-06, "loss": 3.9756, "step": 9140 }, { "epoch": 0.4353825656642558, "grad_norm": 5.125, "learning_rate": 7.823087171678722e-06, "loss": 4.0083, "step": 9150 }, { "epoch": 0.4358583936048725, "grad_norm": 4.25, "learning_rate": 7.820708031975637e-06, "loss": 3.9386, "step": 9160 }, { "epoch": 0.4363342215454892, "grad_norm": 4.625, "learning_rate": 7.818328892272556e-06, "loss": 4.09, "step": 9170 }, { "epoch": 0.4368100494861058, "grad_norm": 5.03125, "learning_rate": 7.815949752569471e-06, "loss": 4.1253, "step": 9180 }, { "epoch": 0.4372858774267225, "grad_norm": 4.9375, "learning_rate": 7.813570612866388e-06, "loss": 4.0921, "step": 9190 }, { "epoch": 0.43776170536733916, "grad_norm": 4.5, "learning_rate": 7.811191473163303e-06, "loss": 4.0545, "step": 9200 }, { "epoch": 0.43823753330795584, "grad_norm": 4.625, "learning_rate": 7.808812333460222e-06, "loss": 4.1686, "step": 9210 }, { "epoch": 0.4387133612485725, "grad_norm": 4.71875, "learning_rate": 7.806433193757137e-06, "loss": 3.988, "step": 9220 }, { "epoch": 0.4391891891891892, "grad_norm": 14.375, "learning_rate": 7.804054054054054e-06, "loss": 4.1903, "step": 9230 }, { "epoch": 0.4396650171298059, "grad_norm": 4.5, "learning_rate": 7.801674914350971e-06, "loss": 4.2268, "step": 9240 }, { "epoch": 0.44014084507042256, "grad_norm": 4.625, "learning_rate": 7.799295774647888e-06, "loss": 4.023, "step": 9250 }, { "epoch": 0.4406166730110392, "grad_norm": 4.65625, "learning_rate": 7.796916634944805e-06, "loss": 4.1673, "step": 9260 }, { "epoch": 0.44109250095165586, "grad_norm": 5.28125, "learning_rate": 7.79453749524172e-06, "loss": 4.0333, "step": 9270 }, { "epoch": 0.44156832889227254, "grad_norm": 4.5625, "learning_rate": 7.792158355538639e-06, "loss": 4.1718, "step": 9280 }, { "epoch": 0.4420441568328892, "grad_norm": 4.71875, "learning_rate": 7.789779215835554e-06, "loss": 4.1078, "step": 9290 }, { "epoch": 0.4425199847735059, "grad_norm": 5.5, "learning_rate": 7.787400076132471e-06, "loss": 4.0494, "step": 9300 }, { "epoch": 0.4429958127141226, "grad_norm": 4.71875, "learning_rate": 7.785020936429388e-06, "loss": 4.0851, "step": 9310 }, { "epoch": 0.44347164065473926, "grad_norm": 4.65625, "learning_rate": 7.782641796726305e-06, "loss": 4.1267, "step": 9320 }, { "epoch": 0.44394746859535594, "grad_norm": 4.9375, "learning_rate": 7.780262657023222e-06, "loss": 4.0454, "step": 9330 }, { "epoch": 0.44442329653597257, "grad_norm": 4.75, "learning_rate": 7.777883517320137e-06, "loss": 4.0167, "step": 9340 }, { "epoch": 0.44489912447658925, "grad_norm": 5.0, "learning_rate": 7.775504377617055e-06, "loss": 4.1635, "step": 9350 }, { "epoch": 0.4453749524172059, "grad_norm": 4.5, "learning_rate": 7.77312523791397e-06, "loss": 4.0145, "step": 9360 }, { "epoch": 0.4458507803578226, "grad_norm": 4.5625, "learning_rate": 7.770746098210888e-06, "loss": 3.9908, "step": 9370 }, { "epoch": 0.4463266082984393, "grad_norm": 4.75, "learning_rate": 7.768366958507805e-06, "loss": 4.0018, "step": 9380 }, { "epoch": 0.44680243623905597, "grad_norm": 4.9375, "learning_rate": 7.765987818804721e-06, "loss": 3.9939, "step": 9390 }, { "epoch": 0.44727826417967265, "grad_norm": 4.5625, "learning_rate": 7.763608679101637e-06, "loss": 3.9706, "step": 9400 }, { "epoch": 0.4477540921202893, "grad_norm": 4.9375, "learning_rate": 7.761229539398554e-06, "loss": 4.0599, "step": 9410 }, { "epoch": 0.44822992006090595, "grad_norm": 4.8125, "learning_rate": 7.75885039969547e-06, "loss": 4.1081, "step": 9420 }, { "epoch": 0.44870574800152263, "grad_norm": 4.4375, "learning_rate": 7.756471259992387e-06, "loss": 4.0171, "step": 9430 }, { "epoch": 0.4491815759421393, "grad_norm": 4.03125, "learning_rate": 7.754092120289304e-06, "loss": 3.9122, "step": 9440 }, { "epoch": 0.449657403882756, "grad_norm": 4.53125, "learning_rate": 7.75171298058622e-06, "loss": 4.1097, "step": 9450 }, { "epoch": 0.45013323182337267, "grad_norm": 5.03125, "learning_rate": 7.749333840883138e-06, "loss": 4.0836, "step": 9460 }, { "epoch": 0.45060905976398935, "grad_norm": 4.71875, "learning_rate": 7.746954701180053e-06, "loss": 3.9705, "step": 9470 }, { "epoch": 0.45108488770460603, "grad_norm": 4.96875, "learning_rate": 7.74457556147697e-06, "loss": 3.9884, "step": 9480 }, { "epoch": 0.4515607156452227, "grad_norm": 4.71875, "learning_rate": 7.742196421773887e-06, "loss": 3.8952, "step": 9490 }, { "epoch": 0.45203654358583933, "grad_norm": 4.625, "learning_rate": 7.739817282070804e-06, "loss": 4.0709, "step": 9500 }, { "epoch": 0.452512371526456, "grad_norm": 4.78125, "learning_rate": 7.737438142367721e-06, "loss": 4.1257, "step": 9510 }, { "epoch": 0.4529881994670727, "grad_norm": 4.40625, "learning_rate": 7.735059002664636e-06, "loss": 4.0513, "step": 9520 }, { "epoch": 0.4534640274076894, "grad_norm": 4.8125, "learning_rate": 7.732679862961555e-06, "loss": 4.0129, "step": 9530 }, { "epoch": 0.45393985534830605, "grad_norm": 4.59375, "learning_rate": 7.73030072325847e-06, "loss": 4.0906, "step": 9540 }, { "epoch": 0.45441568328892273, "grad_norm": 5.0, "learning_rate": 7.727921583555387e-06, "loss": 4.1011, "step": 9550 }, { "epoch": 0.4548915112295394, "grad_norm": 4.78125, "learning_rate": 7.725542443852304e-06, "loss": 4.1635, "step": 9560 }, { "epoch": 0.4553673391701561, "grad_norm": 4.46875, "learning_rate": 7.72316330414922e-06, "loss": 3.8562, "step": 9570 }, { "epoch": 0.4558431671107727, "grad_norm": 4.625, "learning_rate": 7.720784164446136e-06, "loss": 4.2242, "step": 9580 }, { "epoch": 0.4563189950513894, "grad_norm": 4.53125, "learning_rate": 7.718405024743053e-06, "loss": 3.9076, "step": 9590 }, { "epoch": 0.4567948229920061, "grad_norm": 4.4375, "learning_rate": 7.71602588503997e-06, "loss": 4.0506, "step": 9600 }, { "epoch": 0.45727065093262276, "grad_norm": 5.15625, "learning_rate": 7.713646745336887e-06, "loss": 3.9729, "step": 9610 }, { "epoch": 0.45774647887323944, "grad_norm": 4.9375, "learning_rate": 7.711267605633804e-06, "loss": 4.1227, "step": 9620 }, { "epoch": 0.4582223068138561, "grad_norm": 20.25, "learning_rate": 7.70888846593072e-06, "loss": 4.0235, "step": 9630 }, { "epoch": 0.4586981347544728, "grad_norm": 4.65625, "learning_rate": 7.706509326227638e-06, "loss": 3.9877, "step": 9640 }, { "epoch": 0.4591739626950895, "grad_norm": 4.6875, "learning_rate": 7.704130186524553e-06, "loss": 4.1225, "step": 9650 }, { "epoch": 0.4596497906357061, "grad_norm": 4.875, "learning_rate": 7.70175104682147e-06, "loss": 3.9854, "step": 9660 }, { "epoch": 0.4601256185763228, "grad_norm": 4.3125, "learning_rate": 7.699371907118387e-06, "loss": 3.9507, "step": 9670 }, { "epoch": 0.46060144651693946, "grad_norm": 4.59375, "learning_rate": 7.696992767415304e-06, "loss": 4.1436, "step": 9680 }, { "epoch": 0.46107727445755614, "grad_norm": 4.6875, "learning_rate": 7.69461362771222e-06, "loss": 4.1127, "step": 9690 }, { "epoch": 0.4615531023981728, "grad_norm": 4.625, "learning_rate": 7.692234488009137e-06, "loss": 4.0434, "step": 9700 }, { "epoch": 0.4620289303387895, "grad_norm": 4.59375, "learning_rate": 7.689855348306053e-06, "loss": 4.152, "step": 9710 }, { "epoch": 0.4625047582794062, "grad_norm": 5.1875, "learning_rate": 7.68747620860297e-06, "loss": 3.9086, "step": 9720 }, { "epoch": 0.46298058622002286, "grad_norm": 4.6875, "learning_rate": 7.685097068899886e-06, "loss": 4.1181, "step": 9730 }, { "epoch": 0.46345641416063954, "grad_norm": 4.53125, "learning_rate": 7.682717929196803e-06, "loss": 4.0861, "step": 9740 }, { "epoch": 0.46393224210125616, "grad_norm": 4.5, "learning_rate": 7.68033878949372e-06, "loss": 3.9611, "step": 9750 }, { "epoch": 0.46440807004187284, "grad_norm": 4.71875, "learning_rate": 7.677959649790635e-06, "loss": 3.9973, "step": 9760 }, { "epoch": 0.4648838979824895, "grad_norm": 4.84375, "learning_rate": 7.675580510087552e-06, "loss": 4.1605, "step": 9770 }, { "epoch": 0.4653597259231062, "grad_norm": 4.875, "learning_rate": 7.67320137038447e-06, "loss": 3.9693, "step": 9780 }, { "epoch": 0.4658355538637229, "grad_norm": 4.6875, "learning_rate": 7.670822230681386e-06, "loss": 3.9255, "step": 9790 }, { "epoch": 0.46631138180433956, "grad_norm": 4.46875, "learning_rate": 7.668443090978303e-06, "loss": 4.0904, "step": 9800 }, { "epoch": 0.46678720974495624, "grad_norm": 4.03125, "learning_rate": 7.66606395127522e-06, "loss": 3.947, "step": 9810 }, { "epoch": 0.4672630376855729, "grad_norm": 4.71875, "learning_rate": 7.663684811572137e-06, "loss": 4.0348, "step": 9820 }, { "epoch": 0.46773886562618955, "grad_norm": 4.8125, "learning_rate": 7.661305671869052e-06, "loss": 3.9808, "step": 9830 }, { "epoch": 0.46821469356680623, "grad_norm": 4.9375, "learning_rate": 7.658926532165969e-06, "loss": 4.1114, "step": 9840 }, { "epoch": 0.4686905215074229, "grad_norm": 4.8125, "learning_rate": 7.656547392462886e-06, "loss": 3.8889, "step": 9850 }, { "epoch": 0.4691663494480396, "grad_norm": 4.6875, "learning_rate": 7.654168252759803e-06, "loss": 4.0915, "step": 9860 }, { "epoch": 0.46964217738865627, "grad_norm": 4.53125, "learning_rate": 7.65178911305672e-06, "loss": 4.0441, "step": 9870 }, { "epoch": 0.47011800532927295, "grad_norm": 5.03125, "learning_rate": 7.649409973353637e-06, "loss": 4.0567, "step": 9880 }, { "epoch": 0.4705938332698896, "grad_norm": 4.78125, "learning_rate": 7.647030833650552e-06, "loss": 4.0815, "step": 9890 }, { "epoch": 0.4710696612105063, "grad_norm": 4.59375, "learning_rate": 7.644651693947469e-06, "loss": 4.1606, "step": 9900 }, { "epoch": 0.47154548915112293, "grad_norm": 5.53125, "learning_rate": 7.642272554244386e-06, "loss": 3.9775, "step": 9910 }, { "epoch": 0.4720213170917396, "grad_norm": 4.53125, "learning_rate": 7.639893414541303e-06, "loss": 4.0499, "step": 9920 }, { "epoch": 0.4724971450323563, "grad_norm": 4.90625, "learning_rate": 7.63751427483822e-06, "loss": 3.9946, "step": 9930 }, { "epoch": 0.47297297297297297, "grad_norm": 4.65625, "learning_rate": 7.635135135135135e-06, "loss": 4.049, "step": 9940 }, { "epoch": 0.47344880091358965, "grad_norm": 4.4375, "learning_rate": 7.632755995432053e-06, "loss": 3.9993, "step": 9950 }, { "epoch": 0.47392462885420633, "grad_norm": 4.875, "learning_rate": 7.630376855728969e-06, "loss": 3.9024, "step": 9960 }, { "epoch": 0.474400456794823, "grad_norm": 4.4375, "learning_rate": 7.627997716025886e-06, "loss": 4.0048, "step": 9970 }, { "epoch": 0.4748762847354397, "grad_norm": 4.90625, "learning_rate": 7.625618576322802e-06, "loss": 4.0581, "step": 9980 }, { "epoch": 0.4753521126760563, "grad_norm": 4.78125, "learning_rate": 7.623239436619719e-06, "loss": 3.9108, "step": 9990 }, { "epoch": 0.475827940616673, "grad_norm": 4.65625, "learning_rate": 7.6208602969166355e-06, "loss": 4.1818, "step": 10000 }, { "epoch": 0.4763037685572897, "grad_norm": 4.53125, "learning_rate": 7.618481157213552e-06, "loss": 4.068, "step": 10010 }, { "epoch": 0.47677959649790635, "grad_norm": 4.625, "learning_rate": 7.6161020175104685e-06, "loss": 4.064, "step": 10020 }, { "epoch": 0.47725542443852303, "grad_norm": 4.5, "learning_rate": 7.613722877807385e-06, "loss": 4.0673, "step": 10030 }, { "epoch": 0.4777312523791397, "grad_norm": 4.59375, "learning_rate": 7.611343738104302e-06, "loss": 4.1277, "step": 10040 }, { "epoch": 0.4782070803197564, "grad_norm": 4.8125, "learning_rate": 7.608964598401218e-06, "loss": 4.0548, "step": 10050 }, { "epoch": 0.4786829082603731, "grad_norm": 4.84375, "learning_rate": 7.606585458698136e-06, "loss": 4.2003, "step": 10060 }, { "epoch": 0.4791587362009897, "grad_norm": 4.28125, "learning_rate": 7.604206318995052e-06, "loss": 4.0466, "step": 10070 }, { "epoch": 0.4796345641416064, "grad_norm": 4.3125, "learning_rate": 7.601827179291968e-06, "loss": 4.0707, "step": 10080 }, { "epoch": 0.48011039208222306, "grad_norm": 4.6875, "learning_rate": 7.599448039588885e-06, "loss": 3.8443, "step": 10090 }, { "epoch": 0.48058622002283974, "grad_norm": 4.90625, "learning_rate": 7.597068899885802e-06, "loss": 4.1018, "step": 10100 }, { "epoch": 0.4810620479634564, "grad_norm": 5.0625, "learning_rate": 7.594689760182718e-06, "loss": 4.1831, "step": 10110 }, { "epoch": 0.4815378759040731, "grad_norm": 4.53125, "learning_rate": 7.592310620479635e-06, "loss": 4.0633, "step": 10120 }, { "epoch": 0.4820137038446898, "grad_norm": 4.53125, "learning_rate": 7.589931480776552e-06, "loss": 4.0465, "step": 10130 }, { "epoch": 0.48248953178530646, "grad_norm": 4.96875, "learning_rate": 7.587552341073469e-06, "loss": 4.1459, "step": 10140 }, { "epoch": 0.4829653597259231, "grad_norm": 4.78125, "learning_rate": 7.585173201370385e-06, "loss": 4.0844, "step": 10150 }, { "epoch": 0.48344118766653976, "grad_norm": 4.1875, "learning_rate": 7.582794061667301e-06, "loss": 4.025, "step": 10160 }, { "epoch": 0.48391701560715644, "grad_norm": 4.28125, "learning_rate": 7.580414921964219e-06, "loss": 4.0564, "step": 10170 }, { "epoch": 0.4843928435477731, "grad_norm": 4.65625, "learning_rate": 7.578035782261135e-06, "loss": 4.0999, "step": 10180 }, { "epoch": 0.4848686714883898, "grad_norm": 4.09375, "learning_rate": 7.575656642558051e-06, "loss": 3.9703, "step": 10190 }, { "epoch": 0.4853444994290065, "grad_norm": 4.65625, "learning_rate": 7.573277502854969e-06, "loss": 4.0011, "step": 10200 }, { "epoch": 0.48582032736962316, "grad_norm": 4.71875, "learning_rate": 7.570898363151885e-06, "loss": 3.9764, "step": 10210 }, { "epoch": 0.48629615531023984, "grad_norm": 4.875, "learning_rate": 7.568519223448802e-06, "loss": 3.912, "step": 10220 }, { "epoch": 0.48677198325085647, "grad_norm": 4.3125, "learning_rate": 7.566140083745718e-06, "loss": 4.1512, "step": 10230 }, { "epoch": 0.48724781119147315, "grad_norm": 4.53125, "learning_rate": 7.5637609440426355e-06, "loss": 3.9626, "step": 10240 }, { "epoch": 0.4877236391320898, "grad_norm": 4.46875, "learning_rate": 7.5613818043395516e-06, "loss": 4.2019, "step": 10250 }, { "epoch": 0.4881994670727065, "grad_norm": 4.46875, "learning_rate": 7.559002664636468e-06, "loss": 3.9752, "step": 10260 }, { "epoch": 0.4886752950133232, "grad_norm": 4.5, "learning_rate": 7.556623524933385e-06, "loss": 4.0679, "step": 10270 }, { "epoch": 0.48915112295393987, "grad_norm": 4.65625, "learning_rate": 7.5542443852303015e-06, "loss": 4.1294, "step": 10280 }, { "epoch": 0.48962695089455655, "grad_norm": 4.5625, "learning_rate": 7.5518652455272175e-06, "loss": 4.0745, "step": 10290 }, { "epoch": 0.4901027788351732, "grad_norm": 4.96875, "learning_rate": 7.5494861058241345e-06, "loss": 4.0567, "step": 10300 }, { "epoch": 0.49057860677578985, "grad_norm": 4.875, "learning_rate": 7.547106966121051e-06, "loss": 4.0765, "step": 10310 }, { "epoch": 0.49105443471640653, "grad_norm": 4.78125, "learning_rate": 7.544727826417968e-06, "loss": 4.1341, "step": 10320 }, { "epoch": 0.4915302626570232, "grad_norm": 4.65625, "learning_rate": 7.542348686714884e-06, "loss": 4.0094, "step": 10330 }, { "epoch": 0.4920060905976399, "grad_norm": 5.125, "learning_rate": 7.5399695470118e-06, "loss": 4.2084, "step": 10340 }, { "epoch": 0.49248191853825657, "grad_norm": 4.71875, "learning_rate": 7.537590407308718e-06, "loss": 4.0989, "step": 10350 }, { "epoch": 0.49295774647887325, "grad_norm": 4.5, "learning_rate": 7.535211267605634e-06, "loss": 4.0678, "step": 10360 }, { "epoch": 0.49343357441948993, "grad_norm": 4.5, "learning_rate": 7.53283212790255e-06, "loss": 3.9699, "step": 10370 }, { "epoch": 0.4939094023601066, "grad_norm": 4.71875, "learning_rate": 7.530452988199468e-06, "loss": 4.2733, "step": 10380 }, { "epoch": 0.49438523030072323, "grad_norm": 4.53125, "learning_rate": 7.528073848496384e-06, "loss": 4.0913, "step": 10390 }, { "epoch": 0.4948610582413399, "grad_norm": 4.46875, "learning_rate": 7.525694708793301e-06, "loss": 3.9948, "step": 10400 }, { "epoch": 0.4953368861819566, "grad_norm": 4.5, "learning_rate": 7.523315569090217e-06, "loss": 4.0125, "step": 10410 }, { "epoch": 0.4958127141225733, "grad_norm": 4.59375, "learning_rate": 7.520936429387135e-06, "loss": 3.7995, "step": 10420 }, { "epoch": 0.49628854206318995, "grad_norm": 4.5625, "learning_rate": 7.518557289684051e-06, "loss": 4.0771, "step": 10430 }, { "epoch": 0.49676437000380663, "grad_norm": 4.71875, "learning_rate": 7.516178149980967e-06, "loss": 4.0079, "step": 10440 }, { "epoch": 0.4972401979444233, "grad_norm": 5.1875, "learning_rate": 7.513799010277885e-06, "loss": 4.1862, "step": 10450 }, { "epoch": 0.49771602588504, "grad_norm": 4.875, "learning_rate": 7.511419870574801e-06, "loss": 4.0739, "step": 10460 }, { "epoch": 0.4981918538256566, "grad_norm": 4.75, "learning_rate": 7.509040730871717e-06, "loss": 4.0781, "step": 10470 }, { "epoch": 0.4986676817662733, "grad_norm": 4.71875, "learning_rate": 7.506661591168634e-06, "loss": 3.9341, "step": 10480 }, { "epoch": 0.49914350970689, "grad_norm": 4.40625, "learning_rate": 7.504282451465551e-06, "loss": 4.0952, "step": 10490 }, { "epoch": 0.49961933764750666, "grad_norm": 5.0, "learning_rate": 7.501903311762468e-06, "loss": 3.9586, "step": 10500 }, { "epoch": 0.5000951655881233, "grad_norm": 4.78125, "learning_rate": 7.499524172059384e-06, "loss": 4.0602, "step": 10510 }, { "epoch": 0.50057099352874, "grad_norm": 4.71875, "learning_rate": 7.4971450323563015e-06, "loss": 4.176, "step": 10520 }, { "epoch": 0.5010468214693566, "grad_norm": 4.5, "learning_rate": 7.4947658926532176e-06, "loss": 3.9778, "step": 10530 }, { "epoch": 0.5015226494099734, "grad_norm": 4.65625, "learning_rate": 7.492386752950134e-06, "loss": 4.1717, "step": 10540 }, { "epoch": 0.50199847735059, "grad_norm": 4.25, "learning_rate": 7.49000761324705e-06, "loss": 4.034, "step": 10550 }, { "epoch": 0.5024743052912067, "grad_norm": 4.625, "learning_rate": 7.4876284735439675e-06, "loss": 4.0563, "step": 10560 }, { "epoch": 0.5029501332318234, "grad_norm": 4.9375, "learning_rate": 7.4852493338408835e-06, "loss": 4.1799, "step": 10570 }, { "epoch": 0.5034259611724401, "grad_norm": 4.5625, "learning_rate": 7.4828701941378005e-06, "loss": 4.0498, "step": 10580 }, { "epoch": 0.5039017891130567, "grad_norm": 4.15625, "learning_rate": 7.480491054434717e-06, "loss": 3.9922, "step": 10590 }, { "epoch": 0.5043776170536733, "grad_norm": 5.0, "learning_rate": 7.478111914731634e-06, "loss": 4.0034, "step": 10600 }, { "epoch": 0.5048534449942901, "grad_norm": 4.5, "learning_rate": 7.47573277502855e-06, "loss": 4.0448, "step": 10610 }, { "epoch": 0.5053292729349067, "grad_norm": 4.6875, "learning_rate": 7.473353635325466e-06, "loss": 4.0999, "step": 10620 }, { "epoch": 0.5058051008755234, "grad_norm": 4.59375, "learning_rate": 7.470974495622384e-06, "loss": 4.0363, "step": 10630 }, { "epoch": 0.5062809288161401, "grad_norm": 5.90625, "learning_rate": 7.4685953559193e-06, "loss": 4.0198, "step": 10640 }, { "epoch": 0.5067567567567568, "grad_norm": 4.8125, "learning_rate": 7.466216216216216e-06, "loss": 4.0244, "step": 10650 }, { "epoch": 0.5072325846973734, "grad_norm": 4.125, "learning_rate": 7.463837076513133e-06, "loss": 4.1706, "step": 10660 }, { "epoch": 0.50770841263799, "grad_norm": 4.59375, "learning_rate": 7.46145793681005e-06, "loss": 4.0273, "step": 10670 }, { "epoch": 0.5081842405786068, "grad_norm": 4.25, "learning_rate": 7.459078797106967e-06, "loss": 3.9352, "step": 10680 }, { "epoch": 0.5086600685192234, "grad_norm": 4.65625, "learning_rate": 7.456699657403883e-06, "loss": 4.06, "step": 10690 }, { "epoch": 0.5091358964598401, "grad_norm": 4.4375, "learning_rate": 7.4543205177008e-06, "loss": 4.0672, "step": 10700 }, { "epoch": 0.5096117244004568, "grad_norm": 4.96875, "learning_rate": 7.451941377997717e-06, "loss": 4.0702, "step": 10710 }, { "epoch": 0.5100875523410735, "grad_norm": 4.75, "learning_rate": 7.449562238294633e-06, "loss": 4.1287, "step": 10720 }, { "epoch": 0.5105633802816901, "grad_norm": 4.5, "learning_rate": 7.447183098591549e-06, "loss": 4.1298, "step": 10730 }, { "epoch": 0.5110392082223069, "grad_norm": 4.5, "learning_rate": 7.444803958888467e-06, "loss": 4.0354, "step": 10740 }, { "epoch": 0.5115150361629235, "grad_norm": 4.4375, "learning_rate": 7.442424819185383e-06, "loss": 4.1193, "step": 10750 }, { "epoch": 0.5119908641035401, "grad_norm": 4.84375, "learning_rate": 7.4400456794823e-06, "loss": 4.0505, "step": 10760 }, { "epoch": 0.5124666920441568, "grad_norm": 4.375, "learning_rate": 7.437666539779217e-06, "loss": 3.9897, "step": 10770 }, { "epoch": 0.5129425199847735, "grad_norm": 4.90625, "learning_rate": 7.435287400076134e-06, "loss": 4.034, "step": 10780 }, { "epoch": 0.5134183479253902, "grad_norm": 4.875, "learning_rate": 7.43290826037305e-06, "loss": 3.9337, "step": 10790 }, { "epoch": 0.5138941758660068, "grad_norm": 4.71875, "learning_rate": 7.430529120669966e-06, "loss": 3.9022, "step": 10800 }, { "epoch": 0.5143700038066236, "grad_norm": 4.46875, "learning_rate": 7.4281499809668836e-06, "loss": 4.0194, "step": 10810 }, { "epoch": 0.5148458317472402, "grad_norm": 4.4375, "learning_rate": 7.4257708412638e-06, "loss": 4.1164, "step": 10820 }, { "epoch": 0.5153216596878569, "grad_norm": 4.46875, "learning_rate": 7.423391701560716e-06, "loss": 3.9883, "step": 10830 }, { "epoch": 0.5157974876284735, "grad_norm": 4.78125, "learning_rate": 7.4210125618576335e-06, "loss": 4.0243, "step": 10840 }, { "epoch": 0.5162733155690902, "grad_norm": 4.3125, "learning_rate": 7.4186334221545495e-06, "loss": 4.0437, "step": 10850 }, { "epoch": 0.5167491435097069, "grad_norm": 4.21875, "learning_rate": 7.4162542824514665e-06, "loss": 3.9917, "step": 10860 }, { "epoch": 0.5172249714503235, "grad_norm": 4.375, "learning_rate": 7.4138751427483825e-06, "loss": 4.1784, "step": 10870 }, { "epoch": 0.5177007993909403, "grad_norm": 4.25, "learning_rate": 7.4114960030452994e-06, "loss": 4.0859, "step": 10880 }, { "epoch": 0.5181766273315569, "grad_norm": 4.8125, "learning_rate": 7.409116863342216e-06, "loss": 3.9327, "step": 10890 }, { "epoch": 0.5186524552721736, "grad_norm": 4.90625, "learning_rate": 7.406737723639132e-06, "loss": 4.1313, "step": 10900 }, { "epoch": 0.5191282832127903, "grad_norm": 4.25, "learning_rate": 7.4043585839360485e-06, "loss": 4.1496, "step": 10910 }, { "epoch": 0.5196041111534069, "grad_norm": 4.875, "learning_rate": 7.401979444232966e-06, "loss": 4.0262, "step": 10920 }, { "epoch": 0.5200799390940236, "grad_norm": 5.0625, "learning_rate": 7.399600304529882e-06, "loss": 4.0176, "step": 10930 }, { "epoch": 0.5205557670346402, "grad_norm": 4.375, "learning_rate": 7.397221164826799e-06, "loss": 4.1028, "step": 10940 }, { "epoch": 0.521031594975257, "grad_norm": 4.65625, "learning_rate": 7.394842025123716e-06, "loss": 4.0742, "step": 10950 }, { "epoch": 0.5215074229158736, "grad_norm": 4.6875, "learning_rate": 7.392462885420632e-06, "loss": 3.9017, "step": 10960 }, { "epoch": 0.5219832508564903, "grad_norm": 4.46875, "learning_rate": 7.390083745717549e-06, "loss": 4.0458, "step": 10970 }, { "epoch": 0.522459078797107, "grad_norm": 4.3125, "learning_rate": 7.387704606014465e-06, "loss": 4.0727, "step": 10980 }, { "epoch": 0.5229349067377237, "grad_norm": 4.5625, "learning_rate": 7.385325466311383e-06, "loss": 4.1271, "step": 10990 }, { "epoch": 0.5234107346783403, "grad_norm": 4.8125, "learning_rate": 7.382946326608299e-06, "loss": 3.9702, "step": 11000 }, { "epoch": 0.5238865626189569, "grad_norm": 4.53125, "learning_rate": 7.380567186905215e-06, "loss": 3.9499, "step": 11010 }, { "epoch": 0.5243623905595737, "grad_norm": 4.9375, "learning_rate": 7.378188047202133e-06, "loss": 4.1384, "step": 11020 }, { "epoch": 0.5248382185001903, "grad_norm": 4.71875, "learning_rate": 7.375808907499049e-06, "loss": 3.9532, "step": 11030 }, { "epoch": 0.525314046440807, "grad_norm": 4.6875, "learning_rate": 7.373429767795966e-06, "loss": 3.9327, "step": 11040 }, { "epoch": 0.5257898743814237, "grad_norm": 4.71875, "learning_rate": 7.371050628092882e-06, "loss": 4.1369, "step": 11050 }, { "epoch": 0.5262657023220404, "grad_norm": 4.78125, "learning_rate": 7.368671488389799e-06, "loss": 4.2124, "step": 11060 }, { "epoch": 0.526741530262657, "grad_norm": 4.5625, "learning_rate": 7.366292348686716e-06, "loss": 4.0123, "step": 11070 }, { "epoch": 0.5272173582032736, "grad_norm": 4.75, "learning_rate": 7.363913208983632e-06, "loss": 4.1901, "step": 11080 }, { "epoch": 0.5276931861438904, "grad_norm": 4.53125, "learning_rate": 7.3615340692805496e-06, "loss": 4.0274, "step": 11090 }, { "epoch": 0.528169014084507, "grad_norm": 4.5, "learning_rate": 7.359154929577466e-06, "loss": 4.1987, "step": 11100 }, { "epoch": 0.5286448420251237, "grad_norm": 4.59375, "learning_rate": 7.356775789874382e-06, "loss": 4.0537, "step": 11110 }, { "epoch": 0.5291206699657404, "grad_norm": 4.96875, "learning_rate": 7.354396650171299e-06, "loss": 3.8954, "step": 11120 }, { "epoch": 0.5295964979063571, "grad_norm": 4.90625, "learning_rate": 7.3520175104682155e-06, "loss": 4.0217, "step": 11130 }, { "epoch": 0.5300723258469737, "grad_norm": 4.8125, "learning_rate": 7.349638370765132e-06, "loss": 4.1729, "step": 11140 }, { "epoch": 0.5305481537875905, "grad_norm": 4.59375, "learning_rate": 7.3472592310620485e-06, "loss": 4.0186, "step": 11150 }, { "epoch": 0.5310239817282071, "grad_norm": 4.65625, "learning_rate": 7.3448800913589654e-06, "loss": 4.0902, "step": 11160 }, { "epoch": 0.5314998096688237, "grad_norm": 4.59375, "learning_rate": 7.342500951655882e-06, "loss": 3.9269, "step": 11170 }, { "epoch": 0.5319756376094404, "grad_norm": 4.71875, "learning_rate": 7.340121811952798e-06, "loss": 4.0447, "step": 11180 }, { "epoch": 0.5324514655500571, "grad_norm": 4.71875, "learning_rate": 7.3377426722497145e-06, "loss": 3.8405, "step": 11190 }, { "epoch": 0.5329272934906738, "grad_norm": 4.375, "learning_rate": 7.335363532546632e-06, "loss": 3.8833, "step": 11200 }, { "epoch": 0.5334031214312904, "grad_norm": 4.78125, "learning_rate": 7.332984392843548e-06, "loss": 3.8354, "step": 11210 }, { "epoch": 0.5338789493719072, "grad_norm": 5.09375, "learning_rate": 7.330605253140464e-06, "loss": 4.0918, "step": 11220 }, { "epoch": 0.5343547773125238, "grad_norm": 4.59375, "learning_rate": 7.328226113437381e-06, "loss": 4.0213, "step": 11230 }, { "epoch": 0.5348306052531404, "grad_norm": 4.34375, "learning_rate": 7.325846973734298e-06, "loss": 4.0927, "step": 11240 }, { "epoch": 0.5353064331937571, "grad_norm": 4.78125, "learning_rate": 7.323467834031215e-06, "loss": 4.0796, "step": 11250 }, { "epoch": 0.5357822611343738, "grad_norm": 4.625, "learning_rate": 7.321088694328131e-06, "loss": 4.0651, "step": 11260 }, { "epoch": 0.5362580890749905, "grad_norm": 4.28125, "learning_rate": 7.318709554625049e-06, "loss": 4.0108, "step": 11270 }, { "epoch": 0.5367339170156071, "grad_norm": 4.34375, "learning_rate": 7.316330414921965e-06, "loss": 3.9107, "step": 11280 }, { "epoch": 0.5372097449562239, "grad_norm": 4.46875, "learning_rate": 7.313951275218881e-06, "loss": 4.3082, "step": 11290 }, { "epoch": 0.5376855728968405, "grad_norm": 4.1875, "learning_rate": 7.311572135515798e-06, "loss": 3.9937, "step": 11300 }, { "epoch": 0.5381614008374572, "grad_norm": 4.78125, "learning_rate": 7.309192995812715e-06, "loss": 4.0918, "step": 11310 }, { "epoch": 0.5386372287780739, "grad_norm": 4.75, "learning_rate": 7.306813856109631e-06, "loss": 4.1854, "step": 11320 }, { "epoch": 0.5391130567186905, "grad_norm": 4.8125, "learning_rate": 7.304434716406548e-06, "loss": 4.1337, "step": 11330 }, { "epoch": 0.5395888846593072, "grad_norm": 4.90625, "learning_rate": 7.302055576703465e-06, "loss": 4.0625, "step": 11340 }, { "epoch": 0.5400647125999238, "grad_norm": 5.09375, "learning_rate": 7.299676437000382e-06, "loss": 4.0177, "step": 11350 }, { "epoch": 0.5405405405405406, "grad_norm": 4.75, "learning_rate": 7.297297297297298e-06, "loss": 3.9554, "step": 11360 }, { "epoch": 0.5410163684811572, "grad_norm": 4.65625, "learning_rate": 7.294918157594214e-06, "loss": 4.1686, "step": 11370 }, { "epoch": 0.5414921964217739, "grad_norm": 4.65625, "learning_rate": 7.292539017891132e-06, "loss": 4.1998, "step": 11380 }, { "epoch": 0.5419680243623906, "grad_norm": 4.4375, "learning_rate": 7.290159878188048e-06, "loss": 3.9503, "step": 11390 }, { "epoch": 0.5424438523030072, "grad_norm": 4.375, "learning_rate": 7.287780738484964e-06, "loss": 3.9531, "step": 11400 }, { "epoch": 0.5429196802436239, "grad_norm": 4.65625, "learning_rate": 7.2854015987818815e-06, "loss": 4.0291, "step": 11410 }, { "epoch": 0.5433955081842405, "grad_norm": 4.5625, "learning_rate": 7.283022459078798e-06, "loss": 3.8578, "step": 11420 }, { "epoch": 0.5438713361248573, "grad_norm": 10.4375, "learning_rate": 7.2806433193757145e-06, "loss": 4.25, "step": 11430 }, { "epoch": 0.5443471640654739, "grad_norm": 4.875, "learning_rate": 7.278264179672631e-06, "loss": 4.0522, "step": 11440 }, { "epoch": 0.5448229920060906, "grad_norm": 4.8125, "learning_rate": 7.275885039969548e-06, "loss": 4.1021, "step": 11450 }, { "epoch": 0.5452988199467073, "grad_norm": 4.78125, "learning_rate": 7.273505900266464e-06, "loss": 4.005, "step": 11460 }, { "epoch": 0.545774647887324, "grad_norm": 4.65625, "learning_rate": 7.2711267605633805e-06, "loss": 4.1352, "step": 11470 }, { "epoch": 0.5462504758279406, "grad_norm": 4.8125, "learning_rate": 7.268747620860298e-06, "loss": 4.1259, "step": 11480 }, { "epoch": 0.5467263037685572, "grad_norm": 4.78125, "learning_rate": 7.266368481157214e-06, "loss": 4.1814, "step": 11490 }, { "epoch": 0.547202131709174, "grad_norm": 4.90625, "learning_rate": 7.26398934145413e-06, "loss": 4.1219, "step": 11500 }, { "epoch": 0.5476779596497906, "grad_norm": 5.125, "learning_rate": 7.261610201751047e-06, "loss": 4.0774, "step": 11510 }, { "epoch": 0.5481537875904073, "grad_norm": 4.46875, "learning_rate": 7.259231062047964e-06, "loss": 4.0232, "step": 11520 }, { "epoch": 0.548629615531024, "grad_norm": 4.875, "learning_rate": 7.256851922344881e-06, "loss": 4.162, "step": 11530 }, { "epoch": 0.5491054434716407, "grad_norm": 4.625, "learning_rate": 7.254472782641797e-06, "loss": 3.9871, "step": 11540 }, { "epoch": 0.5495812714122573, "grad_norm": 4.53125, "learning_rate": 7.252093642938713e-06, "loss": 4.2224, "step": 11550 }, { "epoch": 0.550057099352874, "grad_norm": 4.8125, "learning_rate": 7.249714503235631e-06, "loss": 3.9575, "step": 11560 }, { "epoch": 0.5505329272934907, "grad_norm": 5.15625, "learning_rate": 7.247335363532547e-06, "loss": 4.015, "step": 11570 }, { "epoch": 0.5510087552341073, "grad_norm": 4.5625, "learning_rate": 7.244956223829463e-06, "loss": 4.1351, "step": 11580 }, { "epoch": 0.551484583174724, "grad_norm": 4.3125, "learning_rate": 7.242577084126381e-06, "loss": 4.0914, "step": 11590 }, { "epoch": 0.5519604111153407, "grad_norm": 4.65625, "learning_rate": 7.240197944423297e-06, "loss": 4.077, "step": 11600 }, { "epoch": 0.5524362390559574, "grad_norm": 4.1875, "learning_rate": 7.237818804720214e-06, "loss": 4.0893, "step": 11610 }, { "epoch": 0.552912066996574, "grad_norm": 4.65625, "learning_rate": 7.23543966501713e-06, "loss": 3.9318, "step": 11620 }, { "epoch": 0.5533878949371908, "grad_norm": 4.34375, "learning_rate": 7.233060525314048e-06, "loss": 4.1682, "step": 11630 }, { "epoch": 0.5538637228778074, "grad_norm": 4.5625, "learning_rate": 7.230681385610964e-06, "loss": 4.0474, "step": 11640 }, { "epoch": 0.554339550818424, "grad_norm": 4.03125, "learning_rate": 7.22830224590788e-06, "loss": 3.8519, "step": 11650 }, { "epoch": 0.5548153787590407, "grad_norm": 4.71875, "learning_rate": 7.225923106204798e-06, "loss": 3.9447, "step": 11660 }, { "epoch": 0.5552912066996574, "grad_norm": 5.09375, "learning_rate": 7.223543966501714e-06, "loss": 4.1161, "step": 11670 }, { "epoch": 0.5557670346402741, "grad_norm": 4.78125, "learning_rate": 7.22116482679863e-06, "loss": 4.0164, "step": 11680 }, { "epoch": 0.5562428625808907, "grad_norm": 4.78125, "learning_rate": 7.218785687095547e-06, "loss": 4.1349, "step": 11690 }, { "epoch": 0.5567186905215075, "grad_norm": 5.0, "learning_rate": 7.216406547392464e-06, "loss": 4.172, "step": 11700 }, { "epoch": 0.5571945184621241, "grad_norm": 4.8125, "learning_rate": 7.2140274076893805e-06, "loss": 4.0729, "step": 11710 }, { "epoch": 0.5576703464027408, "grad_norm": 6.21875, "learning_rate": 7.211648267986297e-06, "loss": 3.9797, "step": 11720 }, { "epoch": 0.5581461743433574, "grad_norm": 4.875, "learning_rate": 7.2092691282832135e-06, "loss": 4.0222, "step": 11730 }, { "epoch": 0.5586220022839741, "grad_norm": 5.125, "learning_rate": 7.20688998858013e-06, "loss": 3.9469, "step": 11740 }, { "epoch": 0.5590978302245908, "grad_norm": 4.625, "learning_rate": 7.2045108488770465e-06, "loss": 4.0853, "step": 11750 }, { "epoch": 0.5595736581652074, "grad_norm": 4.90625, "learning_rate": 7.2021317091739625e-06, "loss": 4.0284, "step": 11760 }, { "epoch": 0.5600494861058242, "grad_norm": 4.75, "learning_rate": 7.19975256947088e-06, "loss": 4.096, "step": 11770 }, { "epoch": 0.5605253140464408, "grad_norm": 5.0625, "learning_rate": 7.197373429767796e-06, "loss": 3.9253, "step": 11780 }, { "epoch": 0.5610011419870575, "grad_norm": 4.96875, "learning_rate": 7.194994290064713e-06, "loss": 4.0769, "step": 11790 }, { "epoch": 0.5614769699276742, "grad_norm": 4.5, "learning_rate": 7.192615150361629e-06, "loss": 3.9719, "step": 11800 }, { "epoch": 0.5619527978682908, "grad_norm": 4.71875, "learning_rate": 7.190236010658547e-06, "loss": 4.1842, "step": 11810 }, { "epoch": 0.5624286258089075, "grad_norm": 4.625, "learning_rate": 7.187856870955463e-06, "loss": 3.9786, "step": 11820 }, { "epoch": 0.5629044537495241, "grad_norm": 4.8125, "learning_rate": 7.185477731252379e-06, "loss": 3.964, "step": 11830 }, { "epoch": 0.5633802816901409, "grad_norm": 4.875, "learning_rate": 7.183098591549297e-06, "loss": 4.0466, "step": 11840 }, { "epoch": 0.5638561096307575, "grad_norm": 4.90625, "learning_rate": 7.180719451846213e-06, "loss": 4.214, "step": 11850 }, { "epoch": 0.5643319375713742, "grad_norm": 4.90625, "learning_rate": 7.178340312143129e-06, "loss": 3.8595, "step": 11860 }, { "epoch": 0.5648077655119909, "grad_norm": 5.0625, "learning_rate": 7.175961172440046e-06, "loss": 4.0457, "step": 11870 }, { "epoch": 0.5652835934526076, "grad_norm": 4.78125, "learning_rate": 7.173582032736963e-06, "loss": 4.1472, "step": 11880 }, { "epoch": 0.5657594213932242, "grad_norm": 4.53125, "learning_rate": 7.17120289303388e-06, "loss": 3.9388, "step": 11890 }, { "epoch": 0.5662352493338408, "grad_norm": 4.6875, "learning_rate": 7.168823753330796e-06, "loss": 3.8862, "step": 11900 }, { "epoch": 0.5667110772744576, "grad_norm": 4.84375, "learning_rate": 7.166444613627713e-06, "loss": 3.9665, "step": 11910 }, { "epoch": 0.5671869052150742, "grad_norm": 4.65625, "learning_rate": 7.16406547392463e-06, "loss": 4.0925, "step": 11920 }, { "epoch": 0.5676627331556909, "grad_norm": 5.0, "learning_rate": 7.161686334221546e-06, "loss": 4.1095, "step": 11930 }, { "epoch": 0.5681385610963076, "grad_norm": 4.71875, "learning_rate": 7.159307194518462e-06, "loss": 3.859, "step": 11940 }, { "epoch": 0.5686143890369243, "grad_norm": 4.625, "learning_rate": 7.15692805481538e-06, "loss": 4.0839, "step": 11950 }, { "epoch": 0.5690902169775409, "grad_norm": 4.46875, "learning_rate": 7.154548915112296e-06, "loss": 3.9614, "step": 11960 }, { "epoch": 0.5695660449181575, "grad_norm": 4.625, "learning_rate": 7.152169775409213e-06, "loss": 3.9359, "step": 11970 }, { "epoch": 0.5700418728587743, "grad_norm": 4.3125, "learning_rate": 7.14979063570613e-06, "loss": 4.0816, "step": 11980 }, { "epoch": 0.5705177007993909, "grad_norm": 4.5625, "learning_rate": 7.147411496003046e-06, "loss": 3.96, "step": 11990 }, { "epoch": 0.5709935287400076, "grad_norm": 4.65625, "learning_rate": 7.145032356299963e-06, "loss": 3.923, "step": 12000 }, { "epoch": 0.5714693566806243, "grad_norm": 4.875, "learning_rate": 7.142653216596879e-06, "loss": 4.1065, "step": 12010 }, { "epoch": 0.571945184621241, "grad_norm": 4.625, "learning_rate": 7.140274076893796e-06, "loss": 4.0908, "step": 12020 }, { "epoch": 0.5724210125618576, "grad_norm": 4.75, "learning_rate": 7.1378949371907125e-06, "loss": 4.0967, "step": 12030 }, { "epoch": 0.5728968405024744, "grad_norm": 5.46875, "learning_rate": 7.1355157974876285e-06, "loss": 3.9784, "step": 12040 }, { "epoch": 0.573372668443091, "grad_norm": 4.875, "learning_rate": 7.133136657784546e-06, "loss": 4.0098, "step": 12050 }, { "epoch": 0.5738484963837076, "grad_norm": 4.78125, "learning_rate": 7.130757518081462e-06, "loss": 4.0232, "step": 12060 }, { "epoch": 0.5743243243243243, "grad_norm": 4.875, "learning_rate": 7.128378378378379e-06, "loss": 4.174, "step": 12070 }, { "epoch": 0.574800152264941, "grad_norm": 4.25, "learning_rate": 7.125999238675295e-06, "loss": 3.9075, "step": 12080 }, { "epoch": 0.5752759802055577, "grad_norm": 5.03125, "learning_rate": 7.123620098972212e-06, "loss": 4.1558, "step": 12090 }, { "epoch": 0.5757518081461743, "grad_norm": 4.6875, "learning_rate": 7.121240959269129e-06, "loss": 3.8877, "step": 12100 }, { "epoch": 0.5762276360867911, "grad_norm": 5.0, "learning_rate": 7.118861819566045e-06, "loss": 4.0427, "step": 12110 }, { "epoch": 0.5767034640274077, "grad_norm": 5.0, "learning_rate": 7.116482679862961e-06, "loss": 4.1449, "step": 12120 }, { "epoch": 0.5771792919680243, "grad_norm": 4.5, "learning_rate": 7.114103540159879e-06, "loss": 4.0706, "step": 12130 }, { "epoch": 0.577655119908641, "grad_norm": 4.40625, "learning_rate": 7.111724400456795e-06, "loss": 4.066, "step": 12140 }, { "epoch": 0.5781309478492577, "grad_norm": 4.40625, "learning_rate": 7.109345260753712e-06, "loss": 4.1725, "step": 12150 }, { "epoch": 0.5786067757898744, "grad_norm": 4.5, "learning_rate": 7.106966121050629e-06, "loss": 4.1361, "step": 12160 }, { "epoch": 0.579082603730491, "grad_norm": 5.0, "learning_rate": 7.104586981347545e-06, "loss": 3.9893, "step": 12170 }, { "epoch": 0.5795584316711078, "grad_norm": 4.46875, "learning_rate": 7.102207841644462e-06, "loss": 4.1121, "step": 12180 }, { "epoch": 0.5800342596117244, "grad_norm": 4.875, "learning_rate": 7.099828701941378e-06, "loss": 4.0051, "step": 12190 }, { "epoch": 0.5805100875523411, "grad_norm": 4.75, "learning_rate": 7.097449562238296e-06, "loss": 3.9333, "step": 12200 }, { "epoch": 0.5809859154929577, "grad_norm": 4.5625, "learning_rate": 7.095070422535212e-06, "loss": 4.0624, "step": 12210 }, { "epoch": 0.5814617434335744, "grad_norm": 4.59375, "learning_rate": 7.092691282832128e-06, "loss": 4.0435, "step": 12220 }, { "epoch": 0.5819375713741911, "grad_norm": 4.96875, "learning_rate": 7.090312143129046e-06, "loss": 3.9578, "step": 12230 }, { "epoch": 0.5824133993148077, "grad_norm": 4.34375, "learning_rate": 7.087933003425962e-06, "loss": 3.9305, "step": 12240 }, { "epoch": 0.5828892272554245, "grad_norm": 5.09375, "learning_rate": 7.085553863722878e-06, "loss": 3.9885, "step": 12250 }, { "epoch": 0.5833650551960411, "grad_norm": 4.59375, "learning_rate": 7.083174724019795e-06, "loss": 4.0378, "step": 12260 }, { "epoch": 0.5838408831366578, "grad_norm": 4.90625, "learning_rate": 7.080795584316712e-06, "loss": 3.9471, "step": 12270 }, { "epoch": 0.5843167110772745, "grad_norm": 4.59375, "learning_rate": 7.0784164446136286e-06, "loss": 3.9404, "step": 12280 }, { "epoch": 0.5847925390178911, "grad_norm": 4.78125, "learning_rate": 7.076037304910545e-06, "loss": 4.0124, "step": 12290 }, { "epoch": 0.5852683669585078, "grad_norm": 4.90625, "learning_rate": 7.073658165207462e-06, "loss": 3.8477, "step": 12300 }, { "epoch": 0.5857441948991244, "grad_norm": 4.59375, "learning_rate": 7.0712790255043785e-06, "loss": 4.0348, "step": 12310 }, { "epoch": 0.5862200228397412, "grad_norm": 5.28125, "learning_rate": 7.0688998858012945e-06, "loss": 4.0378, "step": 12320 }, { "epoch": 0.5866958507803578, "grad_norm": 4.78125, "learning_rate": 7.0665207460982115e-06, "loss": 4.0021, "step": 12330 }, { "epoch": 0.5871716787209745, "grad_norm": 5.0625, "learning_rate": 7.064141606395128e-06, "loss": 4.0073, "step": 12340 }, { "epoch": 0.5876475066615912, "grad_norm": 4.6875, "learning_rate": 7.0617624666920444e-06, "loss": 4.0724, "step": 12350 }, { "epoch": 0.5881233346022079, "grad_norm": 4.84375, "learning_rate": 7.059383326988961e-06, "loss": 3.9862, "step": 12360 }, { "epoch": 0.5885991625428245, "grad_norm": 4.25, "learning_rate": 7.057004187285878e-06, "loss": 4.034, "step": 12370 }, { "epoch": 0.5890749904834411, "grad_norm": 4.78125, "learning_rate": 7.054625047582795e-06, "loss": 4.033, "step": 12380 }, { "epoch": 0.5895508184240579, "grad_norm": 5.21875, "learning_rate": 7.052245907879711e-06, "loss": 4.0891, "step": 12390 }, { "epoch": 0.5900266463646745, "grad_norm": 4.65625, "learning_rate": 7.049866768176627e-06, "loss": 4.0023, "step": 12400 }, { "epoch": 0.5905024743052912, "grad_norm": 5.03125, "learning_rate": 7.047487628473545e-06, "loss": 3.9762, "step": 12410 }, { "epoch": 0.5909783022459079, "grad_norm": 4.375, "learning_rate": 7.045108488770461e-06, "loss": 4.0383, "step": 12420 }, { "epoch": 0.5914541301865246, "grad_norm": 4.46875, "learning_rate": 7.042729349067377e-06, "loss": 4.0823, "step": 12430 }, { "epoch": 0.5919299581271412, "grad_norm": 4.53125, "learning_rate": 7.040350209364294e-06, "loss": 3.9008, "step": 12440 }, { "epoch": 0.5924057860677578, "grad_norm": 4.375, "learning_rate": 7.037971069661211e-06, "loss": 4.0204, "step": 12450 }, { "epoch": 0.5928816140083746, "grad_norm": 4.75, "learning_rate": 7.035591929958128e-06, "loss": 4.0558, "step": 12460 }, { "epoch": 0.5933574419489912, "grad_norm": 4.59375, "learning_rate": 7.033212790255044e-06, "loss": 3.9419, "step": 12470 }, { "epoch": 0.5938332698896079, "grad_norm": 4.5, "learning_rate": 7.030833650551962e-06, "loss": 3.9694, "step": 12480 }, { "epoch": 0.5943090978302246, "grad_norm": 4.4375, "learning_rate": 7.028454510848878e-06, "loss": 4.0193, "step": 12490 }, { "epoch": 0.5947849257708413, "grad_norm": 4.96875, "learning_rate": 7.026075371145794e-06, "loss": 4.0674, "step": 12500 }, { "epoch": 0.5952607537114579, "grad_norm": 4.78125, "learning_rate": 7.02369623144271e-06, "loss": 4.0289, "step": 12510 }, { "epoch": 0.5957365816520747, "grad_norm": 29.25, "learning_rate": 7.021317091739628e-06, "loss": 4.0764, "step": 12520 }, { "epoch": 0.5962124095926913, "grad_norm": 5.21875, "learning_rate": 7.018937952036544e-06, "loss": 4.1372, "step": 12530 }, { "epoch": 0.5966882375333079, "grad_norm": 4.625, "learning_rate": 7.016558812333461e-06, "loss": 3.9813, "step": 12540 }, { "epoch": 0.5971640654739246, "grad_norm": 4.5625, "learning_rate": 7.014179672630378e-06, "loss": 4.0662, "step": 12550 }, { "epoch": 0.5976398934145413, "grad_norm": 4.84375, "learning_rate": 7.0118005329272946e-06, "loss": 4.0076, "step": 12560 }, { "epoch": 0.598115721355158, "grad_norm": 4.28125, "learning_rate": 7.009421393224211e-06, "loss": 4.101, "step": 12570 }, { "epoch": 0.5985915492957746, "grad_norm": 4.78125, "learning_rate": 7.007042253521127e-06, "loss": 4.0663, "step": 12580 }, { "epoch": 0.5990673772363914, "grad_norm": 4.875, "learning_rate": 7.0046631138180445e-06, "loss": 4.0566, "step": 12590 }, { "epoch": 0.599543205177008, "grad_norm": 4.46875, "learning_rate": 7.0022839741149605e-06, "loss": 3.9349, "step": 12600 }, { "epoch": 0.6000190331176246, "grad_norm": 4.78125, "learning_rate": 6.999904834411877e-06, "loss": 4.0335, "step": 12610 }, { "epoch": 0.6004948610582413, "grad_norm": 4.96875, "learning_rate": 6.997525694708794e-06, "loss": 4.0475, "step": 12620 }, { "epoch": 0.600970688998858, "grad_norm": 4.6875, "learning_rate": 6.9951465550057104e-06, "loss": 3.9647, "step": 12630 }, { "epoch": 0.6014465169394747, "grad_norm": 4.9375, "learning_rate": 6.992767415302627e-06, "loss": 4.0324, "step": 12640 }, { "epoch": 0.6019223448800913, "grad_norm": 4.5, "learning_rate": 6.990388275599543e-06, "loss": 4.0478, "step": 12650 }, { "epoch": 0.6023981728207081, "grad_norm": 4.84375, "learning_rate": 6.988009135896461e-06, "loss": 4.0161, "step": 12660 }, { "epoch": 0.6028740007613247, "grad_norm": 4.9375, "learning_rate": 6.985629996193377e-06, "loss": 4.1069, "step": 12670 }, { "epoch": 0.6033498287019414, "grad_norm": 4.5, "learning_rate": 6.983250856490293e-06, "loss": 4.0035, "step": 12680 }, { "epoch": 0.603825656642558, "grad_norm": 4.59375, "learning_rate": 6.980871716787209e-06, "loss": 4.1962, "step": 12690 }, { "epoch": 0.6043014845831747, "grad_norm": 4.5625, "learning_rate": 6.978492577084127e-06, "loss": 3.9665, "step": 12700 }, { "epoch": 0.6047773125237914, "grad_norm": 4.65625, "learning_rate": 6.976113437381043e-06, "loss": 3.9489, "step": 12710 }, { "epoch": 0.605253140464408, "grad_norm": 4.59375, "learning_rate": 6.97373429767796e-06, "loss": 4.1808, "step": 12720 }, { "epoch": 0.6057289684050248, "grad_norm": 4.625, "learning_rate": 6.971355157974877e-06, "loss": 3.9952, "step": 12730 }, { "epoch": 0.6062047963456414, "grad_norm": 4.65625, "learning_rate": 6.968976018271794e-06, "loss": 4.0644, "step": 12740 }, { "epoch": 0.6066806242862581, "grad_norm": 4.6875, "learning_rate": 6.96659687856871e-06, "loss": 4.0414, "step": 12750 }, { "epoch": 0.6071564522268748, "grad_norm": 4.625, "learning_rate": 6.964217738865626e-06, "loss": 4.0879, "step": 12760 }, { "epoch": 0.6076322801674915, "grad_norm": 4.375, "learning_rate": 6.961838599162544e-06, "loss": 4.0486, "step": 12770 }, { "epoch": 0.6081081081081081, "grad_norm": 4.84375, "learning_rate": 6.95945945945946e-06, "loss": 4.1119, "step": 12780 }, { "epoch": 0.6085839360487247, "grad_norm": 4.90625, "learning_rate": 6.957080319756376e-06, "loss": 3.9836, "step": 12790 }, { "epoch": 0.6090597639893415, "grad_norm": 4.875, "learning_rate": 6.954701180053294e-06, "loss": 4.0298, "step": 12800 }, { "epoch": 0.6095355919299581, "grad_norm": 4.9375, "learning_rate": 6.95232204035021e-06, "loss": 3.9453, "step": 12810 }, { "epoch": 0.6100114198705748, "grad_norm": 4.65625, "learning_rate": 6.949942900647127e-06, "loss": 4.2291, "step": 12820 }, { "epoch": 0.6104872478111915, "grad_norm": 4.65625, "learning_rate": 6.947563760944043e-06, "loss": 4.0047, "step": 12830 }, { "epoch": 0.6109630757518082, "grad_norm": 4.75, "learning_rate": 6.9451846212409606e-06, "loss": 3.9334, "step": 12840 }, { "epoch": 0.6114389036924248, "grad_norm": 5.09375, "learning_rate": 6.942805481537877e-06, "loss": 4.0709, "step": 12850 }, { "epoch": 0.6119147316330414, "grad_norm": 4.75, "learning_rate": 6.940426341834793e-06, "loss": 4.0804, "step": 12860 }, { "epoch": 0.6123905595736582, "grad_norm": 4.6875, "learning_rate": 6.9380472021317105e-06, "loss": 4.0134, "step": 12870 }, { "epoch": 0.6128663875142748, "grad_norm": 5.125, "learning_rate": 6.9356680624286265e-06, "loss": 4.0892, "step": 12880 }, { "epoch": 0.6133422154548915, "grad_norm": 4.96875, "learning_rate": 6.933288922725543e-06, "loss": 4.0737, "step": 12890 }, { "epoch": 0.6138180433955082, "grad_norm": 4.6875, "learning_rate": 6.9309097830224595e-06, "loss": 4.0017, "step": 12900 }, { "epoch": 0.6142938713361249, "grad_norm": 4.71875, "learning_rate": 6.9285306433193764e-06, "loss": 4.0167, "step": 12910 }, { "epoch": 0.6147696992767415, "grad_norm": 4.96875, "learning_rate": 6.926151503616293e-06, "loss": 4.2263, "step": 12920 }, { "epoch": 0.6152455272173583, "grad_norm": 4.8125, "learning_rate": 6.923772363913209e-06, "loss": 4.092, "step": 12930 }, { "epoch": 0.6157213551579749, "grad_norm": 4.71875, "learning_rate": 6.921393224210126e-06, "loss": 4.1431, "step": 12940 }, { "epoch": 0.6161971830985915, "grad_norm": 4.46875, "learning_rate": 6.919014084507043e-06, "loss": 4.1061, "step": 12950 }, { "epoch": 0.6166730110392082, "grad_norm": 4.75, "learning_rate": 6.916634944803959e-06, "loss": 4.1959, "step": 12960 }, { "epoch": 0.6171488389798249, "grad_norm": 4.90625, "learning_rate": 6.914255805100875e-06, "loss": 4.1139, "step": 12970 }, { "epoch": 0.6176246669204416, "grad_norm": 4.34375, "learning_rate": 6.911876665397793e-06, "loss": 4.0038, "step": 12980 }, { "epoch": 0.6181004948610582, "grad_norm": 4.90625, "learning_rate": 6.909497525694709e-06, "loss": 3.785, "step": 12990 }, { "epoch": 0.618576322801675, "grad_norm": 4.59375, "learning_rate": 6.907118385991626e-06, "loss": 4.0373, "step": 13000 }, { "epoch": 0.6190521507422916, "grad_norm": 4.84375, "learning_rate": 6.904739246288542e-06, "loss": 4.1462, "step": 13010 }, { "epoch": 0.6195279786829082, "grad_norm": 4.75, "learning_rate": 6.902360106585459e-06, "loss": 4.0576, "step": 13020 }, { "epoch": 0.620003806623525, "grad_norm": 4.78125, "learning_rate": 6.899980966882376e-06, "loss": 4.0459, "step": 13030 }, { "epoch": 0.6204796345641416, "grad_norm": 4.8125, "learning_rate": 6.897601827179292e-06, "loss": 4.0294, "step": 13040 }, { "epoch": 0.6209554625047583, "grad_norm": 4.8125, "learning_rate": 6.89522268747621e-06, "loss": 4.0457, "step": 13050 }, { "epoch": 0.6214312904453749, "grad_norm": 4.75, "learning_rate": 6.892843547773126e-06, "loss": 4.1453, "step": 13060 }, { "epoch": 0.6219071183859917, "grad_norm": 4.59375, "learning_rate": 6.890464408070042e-06, "loss": 4.2202, "step": 13070 }, { "epoch": 0.6223829463266083, "grad_norm": 4.65625, "learning_rate": 6.888085268366959e-06, "loss": 4.1082, "step": 13080 }, { "epoch": 0.622858774267225, "grad_norm": 5.0625, "learning_rate": 6.885706128663876e-06, "loss": 4.1011, "step": 13090 }, { "epoch": 0.6233346022078416, "grad_norm": 4.6875, "learning_rate": 6.883326988960793e-06, "loss": 4.0696, "step": 13100 }, { "epoch": 0.6238104301484583, "grad_norm": 4.84375, "learning_rate": 6.880947849257709e-06, "loss": 4.1373, "step": 13110 }, { "epoch": 0.624286258089075, "grad_norm": 4.5625, "learning_rate": 6.878568709554626e-06, "loss": 4.1126, "step": 13120 }, { "epoch": 0.6247620860296916, "grad_norm": 4.40625, "learning_rate": 6.876189569851543e-06, "loss": 4.1606, "step": 13130 }, { "epoch": 0.6252379139703084, "grad_norm": 4.625, "learning_rate": 6.873810430148459e-06, "loss": 3.9757, "step": 13140 }, { "epoch": 0.625713741910925, "grad_norm": 5.0, "learning_rate": 6.871431290445375e-06, "loss": 4.0514, "step": 13150 }, { "epoch": 0.6261895698515417, "grad_norm": 4.625, "learning_rate": 6.8690521507422925e-06, "loss": 3.9333, "step": 13160 }, { "epoch": 0.6266653977921584, "grad_norm": 4.53125, "learning_rate": 6.866673011039209e-06, "loss": 4.035, "step": 13170 }, { "epoch": 0.627141225732775, "grad_norm": 4.8125, "learning_rate": 6.8642938713361255e-06, "loss": 3.9146, "step": 13180 }, { "epoch": 0.6276170536733917, "grad_norm": 4.9375, "learning_rate": 6.8619147316330424e-06, "loss": 3.9646, "step": 13190 }, { "epoch": 0.6280928816140083, "grad_norm": 5.90625, "learning_rate": 6.8595355919299585e-06, "loss": 3.9519, "step": 13200 }, { "epoch": 0.6285687095546251, "grad_norm": 5.03125, "learning_rate": 6.857156452226875e-06, "loss": 4.1435, "step": 13210 }, { "epoch": 0.6290445374952417, "grad_norm": 4.75, "learning_rate": 6.8547773125237915e-06, "loss": 4.1188, "step": 13220 }, { "epoch": 0.6295203654358584, "grad_norm": 5.125, "learning_rate": 6.852398172820709e-06, "loss": 4.1323, "step": 13230 }, { "epoch": 0.629996193376475, "grad_norm": 5.09375, "learning_rate": 6.850019033117625e-06, "loss": 4.1131, "step": 13240 }, { "epoch": 0.6304720213170918, "grad_norm": 4.53125, "learning_rate": 6.847639893414541e-06, "loss": 3.9855, "step": 13250 }, { "epoch": 0.6309478492577084, "grad_norm": 5.0, "learning_rate": 6.845260753711459e-06, "loss": 4.1427, "step": 13260 }, { "epoch": 0.631423677198325, "grad_norm": 4.625, "learning_rate": 6.842881614008375e-06, "loss": 4.0488, "step": 13270 }, { "epoch": 0.6318995051389418, "grad_norm": 5.03125, "learning_rate": 6.840502474305291e-06, "loss": 4.1417, "step": 13280 }, { "epoch": 0.6323753330795584, "grad_norm": 4.90625, "learning_rate": 6.838123334602208e-06, "loss": 3.9953, "step": 13290 }, { "epoch": 0.6328511610201751, "grad_norm": 4.5, "learning_rate": 6.835744194899125e-06, "loss": 4.0306, "step": 13300 }, { "epoch": 0.6333269889607918, "grad_norm": 5.03125, "learning_rate": 6.833365055196042e-06, "loss": 3.9734, "step": 13310 }, { "epoch": 0.6338028169014085, "grad_norm": 5.0, "learning_rate": 6.830985915492958e-06, "loss": 4.0691, "step": 13320 }, { "epoch": 0.6342786448420251, "grad_norm": 4.78125, "learning_rate": 6.828606775789874e-06, "loss": 4.0131, "step": 13330 }, { "epoch": 0.6347544727826417, "grad_norm": 4.90625, "learning_rate": 6.826227636086792e-06, "loss": 4.1109, "step": 13340 }, { "epoch": 0.6352303007232585, "grad_norm": 4.875, "learning_rate": 6.823848496383708e-06, "loss": 4.0662, "step": 13350 }, { "epoch": 0.6357061286638751, "grad_norm": 4.75, "learning_rate": 6.821469356680625e-06, "loss": 4.0261, "step": 13360 }, { "epoch": 0.6361819566044918, "grad_norm": 4.5, "learning_rate": 6.819090216977542e-06, "loss": 4.1211, "step": 13370 }, { "epoch": 0.6366577845451085, "grad_norm": 4.875, "learning_rate": 6.816711077274458e-06, "loss": 4.0652, "step": 13380 }, { "epoch": 0.6371336124857252, "grad_norm": 4.65625, "learning_rate": 6.814331937571375e-06, "loss": 4.0719, "step": 13390 }, { "epoch": 0.6376094404263418, "grad_norm": 4.5625, "learning_rate": 6.811952797868291e-06, "loss": 3.9332, "step": 13400 }, { "epoch": 0.6380852683669586, "grad_norm": 4.875, "learning_rate": 6.809573658165209e-06, "loss": 4.1557, "step": 13410 }, { "epoch": 0.6385610963075752, "grad_norm": 4.46875, "learning_rate": 6.807194518462125e-06, "loss": 4.0631, "step": 13420 }, { "epoch": 0.6390369242481918, "grad_norm": 4.59375, "learning_rate": 6.804815378759041e-06, "loss": 4.065, "step": 13430 }, { "epoch": 0.6395127521888085, "grad_norm": 5.28125, "learning_rate": 6.8024362390559585e-06, "loss": 4.0514, "step": 13440 }, { "epoch": 0.6399885801294252, "grad_norm": 5.0625, "learning_rate": 6.800057099352875e-06, "loss": 3.9361, "step": 13450 }, { "epoch": 0.6404644080700419, "grad_norm": 4.625, "learning_rate": 6.797677959649791e-06, "loss": 4.0358, "step": 13460 }, { "epoch": 0.6409402360106585, "grad_norm": 4.84375, "learning_rate": 6.795298819946708e-06, "loss": 4.1424, "step": 13470 }, { "epoch": 0.6414160639512753, "grad_norm": 5.125, "learning_rate": 6.7929196802436245e-06, "loss": 4.0052, "step": 13480 }, { "epoch": 0.6418918918918919, "grad_norm": 4.8125, "learning_rate": 6.790540540540541e-06, "loss": 4.0452, "step": 13490 }, { "epoch": 0.6423677198325085, "grad_norm": 5.09375, "learning_rate": 6.7881614008374575e-06, "loss": 4.0502, "step": 13500 }, { "epoch": 0.6428435477731252, "grad_norm": 5.34375, "learning_rate": 6.785782261134375e-06, "loss": 3.9838, "step": 13510 }, { "epoch": 0.6433193757137419, "grad_norm": 4.78125, "learning_rate": 6.783403121431291e-06, "loss": 4.11, "step": 13520 }, { "epoch": 0.6437952036543586, "grad_norm": 4.40625, "learning_rate": 6.781023981728207e-06, "loss": 4.1565, "step": 13530 }, { "epoch": 0.6442710315949752, "grad_norm": 4.71875, "learning_rate": 6.7786448420251234e-06, "loss": 4.0054, "step": 13540 }, { "epoch": 0.644746859535592, "grad_norm": 4.375, "learning_rate": 6.776265702322041e-06, "loss": 3.8749, "step": 13550 }, { "epoch": 0.6452226874762086, "grad_norm": 4.8125, "learning_rate": 6.773886562618957e-06, "loss": 4.1009, "step": 13560 }, { "epoch": 0.6456985154168253, "grad_norm": 4.78125, "learning_rate": 6.771507422915874e-06, "loss": 3.8633, "step": 13570 }, { "epoch": 0.646174343357442, "grad_norm": 4.875, "learning_rate": 6.769128283212791e-06, "loss": 4.1009, "step": 13580 }, { "epoch": 0.6466501712980586, "grad_norm": 4.9375, "learning_rate": 6.766749143509708e-06, "loss": 4.1426, "step": 13590 }, { "epoch": 0.6471259992386753, "grad_norm": 5.0, "learning_rate": 6.764370003806624e-06, "loss": 4.0686, "step": 13600 }, { "epoch": 0.6476018271792919, "grad_norm": 4.96875, "learning_rate": 6.76199086410354e-06, "loss": 4.1247, "step": 13610 }, { "epoch": 0.6480776551199087, "grad_norm": 4.5625, "learning_rate": 6.759611724400458e-06, "loss": 4.0942, "step": 13620 }, { "epoch": 0.6485534830605253, "grad_norm": 4.75, "learning_rate": 6.757232584697374e-06, "loss": 4.0901, "step": 13630 }, { "epoch": 0.649029311001142, "grad_norm": 4.65625, "learning_rate": 6.75485344499429e-06, "loss": 4.0247, "step": 13640 }, { "epoch": 0.6495051389417587, "grad_norm": 4.84375, "learning_rate": 6.752474305291207e-06, "loss": 3.9325, "step": 13650 }, { "epoch": 0.6499809668823754, "grad_norm": 5.0625, "learning_rate": 6.750095165588124e-06, "loss": 4.2106, "step": 13660 }, { "epoch": 0.650456794822992, "grad_norm": 4.46875, "learning_rate": 6.747716025885041e-06, "loss": 4.0667, "step": 13670 }, { "epoch": 0.6509326227636086, "grad_norm": 4.53125, "learning_rate": 6.745336886181957e-06, "loss": 4.0466, "step": 13680 }, { "epoch": 0.6514084507042254, "grad_norm": 4.40625, "learning_rate": 6.742957746478875e-06, "loss": 4.0631, "step": 13690 }, { "epoch": 0.651884278644842, "grad_norm": 4.59375, "learning_rate": 6.740578606775791e-06, "loss": 4.074, "step": 13700 }, { "epoch": 0.6523601065854587, "grad_norm": 4.6875, "learning_rate": 6.738199467072707e-06, "loss": 4.0321, "step": 13710 }, { "epoch": 0.6528359345260754, "grad_norm": 4.53125, "learning_rate": 6.735820327369623e-06, "loss": 4.0885, "step": 13720 }, { "epoch": 0.6533117624666921, "grad_norm": 4.3125, "learning_rate": 6.733441187666541e-06, "loss": 4.093, "step": 13730 }, { "epoch": 0.6537875904073087, "grad_norm": 4.59375, "learning_rate": 6.731062047963457e-06, "loss": 3.9989, "step": 13740 }, { "epoch": 0.6542634183479253, "grad_norm": 4.84375, "learning_rate": 6.728682908260374e-06, "loss": 4.1149, "step": 13750 }, { "epoch": 0.6547392462885421, "grad_norm": 4.84375, "learning_rate": 6.7263037685572905e-06, "loss": 3.9898, "step": 13760 }, { "epoch": 0.6552150742291587, "grad_norm": 4.5625, "learning_rate": 6.723924628854207e-06, "loss": 4.1202, "step": 13770 }, { "epoch": 0.6556909021697754, "grad_norm": 4.90625, "learning_rate": 6.7215454891511235e-06, "loss": 4.1522, "step": 13780 }, { "epoch": 0.6561667301103921, "grad_norm": 4.6875, "learning_rate": 6.7191663494480395e-06, "loss": 4.0313, "step": 13790 }, { "epoch": 0.6566425580510088, "grad_norm": 4.75, "learning_rate": 6.716787209744957e-06, "loss": 4.027, "step": 13800 }, { "epoch": 0.6571183859916254, "grad_norm": 5.28125, "learning_rate": 6.714408070041873e-06, "loss": 4.0202, "step": 13810 }, { "epoch": 0.6575942139322422, "grad_norm": 4.78125, "learning_rate": 6.7120289303387894e-06, "loss": 3.9659, "step": 13820 }, { "epoch": 0.6580700418728588, "grad_norm": 4.625, "learning_rate": 6.709649790635707e-06, "loss": 4.0037, "step": 13830 }, { "epoch": 0.6585458698134754, "grad_norm": 5.125, "learning_rate": 6.707270650932623e-06, "loss": 4.1056, "step": 13840 }, { "epoch": 0.6590216977540921, "grad_norm": 4.6875, "learning_rate": 6.70489151122954e-06, "loss": 3.975, "step": 13850 }, { "epoch": 0.6594975256947088, "grad_norm": 4.65625, "learning_rate": 6.702512371526456e-06, "loss": 4.0643, "step": 13860 }, { "epoch": 0.6599733536353255, "grad_norm": 4.90625, "learning_rate": 6.700133231823374e-06, "loss": 3.9475, "step": 13870 }, { "epoch": 0.6604491815759421, "grad_norm": 4.9375, "learning_rate": 6.69775409212029e-06, "loss": 4.17, "step": 13880 }, { "epoch": 0.6609250095165589, "grad_norm": 4.5, "learning_rate": 6.695374952417206e-06, "loss": 3.9517, "step": 13890 }, { "epoch": 0.6614008374571755, "grad_norm": 4.59375, "learning_rate": 6.692995812714122e-06, "loss": 4.0498, "step": 13900 }, { "epoch": 0.6618766653977921, "grad_norm": 4.875, "learning_rate": 6.69061667301104e-06, "loss": 3.9288, "step": 13910 }, { "epoch": 0.6623524933384088, "grad_norm": 5.53125, "learning_rate": 6.688237533307956e-06, "loss": 3.9363, "step": 13920 }, { "epoch": 0.6628283212790255, "grad_norm": 4.4375, "learning_rate": 6.685858393604873e-06, "loss": 4.003, "step": 13930 }, { "epoch": 0.6633041492196422, "grad_norm": 4.375, "learning_rate": 6.68347925390179e-06, "loss": 4.0651, "step": 13940 }, { "epoch": 0.6637799771602588, "grad_norm": 4.8125, "learning_rate": 6.681100114198707e-06, "loss": 3.9536, "step": 13950 }, { "epoch": 0.6642558051008756, "grad_norm": 4.96875, "learning_rate": 6.678720974495623e-06, "loss": 4.0803, "step": 13960 }, { "epoch": 0.6647316330414922, "grad_norm": 4.90625, "learning_rate": 6.676341834792539e-06, "loss": 3.9229, "step": 13970 }, { "epoch": 0.6652074609821089, "grad_norm": 5.21875, "learning_rate": 6.673962695089457e-06, "loss": 3.8941, "step": 13980 }, { "epoch": 0.6656832889227255, "grad_norm": 4.96875, "learning_rate": 6.671583555386373e-06, "loss": 3.9671, "step": 13990 }, { "epoch": 0.6661591168633422, "grad_norm": 4.6875, "learning_rate": 6.669204415683289e-06, "loss": 4.0469, "step": 14000 }, { "epoch": 0.6666349448039589, "grad_norm": 5.15625, "learning_rate": 6.666825275980207e-06, "loss": 4.0977, "step": 14010 }, { "epoch": 0.6671107727445755, "grad_norm": 4.65625, "learning_rate": 6.664446136277123e-06, "loss": 4.1163, "step": 14020 }, { "epoch": 0.6675866006851923, "grad_norm": 4.625, "learning_rate": 6.6620669965740396e-06, "loss": 4.1442, "step": 14030 }, { "epoch": 0.6680624286258089, "grad_norm": 4.59375, "learning_rate": 6.659687856870956e-06, "loss": 3.9885, "step": 14040 }, { "epoch": 0.6685382565664256, "grad_norm": 4.84375, "learning_rate": 6.6573087171678726e-06, "loss": 4.0725, "step": 14050 }, { "epoch": 0.6690140845070423, "grad_norm": 4.71875, "learning_rate": 6.6549295774647895e-06, "loss": 4.0457, "step": 14060 }, { "epoch": 0.6694899124476589, "grad_norm": 7.46875, "learning_rate": 6.6525504377617055e-06, "loss": 3.8997, "step": 14070 }, { "epoch": 0.6699657403882756, "grad_norm": 5.0625, "learning_rate": 6.650171298058623e-06, "loss": 3.9945, "step": 14080 }, { "epoch": 0.6704415683288922, "grad_norm": 4.6875, "learning_rate": 6.647792158355539e-06, "loss": 3.9914, "step": 14090 }, { "epoch": 0.670917396269509, "grad_norm": 5.09375, "learning_rate": 6.6454130186524554e-06, "loss": 4.0261, "step": 14100 }, { "epoch": 0.6713932242101256, "grad_norm": 4.75, "learning_rate": 6.643033878949372e-06, "loss": 4.2516, "step": 14110 }, { "epoch": 0.6718690521507423, "grad_norm": 5.03125, "learning_rate": 6.640654739246289e-06, "loss": 3.9177, "step": 14120 }, { "epoch": 0.672344880091359, "grad_norm": 5.09375, "learning_rate": 6.638275599543206e-06, "loss": 4.0422, "step": 14130 }, { "epoch": 0.6728207080319757, "grad_norm": 4.78125, "learning_rate": 6.635896459840122e-06, "loss": 4.0072, "step": 14140 }, { "epoch": 0.6732965359725923, "grad_norm": 4.71875, "learning_rate": 6.633517320137039e-06, "loss": 4.0872, "step": 14150 }, { "epoch": 0.6737723639132089, "grad_norm": 4.8125, "learning_rate": 6.631138180433956e-06, "loss": 3.9471, "step": 14160 }, { "epoch": 0.6742481918538257, "grad_norm": 4.4375, "learning_rate": 6.628759040730872e-06, "loss": 4.1719, "step": 14170 }, { "epoch": 0.6747240197944423, "grad_norm": 4.65625, "learning_rate": 6.626379901027788e-06, "loss": 4.06, "step": 14180 }, { "epoch": 0.675199847735059, "grad_norm": 4.59375, "learning_rate": 6.624000761324706e-06, "loss": 3.9833, "step": 14190 }, { "epoch": 0.6756756756756757, "grad_norm": 4.9375, "learning_rate": 6.621621621621622e-06, "loss": 3.9666, "step": 14200 }, { "epoch": 0.6761515036162924, "grad_norm": 4.84375, "learning_rate": 6.619242481918539e-06, "loss": 4.0352, "step": 14210 }, { "epoch": 0.676627331556909, "grad_norm": 4.65625, "learning_rate": 6.616863342215455e-06, "loss": 3.8445, "step": 14220 }, { "epoch": 0.6771031594975256, "grad_norm": 4.71875, "learning_rate": 6.614484202512372e-06, "loss": 3.9476, "step": 14230 }, { "epoch": 0.6775789874381424, "grad_norm": 4.90625, "learning_rate": 6.612105062809289e-06, "loss": 4.0472, "step": 14240 }, { "epoch": 0.678054815378759, "grad_norm": 4.5, "learning_rate": 6.609725923106205e-06, "loss": 4.1557, "step": 14250 }, { "epoch": 0.6785306433193757, "grad_norm": 5.15625, "learning_rate": 6.607346783403123e-06, "loss": 3.8937, "step": 14260 }, { "epoch": 0.6790064712599924, "grad_norm": 4.6875, "learning_rate": 6.604967643700039e-06, "loss": 4.1217, "step": 14270 }, { "epoch": 0.6794822992006091, "grad_norm": 4.34375, "learning_rate": 6.602588503996955e-06, "loss": 3.9507, "step": 14280 }, { "epoch": 0.6799581271412257, "grad_norm": 4.78125, "learning_rate": 6.600209364293872e-06, "loss": 3.9439, "step": 14290 }, { "epoch": 0.6804339550818425, "grad_norm": 4.5625, "learning_rate": 6.597830224590789e-06, "loss": 4.0533, "step": 14300 }, { "epoch": 0.6809097830224591, "grad_norm": 5.28125, "learning_rate": 6.595451084887705e-06, "loss": 4.0272, "step": 14310 }, { "epoch": 0.6813856109630757, "grad_norm": 5.03125, "learning_rate": 6.593071945184622e-06, "loss": 4.1535, "step": 14320 }, { "epoch": 0.6818614389036924, "grad_norm": 4.6875, "learning_rate": 6.5906928054815386e-06, "loss": 4.2072, "step": 14330 }, { "epoch": 0.6823372668443091, "grad_norm": 5.125, "learning_rate": 6.5883136657784555e-06, "loss": 3.9462, "step": 14340 }, { "epoch": 0.6828130947849258, "grad_norm": 4.625, "learning_rate": 6.5859345260753715e-06, "loss": 4.0719, "step": 14350 }, { "epoch": 0.6832889227255424, "grad_norm": 4.96875, "learning_rate": 6.583555386372288e-06, "loss": 4.087, "step": 14360 }, { "epoch": 0.6837647506661592, "grad_norm": 5.03125, "learning_rate": 6.581176246669205e-06, "loss": 4.0513, "step": 14370 }, { "epoch": 0.6842405786067758, "grad_norm": 5.15625, "learning_rate": 6.5787971069661214e-06, "loss": 3.9592, "step": 14380 }, { "epoch": 0.6847164065473924, "grad_norm": 4.46875, "learning_rate": 6.576417967263038e-06, "loss": 4.03, "step": 14390 }, { "epoch": 0.6851922344880091, "grad_norm": 4.4375, "learning_rate": 6.574038827559955e-06, "loss": 4.0327, "step": 14400 }, { "epoch": 0.6856680624286258, "grad_norm": 4.46875, "learning_rate": 6.571659687856871e-06, "loss": 4.2721, "step": 14410 }, { "epoch": 0.6861438903692425, "grad_norm": 4.75, "learning_rate": 6.569280548153788e-06, "loss": 4.0487, "step": 14420 }, { "epoch": 0.6866197183098591, "grad_norm": 4.96875, "learning_rate": 6.566901408450704e-06, "loss": 4.1846, "step": 14430 }, { "epoch": 0.6870955462504759, "grad_norm": 4.78125, "learning_rate": 6.564522268747622e-06, "loss": 4.1364, "step": 14440 }, { "epoch": 0.6875713741910925, "grad_norm": 4.65625, "learning_rate": 6.562143129044538e-06, "loss": 3.9347, "step": 14450 }, { "epoch": 0.6880472021317092, "grad_norm": 4.625, "learning_rate": 6.559763989341454e-06, "loss": 3.8709, "step": 14460 }, { "epoch": 0.6885230300723258, "grad_norm": 4.875, "learning_rate": 6.557384849638372e-06, "loss": 3.9563, "step": 14470 }, { "epoch": 0.6889988580129425, "grad_norm": 4.4375, "learning_rate": 6.555005709935288e-06, "loss": 4.1614, "step": 14480 }, { "epoch": 0.6894746859535592, "grad_norm": 5.0625, "learning_rate": 6.552626570232204e-06, "loss": 3.9556, "step": 14490 }, { "epoch": 0.6899505138941758, "grad_norm": 5.21875, "learning_rate": 6.550247430529121e-06, "loss": 3.9672, "step": 14500 }, { "epoch": 0.6904263418347926, "grad_norm": 4.96875, "learning_rate": 6.547868290826038e-06, "loss": 4.0394, "step": 14510 }, { "epoch": 0.6909021697754092, "grad_norm": 4.5, "learning_rate": 6.545489151122955e-06, "loss": 4.0281, "step": 14520 }, { "epoch": 0.6913779977160259, "grad_norm": 4.46875, "learning_rate": 6.543110011419871e-06, "loss": 4.0836, "step": 14530 }, { "epoch": 0.6918538256566426, "grad_norm": 4.34375, "learning_rate": 6.540730871716787e-06, "loss": 4.0846, "step": 14540 }, { "epoch": 0.6923296535972592, "grad_norm": 4.6875, "learning_rate": 6.538351732013705e-06, "loss": 4.0175, "step": 14550 }, { "epoch": 0.6928054815378759, "grad_norm": 5.15625, "learning_rate": 6.535972592310621e-06, "loss": 4.0309, "step": 14560 }, { "epoch": 0.6932813094784925, "grad_norm": 5.5, "learning_rate": 6.533593452607537e-06, "loss": 3.9668, "step": 14570 }, { "epoch": 0.6937571374191093, "grad_norm": 4.375, "learning_rate": 6.531214312904455e-06, "loss": 4.0784, "step": 14580 }, { "epoch": 0.6942329653597259, "grad_norm": 4.75, "learning_rate": 6.528835173201371e-06, "loss": 3.9882, "step": 14590 }, { "epoch": 0.6947087933003426, "grad_norm": 4.6875, "learning_rate": 6.526456033498288e-06, "loss": 4.0732, "step": 14600 }, { "epoch": 0.6951846212409593, "grad_norm": 4.84375, "learning_rate": 6.524076893795204e-06, "loss": 4.0426, "step": 14610 }, { "epoch": 0.695660449181576, "grad_norm": 5.0, "learning_rate": 6.5216977540921215e-06, "loss": 4.153, "step": 14620 }, { "epoch": 0.6961362771221926, "grad_norm": 4.75, "learning_rate": 6.5193186143890375e-06, "loss": 3.8707, "step": 14630 }, { "epoch": 0.6966121050628092, "grad_norm": 4.59375, "learning_rate": 6.516939474685954e-06, "loss": 4.1609, "step": 14640 }, { "epoch": 0.697087933003426, "grad_norm": 4.71875, "learning_rate": 6.514560334982871e-06, "loss": 4.1585, "step": 14650 }, { "epoch": 0.6975637609440426, "grad_norm": 4.84375, "learning_rate": 6.5121811952797874e-06, "loss": 4.1681, "step": 14660 }, { "epoch": 0.6980395888846593, "grad_norm": 4.4375, "learning_rate": 6.5098020555767035e-06, "loss": 3.9111, "step": 14670 }, { "epoch": 0.698515416825276, "grad_norm": 5.28125, "learning_rate": 6.50742291587362e-06, "loss": 3.998, "step": 14680 }, { "epoch": 0.6989912447658927, "grad_norm": 4.53125, "learning_rate": 6.505043776170537e-06, "loss": 4.0188, "step": 14690 }, { "epoch": 0.6994670727065093, "grad_norm": 4.875, "learning_rate": 6.502664636467454e-06, "loss": 4.0839, "step": 14700 }, { "epoch": 0.699942900647126, "grad_norm": 5.0, "learning_rate": 6.50028549676437e-06, "loss": 4.1666, "step": 14710 }, { "epoch": 0.7004187285877427, "grad_norm": 4.71875, "learning_rate": 6.497906357061288e-06, "loss": 3.9175, "step": 14720 }, { "epoch": 0.7008945565283593, "grad_norm": 4.75, "learning_rate": 6.495527217358204e-06, "loss": 3.9781, "step": 14730 }, { "epoch": 0.701370384468976, "grad_norm": 4.5625, "learning_rate": 6.49314807765512e-06, "loss": 4.2034, "step": 14740 }, { "epoch": 0.7018462124095927, "grad_norm": 4.71875, "learning_rate": 6.490768937952036e-06, "loss": 4.0188, "step": 14750 }, { "epoch": 0.7023220403502094, "grad_norm": 4.78125, "learning_rate": 6.488389798248954e-06, "loss": 3.9283, "step": 14760 }, { "epoch": 0.702797868290826, "grad_norm": 5.09375, "learning_rate": 6.48601065854587e-06, "loss": 4.0739, "step": 14770 }, { "epoch": 0.7032736962314428, "grad_norm": 4.65625, "learning_rate": 6.483631518842787e-06, "loss": 3.8935, "step": 14780 }, { "epoch": 0.7037495241720594, "grad_norm": 5.125, "learning_rate": 6.481252379139703e-06, "loss": 4.1385, "step": 14790 }, { "epoch": 0.704225352112676, "grad_norm": 4.59375, "learning_rate": 6.478873239436621e-06, "loss": 4.0633, "step": 14800 }, { "epoch": 0.7047011800532927, "grad_norm": 4.9375, "learning_rate": 6.476494099733537e-06, "loss": 4.0417, "step": 14810 }, { "epoch": 0.7051770079939094, "grad_norm": 4.625, "learning_rate": 6.474114960030453e-06, "loss": 4.0117, "step": 14820 }, { "epoch": 0.7056528359345261, "grad_norm": 5.03125, "learning_rate": 6.471735820327371e-06, "loss": 4.1043, "step": 14830 }, { "epoch": 0.7061286638751427, "grad_norm": 4.8125, "learning_rate": 6.469356680624287e-06, "loss": 3.9967, "step": 14840 }, { "epoch": 0.7066044918157595, "grad_norm": 4.1875, "learning_rate": 6.466977540921203e-06, "loss": 4.0511, "step": 14850 }, { "epoch": 0.7070803197563761, "grad_norm": 4.53125, "learning_rate": 6.46459840121812e-06, "loss": 4.1069, "step": 14860 }, { "epoch": 0.7075561476969928, "grad_norm": 4.71875, "learning_rate": 6.462219261515037e-06, "loss": 3.9059, "step": 14870 }, { "epoch": 0.7080319756376094, "grad_norm": 4.625, "learning_rate": 6.459840121811954e-06, "loss": 4.0731, "step": 14880 }, { "epoch": 0.7085078035782261, "grad_norm": 5.125, "learning_rate": 6.45746098210887e-06, "loss": 4.0779, "step": 14890 }, { "epoch": 0.7089836315188428, "grad_norm": 4.90625, "learning_rate": 6.4550818424057875e-06, "loss": 4.0168, "step": 14900 }, { "epoch": 0.7094594594594594, "grad_norm": 4.59375, "learning_rate": 6.4527027027027035e-06, "loss": 3.9365, "step": 14910 }, { "epoch": 0.7099352874000762, "grad_norm": 4.8125, "learning_rate": 6.45032356299962e-06, "loss": 4.1625, "step": 14920 }, { "epoch": 0.7104111153406928, "grad_norm": 4.875, "learning_rate": 6.447944423296536e-06, "loss": 3.978, "step": 14930 }, { "epoch": 0.7108869432813095, "grad_norm": 4.375, "learning_rate": 6.4455652835934534e-06, "loss": 3.8699, "step": 14940 }, { "epoch": 0.7113627712219261, "grad_norm": 4.84375, "learning_rate": 6.4431861438903695e-06, "loss": 4.0182, "step": 14950 }, { "epoch": 0.7118385991625428, "grad_norm": 4.625, "learning_rate": 6.440807004187286e-06, "loss": 3.9587, "step": 14960 }, { "epoch": 0.7123144271031595, "grad_norm": 4.875, "learning_rate": 6.438427864484203e-06, "loss": 4.1878, "step": 14970 }, { "epoch": 0.7127902550437761, "grad_norm": 4.875, "learning_rate": 6.43604872478112e-06, "loss": 3.9124, "step": 14980 }, { "epoch": 0.7132660829843929, "grad_norm": 4.9375, "learning_rate": 6.433669585078036e-06, "loss": 4.0487, "step": 14990 }, { "epoch": 0.7137419109250095, "grad_norm": 4.84375, "learning_rate": 6.431290445374952e-06, "loss": 4.0771, "step": 15000 }, { "epoch": 0.7142177388656262, "grad_norm": 4.53125, "learning_rate": 6.42891130567187e-06, "loss": 3.974, "step": 15010 }, { "epoch": 0.7146935668062429, "grad_norm": 4.5625, "learning_rate": 6.426532165968786e-06, "loss": 4.0646, "step": 15020 }, { "epoch": 0.7151693947468596, "grad_norm": 4.8125, "learning_rate": 6.424153026265702e-06, "loss": 3.9566, "step": 15030 }, { "epoch": 0.7156452226874762, "grad_norm": 4.71875, "learning_rate": 6.42177388656262e-06, "loss": 4.0277, "step": 15040 }, { "epoch": 0.7161210506280928, "grad_norm": 4.65625, "learning_rate": 6.419394746859536e-06, "loss": 3.991, "step": 15050 }, { "epoch": 0.7165968785687096, "grad_norm": 4.8125, "learning_rate": 6.417015607156453e-06, "loss": 3.9634, "step": 15060 }, { "epoch": 0.7170727065093262, "grad_norm": 4.71875, "learning_rate": 6.414636467453369e-06, "loss": 4.1163, "step": 15070 }, { "epoch": 0.7175485344499429, "grad_norm": 4.8125, "learning_rate": 6.412257327750286e-06, "loss": 4.0721, "step": 15080 }, { "epoch": 0.7180243623905596, "grad_norm": 4.6875, "learning_rate": 6.409878188047203e-06, "loss": 3.9575, "step": 15090 }, { "epoch": 0.7185001903311763, "grad_norm": 4.6875, "learning_rate": 6.407499048344119e-06, "loss": 4.0285, "step": 15100 }, { "epoch": 0.7189760182717929, "grad_norm": 4.625, "learning_rate": 6.405119908641035e-06, "loss": 3.9912, "step": 15110 }, { "epoch": 0.7194518462124095, "grad_norm": 4.65625, "learning_rate": 6.402740768937953e-06, "loss": 3.8879, "step": 15120 }, { "epoch": 0.7199276741530263, "grad_norm": 5.09375, "learning_rate": 6.400361629234869e-06, "loss": 3.9873, "step": 15130 }, { "epoch": 0.7204035020936429, "grad_norm": 4.71875, "learning_rate": 6.397982489531786e-06, "loss": 4.0289, "step": 15140 }, { "epoch": 0.7208793300342596, "grad_norm": 4.625, "learning_rate": 6.395603349828703e-06, "loss": 3.8799, "step": 15150 }, { "epoch": 0.7213551579748763, "grad_norm": 4.90625, "learning_rate": 6.39322421012562e-06, "loss": 4.0913, "step": 15160 }, { "epoch": 0.721830985915493, "grad_norm": 4.71875, "learning_rate": 6.390845070422536e-06, "loss": 3.9589, "step": 15170 }, { "epoch": 0.7223068138561096, "grad_norm": 5.1875, "learning_rate": 6.388465930719452e-06, "loss": 4.1395, "step": 15180 }, { "epoch": 0.7227826417967264, "grad_norm": 5.28125, "learning_rate": 6.3860867910163695e-06, "loss": 4.0085, "step": 15190 }, { "epoch": 0.723258469737343, "grad_norm": 4.8125, "learning_rate": 6.383707651313286e-06, "loss": 3.9083, "step": 15200 }, { "epoch": 0.7237342976779596, "grad_norm": 4.5, "learning_rate": 6.381328511610202e-06, "loss": 3.9614, "step": 15210 }, { "epoch": 0.7242101256185763, "grad_norm": 4.6875, "learning_rate": 6.3789493719071194e-06, "loss": 4.0667, "step": 15220 }, { "epoch": 0.724685953559193, "grad_norm": 4.53125, "learning_rate": 6.3765702322040355e-06, "loss": 4.0204, "step": 15230 }, { "epoch": 0.7251617814998097, "grad_norm": 5.125, "learning_rate": 6.374191092500952e-06, "loss": 3.8962, "step": 15240 }, { "epoch": 0.7256376094404263, "grad_norm": 5.46875, "learning_rate": 6.3718119527978685e-06, "loss": 3.9932, "step": 15250 }, { "epoch": 0.7261134373810431, "grad_norm": 5.96875, "learning_rate": 6.369432813094785e-06, "loss": 4.0175, "step": 15260 }, { "epoch": 0.7265892653216597, "grad_norm": 5.125, "learning_rate": 6.367053673391702e-06, "loss": 4.0296, "step": 15270 }, { "epoch": 0.7270650932622763, "grad_norm": 4.28125, "learning_rate": 6.364674533688618e-06, "loss": 4.0186, "step": 15280 }, { "epoch": 0.727540921202893, "grad_norm": 4.21875, "learning_rate": 6.362295393985536e-06, "loss": 3.8662, "step": 15290 }, { "epoch": 0.7280167491435097, "grad_norm": 4.9375, "learning_rate": 6.359916254282452e-06, "loss": 3.9384, "step": 15300 }, { "epoch": 0.7284925770841264, "grad_norm": 4.5625, "learning_rate": 6.357537114579368e-06, "loss": 4.1278, "step": 15310 }, { "epoch": 0.728968405024743, "grad_norm": 4.96875, "learning_rate": 6.355157974876285e-06, "loss": 4.0877, "step": 15320 }, { "epoch": 0.7294442329653598, "grad_norm": 5.5, "learning_rate": 6.352778835173202e-06, "loss": 3.9853, "step": 15330 }, { "epoch": 0.7299200609059764, "grad_norm": 4.53125, "learning_rate": 6.350399695470118e-06, "loss": 4.1527, "step": 15340 }, { "epoch": 0.7303958888465931, "grad_norm": 4.75, "learning_rate": 6.348020555767035e-06, "loss": 4.0184, "step": 15350 }, { "epoch": 0.7308717167872097, "grad_norm": 4.75, "learning_rate": 6.345641416063952e-06, "loss": 4.0936, "step": 15360 }, { "epoch": 0.7313475447278264, "grad_norm": 4.71875, "learning_rate": 6.343262276360869e-06, "loss": 3.9445, "step": 15370 }, { "epoch": 0.7318233726684431, "grad_norm": 4.3125, "learning_rate": 6.340883136657785e-06, "loss": 3.9757, "step": 15380 }, { "epoch": 0.7322992006090597, "grad_norm": 4.65625, "learning_rate": 6.338503996954701e-06, "loss": 4.0576, "step": 15390 }, { "epoch": 0.7327750285496765, "grad_norm": 4.8125, "learning_rate": 6.336124857251619e-06, "loss": 4.0788, "step": 15400 }, { "epoch": 0.7332508564902931, "grad_norm": 4.78125, "learning_rate": 6.333745717548535e-06, "loss": 4.1015, "step": 15410 }, { "epoch": 0.7337266844309098, "grad_norm": 5.125, "learning_rate": 6.331366577845452e-06, "loss": 4.0623, "step": 15420 }, { "epoch": 0.7342025123715265, "grad_norm": 5.03125, "learning_rate": 6.328987438142368e-06, "loss": 4.1324, "step": 15430 }, { "epoch": 0.7346783403121431, "grad_norm": 4.96875, "learning_rate": 6.326608298439285e-06, "loss": 4.1288, "step": 15440 }, { "epoch": 0.7351541682527598, "grad_norm": 4.59375, "learning_rate": 6.324229158736202e-06, "loss": 4.1466, "step": 15450 }, { "epoch": 0.7356299961933764, "grad_norm": 4.84375, "learning_rate": 6.321850019033118e-06, "loss": 4.1606, "step": 15460 }, { "epoch": 0.7361058241339932, "grad_norm": 5.0625, "learning_rate": 6.3194708793300355e-06, "loss": 4.1264, "step": 15470 }, { "epoch": 0.7365816520746098, "grad_norm": 4.59375, "learning_rate": 6.317091739626952e-06, "loss": 3.9833, "step": 15480 }, { "epoch": 0.7370574800152265, "grad_norm": 4.625, "learning_rate": 6.314712599923868e-06, "loss": 3.9901, "step": 15490 }, { "epoch": 0.7375333079558432, "grad_norm": 4.53125, "learning_rate": 6.312333460220785e-06, "loss": 4.0213, "step": 15500 }, { "epoch": 0.7380091358964599, "grad_norm": 5.28125, "learning_rate": 6.3099543205177015e-06, "loss": 4.0444, "step": 15510 }, { "epoch": 0.7384849638370765, "grad_norm": 4.4375, "learning_rate": 6.3075751808146176e-06, "loss": 3.8167, "step": 15520 }, { "epoch": 0.7389607917776931, "grad_norm": 4.84375, "learning_rate": 6.3051960411115345e-06, "loss": 4.0674, "step": 15530 }, { "epoch": 0.7394366197183099, "grad_norm": 4.6875, "learning_rate": 6.302816901408451e-06, "loss": 4.0585, "step": 15540 }, { "epoch": 0.7399124476589265, "grad_norm": 4.59375, "learning_rate": 6.300437761705368e-06, "loss": 3.9698, "step": 15550 }, { "epoch": 0.7403882755995432, "grad_norm": 4.5625, "learning_rate": 6.298058622002284e-06, "loss": 4.0501, "step": 15560 }, { "epoch": 0.7408641035401599, "grad_norm": 4.6875, "learning_rate": 6.2956794822992004e-06, "loss": 4.0147, "step": 15570 }, { "epoch": 0.7413399314807766, "grad_norm": 4.5625, "learning_rate": 6.293300342596118e-06, "loss": 4.1134, "step": 15580 }, { "epoch": 0.7418157594213932, "grad_norm": 4.71875, "learning_rate": 6.290921202893034e-06, "loss": 4.064, "step": 15590 }, { "epoch": 0.74229158736201, "grad_norm": 4.5, "learning_rate": 6.28854206318995e-06, "loss": 3.9913, "step": 15600 }, { "epoch": 0.7427674153026266, "grad_norm": 4.59375, "learning_rate": 6.286162923486868e-06, "loss": 3.9778, "step": 15610 }, { "epoch": 0.7432432432432432, "grad_norm": 4.9375, "learning_rate": 6.283783783783784e-06, "loss": 3.996, "step": 15620 }, { "epoch": 0.7437190711838599, "grad_norm": 4.65625, "learning_rate": 6.281404644080701e-06, "loss": 4.063, "step": 15630 }, { "epoch": 0.7441948991244766, "grad_norm": 4.9375, "learning_rate": 6.279025504377617e-06, "loss": 4.0665, "step": 15640 }, { "epoch": 0.7446707270650933, "grad_norm": 4.75, "learning_rate": 6.276646364674535e-06, "loss": 4.0082, "step": 15650 }, { "epoch": 0.7451465550057099, "grad_norm": 4.40625, "learning_rate": 6.274267224971451e-06, "loss": 4.0517, "step": 15660 }, { "epoch": 0.7456223829463267, "grad_norm": 10.3125, "learning_rate": 6.271888085268367e-06, "loss": 4.1103, "step": 15670 }, { "epoch": 0.7460982108869433, "grad_norm": 4.6875, "learning_rate": 6.269508945565284e-06, "loss": 4.0882, "step": 15680 }, { "epoch": 0.7465740388275599, "grad_norm": 5.0, "learning_rate": 6.267129805862201e-06, "loss": 4.0366, "step": 15690 }, { "epoch": 0.7470498667681766, "grad_norm": 5.0625, "learning_rate": 6.264750666159117e-06, "loss": 4.1119, "step": 15700 }, { "epoch": 0.7475256947087933, "grad_norm": 4.875, "learning_rate": 6.262371526456034e-06, "loss": 3.9956, "step": 15710 }, { "epoch": 0.74800152264941, "grad_norm": 4.6875, "learning_rate": 6.259992386752951e-06, "loss": 4.0683, "step": 15720 }, { "epoch": 0.7484773505900266, "grad_norm": 4.28125, "learning_rate": 6.257613247049868e-06, "loss": 3.969, "step": 15730 }, { "epoch": 0.7489531785306434, "grad_norm": 4.8125, "learning_rate": 6.255234107346784e-06, "loss": 4.1181, "step": 15740 }, { "epoch": 0.74942900647126, "grad_norm": 5.59375, "learning_rate": 6.2528549676437e-06, "loss": 4.0113, "step": 15750 }, { "epoch": 0.7499048344118767, "grad_norm": 4.53125, "learning_rate": 6.250475827940618e-06, "loss": 3.8787, "step": 15760 }, { "epoch": 0.7503806623524933, "grad_norm": 4.59375, "learning_rate": 6.248096688237534e-06, "loss": 4.1557, "step": 15770 }, { "epoch": 0.75085649029311, "grad_norm": 4.53125, "learning_rate": 6.24571754853445e-06, "loss": 3.935, "step": 15780 }, { "epoch": 0.7513323182337267, "grad_norm": 4.6875, "learning_rate": 6.2433384088313675e-06, "loss": 3.9685, "step": 15790 }, { "epoch": 0.7518081461743433, "grad_norm": 4.625, "learning_rate": 6.2409592691282836e-06, "loss": 4.0064, "step": 15800 }, { "epoch": 0.7522839741149601, "grad_norm": 4.75, "learning_rate": 6.2385801294252005e-06, "loss": 3.9972, "step": 15810 }, { "epoch": 0.7527598020555767, "grad_norm": 4.6875, "learning_rate": 6.2362009897221165e-06, "loss": 4.0692, "step": 15820 }, { "epoch": 0.7532356299961934, "grad_norm": 5.0, "learning_rate": 6.233821850019034e-06, "loss": 3.9985, "step": 15830 }, { "epoch": 0.75371145793681, "grad_norm": 4.59375, "learning_rate": 6.23144271031595e-06, "loss": 3.9679, "step": 15840 }, { "epoch": 0.7541872858774267, "grad_norm": 5.0, "learning_rate": 6.2290635706128664e-06, "loss": 4.0682, "step": 15850 }, { "epoch": 0.7546631138180434, "grad_norm": 4.71875, "learning_rate": 6.226684430909784e-06, "loss": 3.9293, "step": 15860 }, { "epoch": 0.75513894175866, "grad_norm": 4.875, "learning_rate": 6.2243052912067e-06, "loss": 4.0489, "step": 15870 }, { "epoch": 0.7556147696992768, "grad_norm": 4.65625, "learning_rate": 6.221926151503616e-06, "loss": 4.1019, "step": 15880 }, { "epoch": 0.7560905976398934, "grad_norm": 4.8125, "learning_rate": 6.219547011800533e-06, "loss": 3.9441, "step": 15890 }, { "epoch": 0.7565664255805101, "grad_norm": 5.125, "learning_rate": 6.21716787209745e-06, "loss": 4.1587, "step": 15900 }, { "epoch": 0.7570422535211268, "grad_norm": 4.65625, "learning_rate": 6.214788732394367e-06, "loss": 4.0733, "step": 15910 }, { "epoch": 0.7575180814617435, "grad_norm": 4.78125, "learning_rate": 6.212409592691283e-06, "loss": 4.1701, "step": 15920 }, { "epoch": 0.7579939094023601, "grad_norm": 4.96875, "learning_rate": 6.210030452988201e-06, "loss": 3.9001, "step": 15930 }, { "epoch": 0.7584697373429767, "grad_norm": 5.03125, "learning_rate": 6.207651313285117e-06, "loss": 4.0682, "step": 15940 }, { "epoch": 0.7589455652835935, "grad_norm": 4.9375, "learning_rate": 6.205272173582033e-06, "loss": 4.1432, "step": 15950 }, { "epoch": 0.7594213932242101, "grad_norm": 4.8125, "learning_rate": 6.202893033878949e-06, "loss": 3.8908, "step": 15960 }, { "epoch": 0.7598972211648268, "grad_norm": 4.71875, "learning_rate": 6.200513894175867e-06, "loss": 4.1056, "step": 15970 }, { "epoch": 0.7603730491054435, "grad_norm": 4.53125, "learning_rate": 6.198134754472783e-06, "loss": 3.9985, "step": 15980 }, { "epoch": 0.7608488770460602, "grad_norm": 4.625, "learning_rate": 6.1957556147697e-06, "loss": 4.0783, "step": 15990 }, { "epoch": 0.7613247049866768, "grad_norm": 4.28125, "learning_rate": 6.193376475066616e-06, "loss": 4.0523, "step": 16000 }, { "epoch": 0.7618005329272934, "grad_norm": 4.375, "learning_rate": 6.190997335363534e-06, "loss": 4.0694, "step": 16010 }, { "epoch": 0.7622763608679102, "grad_norm": 4.875, "learning_rate": 6.18861819566045e-06, "loss": 4.147, "step": 16020 }, { "epoch": 0.7627521888085268, "grad_norm": 4.3125, "learning_rate": 6.186239055957366e-06, "loss": 4.0269, "step": 16030 }, { "epoch": 0.7632280167491435, "grad_norm": 5.125, "learning_rate": 6.183859916254284e-06, "loss": 3.8811, "step": 16040 }, { "epoch": 0.7637038446897602, "grad_norm": 4.8125, "learning_rate": 6.1814807765512e-06, "loss": 4.0341, "step": 16050 }, { "epoch": 0.7641796726303769, "grad_norm": 4.625, "learning_rate": 6.179101636848116e-06, "loss": 4.0727, "step": 16060 }, { "epoch": 0.7646555005709935, "grad_norm": 4.65625, "learning_rate": 6.176722497145033e-06, "loss": 4.0159, "step": 16070 }, { "epoch": 0.7651313285116103, "grad_norm": 5.28125, "learning_rate": 6.1743433574419496e-06, "loss": 3.9368, "step": 16080 }, { "epoch": 0.7656071564522269, "grad_norm": 5.0, "learning_rate": 6.1719642177388665e-06, "loss": 3.9434, "step": 16090 }, { "epoch": 0.7660829843928435, "grad_norm": 4.6875, "learning_rate": 6.1695850780357825e-06, "loss": 4.0184, "step": 16100 }, { "epoch": 0.7665588123334602, "grad_norm": 4.5, "learning_rate": 6.1672059383326995e-06, "loss": 4.059, "step": 16110 }, { "epoch": 0.7670346402740769, "grad_norm": 5.375, "learning_rate": 6.164826798629616e-06, "loss": 4.0581, "step": 16120 }, { "epoch": 0.7675104682146936, "grad_norm": 4.59375, "learning_rate": 6.1624476589265324e-06, "loss": 4.0027, "step": 16130 }, { "epoch": 0.7679862961553102, "grad_norm": 4.40625, "learning_rate": 6.1600685192234485e-06, "loss": 4.0505, "step": 16140 }, { "epoch": 0.768462124095927, "grad_norm": 4.4375, "learning_rate": 6.157689379520366e-06, "loss": 4.0247, "step": 16150 }, { "epoch": 0.7689379520365436, "grad_norm": 5.15625, "learning_rate": 6.155310239817282e-06, "loss": 3.8351, "step": 16160 }, { "epoch": 0.7694137799771602, "grad_norm": 4.625, "learning_rate": 6.152931100114199e-06, "loss": 3.9376, "step": 16170 }, { "epoch": 0.7698896079177769, "grad_norm": 4.5625, "learning_rate": 6.150551960411116e-06, "loss": 4.063, "step": 16180 }, { "epoch": 0.7703654358583936, "grad_norm": 5.0, "learning_rate": 6.148172820708033e-06, "loss": 4.0742, "step": 16190 }, { "epoch": 0.7708412637990103, "grad_norm": 5.375, "learning_rate": 6.145793681004949e-06, "loss": 3.977, "step": 16200 }, { "epoch": 0.7713170917396269, "grad_norm": 4.25, "learning_rate": 6.143414541301865e-06, "loss": 4.0151, "step": 16210 }, { "epoch": 0.7717929196802437, "grad_norm": 4.90625, "learning_rate": 6.141035401598783e-06, "loss": 4.1343, "step": 16220 }, { "epoch": 0.7722687476208603, "grad_norm": 5.03125, "learning_rate": 6.138656261895699e-06, "loss": 4.0956, "step": 16230 }, { "epoch": 0.772744575561477, "grad_norm": 4.5, "learning_rate": 6.136277122192615e-06, "loss": 4.0048, "step": 16240 }, { "epoch": 0.7732204035020936, "grad_norm": 5.15625, "learning_rate": 6.133897982489533e-06, "loss": 4.0492, "step": 16250 }, { "epoch": 0.7736962314427103, "grad_norm": 4.59375, "learning_rate": 6.131518842786449e-06, "loss": 4.0997, "step": 16260 }, { "epoch": 0.774172059383327, "grad_norm": 4.84375, "learning_rate": 6.129139703083366e-06, "loss": 3.9697, "step": 16270 }, { "epoch": 0.7746478873239436, "grad_norm": 4.8125, "learning_rate": 6.126760563380282e-06, "loss": 4.0642, "step": 16280 }, { "epoch": 0.7751237152645604, "grad_norm": 4.84375, "learning_rate": 6.124381423677199e-06, "loss": 3.9423, "step": 16290 }, { "epoch": 0.775599543205177, "grad_norm": 4.5625, "learning_rate": 6.122002283974116e-06, "loss": 4.1297, "step": 16300 }, { "epoch": 0.7760753711457937, "grad_norm": 4.78125, "learning_rate": 6.119623144271032e-06, "loss": 3.9061, "step": 16310 }, { "epoch": 0.7765511990864103, "grad_norm": 4.78125, "learning_rate": 6.117244004567948e-06, "loss": 3.9688, "step": 16320 }, { "epoch": 0.777027027027027, "grad_norm": 4.8125, "learning_rate": 6.114864864864866e-06, "loss": 4.0037, "step": 16330 }, { "epoch": 0.7775028549676437, "grad_norm": 4.90625, "learning_rate": 6.112485725161782e-06, "loss": 4.0437, "step": 16340 }, { "epoch": 0.7779786829082603, "grad_norm": 4.65625, "learning_rate": 6.110106585458699e-06, "loss": 4.0203, "step": 16350 }, { "epoch": 0.7784545108488771, "grad_norm": 4.65625, "learning_rate": 6.1077274457556156e-06, "loss": 4.0364, "step": 16360 }, { "epoch": 0.7789303387894937, "grad_norm": 4.625, "learning_rate": 6.105348306052532e-06, "loss": 4.1037, "step": 16370 }, { "epoch": 0.7794061667301104, "grad_norm": 4.90625, "learning_rate": 6.1029691663494485e-06, "loss": 4.1839, "step": 16380 }, { "epoch": 0.779881994670727, "grad_norm": 4.84375, "learning_rate": 6.100590026646365e-06, "loss": 4.0354, "step": 16390 }, { "epoch": 0.7803578226113438, "grad_norm": 4.65625, "learning_rate": 6.098210886943282e-06, "loss": 4.1208, "step": 16400 }, { "epoch": 0.7808336505519604, "grad_norm": 5.15625, "learning_rate": 6.0958317472401984e-06, "loss": 4.0279, "step": 16410 }, { "epoch": 0.781309478492577, "grad_norm": 4.6875, "learning_rate": 6.0934526075371145e-06, "loss": 4.115, "step": 16420 }, { "epoch": 0.7817853064331938, "grad_norm": 4.53125, "learning_rate": 6.091073467834032e-06, "loss": 3.8589, "step": 16430 }, { "epoch": 0.7822611343738104, "grad_norm": 4.96875, "learning_rate": 6.088694328130948e-06, "loss": 3.9043, "step": 16440 }, { "epoch": 0.7827369623144271, "grad_norm": 4.65625, "learning_rate": 6.086315188427865e-06, "loss": 4.1897, "step": 16450 }, { "epoch": 0.7832127902550438, "grad_norm": 4.40625, "learning_rate": 6.083936048724781e-06, "loss": 4.1155, "step": 16460 }, { "epoch": 0.7836886181956605, "grad_norm": 4.5, "learning_rate": 6.081556909021698e-06, "loss": 3.9021, "step": 16470 }, { "epoch": 0.7841644461362771, "grad_norm": 4.59375, "learning_rate": 6.079177769318615e-06, "loss": 4.0676, "step": 16480 }, { "epoch": 0.7846402740768937, "grad_norm": 4.125, "learning_rate": 6.076798629615531e-06, "loss": 3.8652, "step": 16490 }, { "epoch": 0.7851161020175105, "grad_norm": 4.59375, "learning_rate": 6.074419489912449e-06, "loss": 4.1084, "step": 16500 }, { "epoch": 0.7855919299581271, "grad_norm": 5.09375, "learning_rate": 6.072040350209365e-06, "loss": 4.035, "step": 16510 }, { "epoch": 0.7860677578987438, "grad_norm": 4.6875, "learning_rate": 6.069661210506281e-06, "loss": 4.0431, "step": 16520 }, { "epoch": 0.7865435858393605, "grad_norm": 4.5, "learning_rate": 6.067282070803198e-06, "loss": 4.101, "step": 16530 }, { "epoch": 0.7870194137799772, "grad_norm": 4.53125, "learning_rate": 6.064902931100115e-06, "loss": 4.0232, "step": 16540 }, { "epoch": 0.7874952417205938, "grad_norm": 4.71875, "learning_rate": 6.062523791397031e-06, "loss": 3.915, "step": 16550 }, { "epoch": 0.7879710696612106, "grad_norm": 4.875, "learning_rate": 6.060144651693948e-06, "loss": 3.9692, "step": 16560 }, { "epoch": 0.7884468976018272, "grad_norm": 5.09375, "learning_rate": 6.057765511990864e-06, "loss": 4.0399, "step": 16570 }, { "epoch": 0.7889227255424438, "grad_norm": 4.59375, "learning_rate": 6.055386372287782e-06, "loss": 4.0336, "step": 16580 }, { "epoch": 0.7893985534830605, "grad_norm": 4.84375, "learning_rate": 6.053007232584698e-06, "loss": 4.0367, "step": 16590 }, { "epoch": 0.7898743814236772, "grad_norm": 4.59375, "learning_rate": 6.050628092881614e-06, "loss": 4.0724, "step": 16600 }, { "epoch": 0.7903502093642939, "grad_norm": 4.9375, "learning_rate": 6.048248953178532e-06, "loss": 3.9273, "step": 16610 }, { "epoch": 0.7908260373049105, "grad_norm": 4.4375, "learning_rate": 6.045869813475448e-06, "loss": 4.0302, "step": 16620 }, { "epoch": 0.7913018652455273, "grad_norm": 4.78125, "learning_rate": 6.043490673772364e-06, "loss": 4.1711, "step": 16630 }, { "epoch": 0.7917776931861439, "grad_norm": 5.25, "learning_rate": 6.041111534069281e-06, "loss": 3.8964, "step": 16640 }, { "epoch": 0.7922535211267606, "grad_norm": 4.65625, "learning_rate": 6.038732394366198e-06, "loss": 3.9571, "step": 16650 }, { "epoch": 0.7927293490673772, "grad_norm": 4.71875, "learning_rate": 6.0363532546631145e-06, "loss": 4.0282, "step": 16660 }, { "epoch": 0.7932051770079939, "grad_norm": 4.625, "learning_rate": 6.033974114960031e-06, "loss": 3.9315, "step": 16670 }, { "epoch": 0.7936810049486106, "grad_norm": 4.6875, "learning_rate": 6.031594975256948e-06, "loss": 4.0238, "step": 16680 }, { "epoch": 0.7941568328892272, "grad_norm": 4.125, "learning_rate": 6.0292158355538644e-06, "loss": 4.0536, "step": 16690 }, { "epoch": 0.794632660829844, "grad_norm": 4.46875, "learning_rate": 6.0268366958507805e-06, "loss": 4.0622, "step": 16700 }, { "epoch": 0.7951084887704606, "grad_norm": 4.59375, "learning_rate": 6.024457556147697e-06, "loss": 4.1305, "step": 16710 }, { "epoch": 0.7955843167110773, "grad_norm": 4.75, "learning_rate": 6.022078416444614e-06, "loss": 4.1786, "step": 16720 }, { "epoch": 0.796060144651694, "grad_norm": 4.84375, "learning_rate": 6.01969927674153e-06, "loss": 3.9846, "step": 16730 }, { "epoch": 0.7965359725923106, "grad_norm": 4.53125, "learning_rate": 6.017320137038447e-06, "loss": 4.051, "step": 16740 }, { "epoch": 0.7970118005329273, "grad_norm": 4.875, "learning_rate": 6.014940997335364e-06, "loss": 4.1123, "step": 16750 }, { "epoch": 0.7974876284735439, "grad_norm": 5.28125, "learning_rate": 6.012561857632281e-06, "loss": 3.8585, "step": 16760 }, { "epoch": 0.7979634564141607, "grad_norm": 4.84375, "learning_rate": 6.010182717929197e-06, "loss": 3.9704, "step": 16770 }, { "epoch": 0.7984392843547773, "grad_norm": 4.5, "learning_rate": 6.007803578226113e-06, "loss": 4.0485, "step": 16780 }, { "epoch": 0.798915112295394, "grad_norm": 5.15625, "learning_rate": 6.005424438523031e-06, "loss": 4.0118, "step": 16790 }, { "epoch": 0.7993909402360106, "grad_norm": 4.84375, "learning_rate": 6.003045298819947e-06, "loss": 3.9203, "step": 16800 }, { "epoch": 0.7998667681766274, "grad_norm": 4.90625, "learning_rate": 6.000666159116863e-06, "loss": 4.0677, "step": 16810 }, { "epoch": 0.800342596117244, "grad_norm": 4.5625, "learning_rate": 5.998287019413781e-06, "loss": 4.0095, "step": 16820 }, { "epoch": 0.8008184240578606, "grad_norm": 4.6875, "learning_rate": 5.995907879710697e-06, "loss": 4.1253, "step": 16830 }, { "epoch": 0.8012942519984774, "grad_norm": 4.8125, "learning_rate": 5.993528740007614e-06, "loss": 3.9376, "step": 16840 }, { "epoch": 0.801770079939094, "grad_norm": 4.875, "learning_rate": 5.99114960030453e-06, "loss": 4.0162, "step": 16850 }, { "epoch": 0.8022459078797107, "grad_norm": 5.0625, "learning_rate": 5.988770460601448e-06, "loss": 4.0098, "step": 16860 }, { "epoch": 0.8027217358203274, "grad_norm": 4.46875, "learning_rate": 5.986391320898364e-06, "loss": 4.001, "step": 16870 }, { "epoch": 0.8031975637609441, "grad_norm": 5.46875, "learning_rate": 5.98401218119528e-06, "loss": 3.997, "step": 16880 }, { "epoch": 0.8036733917015607, "grad_norm": 4.75, "learning_rate": 5.981633041492196e-06, "loss": 4.0341, "step": 16890 }, { "epoch": 0.8041492196421773, "grad_norm": 4.59375, "learning_rate": 5.979253901789114e-06, "loss": 3.9993, "step": 16900 }, { "epoch": 0.8046250475827941, "grad_norm": 4.4375, "learning_rate": 5.97687476208603e-06, "loss": 4.0356, "step": 16910 }, { "epoch": 0.8051008755234107, "grad_norm": 5.0, "learning_rate": 5.974495622382947e-06, "loss": 3.9351, "step": 16920 }, { "epoch": 0.8055767034640274, "grad_norm": 4.65625, "learning_rate": 5.972116482679864e-06, "loss": 4.0211, "step": 16930 }, { "epoch": 0.8060525314046441, "grad_norm": 5.0, "learning_rate": 5.9697373429767805e-06, "loss": 3.9058, "step": 16940 }, { "epoch": 0.8065283593452608, "grad_norm": 4.65625, "learning_rate": 5.967358203273697e-06, "loss": 4.0067, "step": 16950 }, { "epoch": 0.8070041872858774, "grad_norm": 4.75, "learning_rate": 5.964979063570613e-06, "loss": 4.0089, "step": 16960 }, { "epoch": 0.8074800152264942, "grad_norm": 4.59375, "learning_rate": 5.9625999238675304e-06, "loss": 4.0756, "step": 16970 }, { "epoch": 0.8079558431671108, "grad_norm": 4.8125, "learning_rate": 5.9602207841644465e-06, "loss": 3.8868, "step": 16980 }, { "epoch": 0.8084316711077274, "grad_norm": 4.65625, "learning_rate": 5.9578416444613626e-06, "loss": 4.0048, "step": 16990 }, { "epoch": 0.8089074990483441, "grad_norm": 4.65625, "learning_rate": 5.95546250475828e-06, "loss": 4.0272, "step": 17000 }, { "epoch": 0.8093833269889608, "grad_norm": 4.5625, "learning_rate": 5.953083365055196e-06, "loss": 3.8977, "step": 17010 }, { "epoch": 0.8098591549295775, "grad_norm": 4.78125, "learning_rate": 5.950704225352113e-06, "loss": 3.859, "step": 17020 }, { "epoch": 0.8103349828701941, "grad_norm": 4.53125, "learning_rate": 5.948325085649029e-06, "loss": 4.1255, "step": 17030 }, { "epoch": 0.8108108108108109, "grad_norm": 4.65625, "learning_rate": 5.945945945945947e-06, "loss": 4.043, "step": 17040 }, { "epoch": 0.8112866387514275, "grad_norm": 4.40625, "learning_rate": 5.943566806242863e-06, "loss": 3.8973, "step": 17050 }, { "epoch": 0.8117624666920441, "grad_norm": 5.0625, "learning_rate": 5.941187666539779e-06, "loss": 4.041, "step": 17060 }, { "epoch": 0.8122382946326608, "grad_norm": 4.40625, "learning_rate": 5.938808526836697e-06, "loss": 4.0638, "step": 17070 }, { "epoch": 0.8127141225732775, "grad_norm": 5.0, "learning_rate": 5.936429387133613e-06, "loss": 4.0839, "step": 17080 }, { "epoch": 0.8131899505138942, "grad_norm": 5.1875, "learning_rate": 5.934050247430529e-06, "loss": 3.9544, "step": 17090 }, { "epoch": 0.8136657784545108, "grad_norm": 4.6875, "learning_rate": 5.931671107727446e-06, "loss": 4.0468, "step": 17100 }, { "epoch": 0.8141416063951276, "grad_norm": 4.84375, "learning_rate": 5.929291968024363e-06, "loss": 4.0609, "step": 17110 }, { "epoch": 0.8146174343357442, "grad_norm": 5.0625, "learning_rate": 5.92691282832128e-06, "loss": 4.131, "step": 17120 }, { "epoch": 0.8150932622763609, "grad_norm": 5.03125, "learning_rate": 5.924533688618196e-06, "loss": 4.0551, "step": 17130 }, { "epoch": 0.8155690902169775, "grad_norm": 4.90625, "learning_rate": 5.922154548915113e-06, "loss": 4.1272, "step": 17140 }, { "epoch": 0.8160449181575942, "grad_norm": 4.625, "learning_rate": 5.91977540921203e-06, "loss": 4.065, "step": 17150 }, { "epoch": 0.8165207460982109, "grad_norm": 4.6875, "learning_rate": 5.917396269508946e-06, "loss": 4.0837, "step": 17160 }, { "epoch": 0.8169965740388275, "grad_norm": 5.03125, "learning_rate": 5.915017129805862e-06, "loss": 4.1537, "step": 17170 }, { "epoch": 0.8174724019794443, "grad_norm": 4.84375, "learning_rate": 5.91263799010278e-06, "loss": 3.9226, "step": 17180 }, { "epoch": 0.8179482299200609, "grad_norm": 10.0625, "learning_rate": 5.910258850399696e-06, "loss": 4.0112, "step": 17190 }, { "epoch": 0.8184240578606776, "grad_norm": 5.15625, "learning_rate": 5.907879710696613e-06, "loss": 4.1397, "step": 17200 }, { "epoch": 0.8188998858012942, "grad_norm": 4.78125, "learning_rate": 5.905500570993529e-06, "loss": 4.0973, "step": 17210 }, { "epoch": 0.8193757137419109, "grad_norm": 4.90625, "learning_rate": 5.9031214312904465e-06, "loss": 4.0013, "step": 17220 }, { "epoch": 0.8198515416825276, "grad_norm": 4.625, "learning_rate": 5.900742291587363e-06, "loss": 4.1381, "step": 17230 }, { "epoch": 0.8203273696231442, "grad_norm": 4.65625, "learning_rate": 5.898363151884279e-06, "loss": 3.97, "step": 17240 }, { "epoch": 0.820803197563761, "grad_norm": 5.03125, "learning_rate": 5.8959840121811964e-06, "loss": 4.1371, "step": 17250 }, { "epoch": 0.8212790255043776, "grad_norm": 4.84375, "learning_rate": 5.8936048724781125e-06, "loss": 4.0925, "step": 17260 }, { "epoch": 0.8217548534449943, "grad_norm": 5.40625, "learning_rate": 5.8912257327750286e-06, "loss": 4.0337, "step": 17270 }, { "epoch": 0.822230681385611, "grad_norm": 5.28125, "learning_rate": 5.8888465930719455e-06, "loss": 3.9863, "step": 17280 }, { "epoch": 0.8227065093262277, "grad_norm": 4.78125, "learning_rate": 5.886467453368862e-06, "loss": 4.0669, "step": 17290 }, { "epoch": 0.8231823372668443, "grad_norm": 4.5, "learning_rate": 5.884088313665779e-06, "loss": 4.0481, "step": 17300 }, { "epoch": 0.8236581652074609, "grad_norm": 4.84375, "learning_rate": 5.881709173962695e-06, "loss": 3.94, "step": 17310 }, { "epoch": 0.8241339931480777, "grad_norm": 4.9375, "learning_rate": 5.879330034259612e-06, "loss": 4.0237, "step": 17320 }, { "epoch": 0.8246098210886943, "grad_norm": 4.75, "learning_rate": 5.876950894556529e-06, "loss": 4.1386, "step": 17330 }, { "epoch": 0.825085649029311, "grad_norm": 4.65625, "learning_rate": 5.874571754853445e-06, "loss": 4.1289, "step": 17340 }, { "epoch": 0.8255614769699277, "grad_norm": 4.9375, "learning_rate": 5.872192615150361e-06, "loss": 4.0102, "step": 17350 }, { "epoch": 0.8260373049105444, "grad_norm": 4.8125, "learning_rate": 5.869813475447279e-06, "loss": 3.8317, "step": 17360 }, { "epoch": 0.826513132851161, "grad_norm": 5.125, "learning_rate": 5.867434335744195e-06, "loss": 4.1405, "step": 17370 }, { "epoch": 0.8269889607917776, "grad_norm": 4.71875, "learning_rate": 5.865055196041112e-06, "loss": 3.9364, "step": 17380 }, { "epoch": 0.8274647887323944, "grad_norm": 4.59375, "learning_rate": 5.862676056338029e-06, "loss": 4.0035, "step": 17390 }, { "epoch": 0.827940616673011, "grad_norm": 5.1875, "learning_rate": 5.860296916634945e-06, "loss": 3.8388, "step": 17400 }, { "epoch": 0.8284164446136277, "grad_norm": 4.90625, "learning_rate": 5.857917776931862e-06, "loss": 4.1678, "step": 17410 }, { "epoch": 0.8288922725542444, "grad_norm": 4.84375, "learning_rate": 5.855538637228778e-06, "loss": 4.0637, "step": 17420 }, { "epoch": 0.8293681004948611, "grad_norm": 4.6875, "learning_rate": 5.853159497525696e-06, "loss": 3.9432, "step": 17430 }, { "epoch": 0.8298439284354777, "grad_norm": 4.75, "learning_rate": 5.850780357822612e-06, "loss": 3.9246, "step": 17440 }, { "epoch": 0.8303197563760945, "grad_norm": 4.625, "learning_rate": 5.848401218119528e-06, "loss": 4.0346, "step": 17450 }, { "epoch": 0.8307955843167111, "grad_norm": 5.03125, "learning_rate": 5.846022078416446e-06, "loss": 4.0101, "step": 17460 }, { "epoch": 0.8312714122573277, "grad_norm": 4.78125, "learning_rate": 5.843642938713362e-06, "loss": 4.0436, "step": 17470 }, { "epoch": 0.8317472401979444, "grad_norm": 4.375, "learning_rate": 5.841263799010279e-06, "loss": 3.9906, "step": 17480 }, { "epoch": 0.8322230681385611, "grad_norm": 5.125, "learning_rate": 5.838884659307195e-06, "loss": 4.0651, "step": 17490 }, { "epoch": 0.8326988960791778, "grad_norm": 4.9375, "learning_rate": 5.836505519604112e-06, "loss": 3.9708, "step": 17500 }, { "epoch": 0.8331747240197944, "grad_norm": 5.0, "learning_rate": 5.834126379901029e-06, "loss": 4.1254, "step": 17510 }, { "epoch": 0.8336505519604112, "grad_norm": 4.71875, "learning_rate": 5.831747240197945e-06, "loss": 3.8732, "step": 17520 }, { "epoch": 0.8341263799010278, "grad_norm": 4.78125, "learning_rate": 5.829368100494861e-06, "loss": 3.9511, "step": 17530 }, { "epoch": 0.8346022078416445, "grad_norm": 4.96875, "learning_rate": 5.8269889607917785e-06, "loss": 4.1295, "step": 17540 }, { "epoch": 0.8350780357822611, "grad_norm": 5.03125, "learning_rate": 5.8246098210886946e-06, "loss": 4.0478, "step": 17550 }, { "epoch": 0.8355538637228778, "grad_norm": 4.71875, "learning_rate": 5.8222306813856115e-06, "loss": 4.0223, "step": 17560 }, { "epoch": 0.8360296916634945, "grad_norm": 4.71875, "learning_rate": 5.819851541682528e-06, "loss": 3.9765, "step": 17570 }, { "epoch": 0.8365055196041111, "grad_norm": 4.6875, "learning_rate": 5.8174724019794445e-06, "loss": 4.0603, "step": 17580 }, { "epoch": 0.8369813475447279, "grad_norm": 4.1875, "learning_rate": 5.815093262276361e-06, "loss": 3.9592, "step": 17590 }, { "epoch": 0.8374571754853445, "grad_norm": 4.625, "learning_rate": 5.8127141225732774e-06, "loss": 3.9949, "step": 17600 }, { "epoch": 0.8379330034259612, "grad_norm": 4.5, "learning_rate": 5.810334982870195e-06, "loss": 4.1, "step": 17610 }, { "epoch": 0.8384088313665778, "grad_norm": 4.5625, "learning_rate": 5.807955843167111e-06, "loss": 4.0307, "step": 17620 }, { "epoch": 0.8388846593071945, "grad_norm": 4.78125, "learning_rate": 5.805576703464027e-06, "loss": 4.1725, "step": 17630 }, { "epoch": 0.8393604872478112, "grad_norm": 4.8125, "learning_rate": 5.803197563760945e-06, "loss": 4.1092, "step": 17640 }, { "epoch": 0.8398363151884278, "grad_norm": 4.9375, "learning_rate": 5.800818424057861e-06, "loss": 4.0927, "step": 17650 }, { "epoch": 0.8403121431290446, "grad_norm": 5.125, "learning_rate": 5.798439284354777e-06, "loss": 4.0455, "step": 17660 }, { "epoch": 0.8407879710696612, "grad_norm": 4.6875, "learning_rate": 5.796060144651694e-06, "loss": 4.0599, "step": 17670 }, { "epoch": 0.8412637990102779, "grad_norm": 4.6875, "learning_rate": 5.793681004948611e-06, "loss": 4.0809, "step": 17680 }, { "epoch": 0.8417396269508945, "grad_norm": 4.5, "learning_rate": 5.791301865245528e-06, "loss": 3.885, "step": 17690 }, { "epoch": 0.8422154548915113, "grad_norm": 5.21875, "learning_rate": 5.788922725542444e-06, "loss": 4.1512, "step": 17700 }, { "epoch": 0.8426912828321279, "grad_norm": 4.9375, "learning_rate": 5.786543585839362e-06, "loss": 4.0476, "step": 17710 }, { "epoch": 0.8431671107727445, "grad_norm": 4.625, "learning_rate": 5.784164446136278e-06, "loss": 4.1078, "step": 17720 }, { "epoch": 0.8436429387133613, "grad_norm": 5.1875, "learning_rate": 5.781785306433194e-06, "loss": 4.047, "step": 17730 }, { "epoch": 0.8441187666539779, "grad_norm": 4.5625, "learning_rate": 5.779406166730111e-06, "loss": 4.1581, "step": 17740 }, { "epoch": 0.8445945945945946, "grad_norm": 4.53125, "learning_rate": 5.777027027027028e-06, "loss": 4.2108, "step": 17750 }, { "epoch": 0.8450704225352113, "grad_norm": 4.6875, "learning_rate": 5.774647887323944e-06, "loss": 3.9476, "step": 17760 }, { "epoch": 0.845546250475828, "grad_norm": 4.375, "learning_rate": 5.772268747620861e-06, "loss": 4.0543, "step": 17770 }, { "epoch": 0.8460220784164446, "grad_norm": 4.625, "learning_rate": 5.769889607917777e-06, "loss": 4.0462, "step": 17780 }, { "epoch": 0.8464979063570612, "grad_norm": 4.6875, "learning_rate": 5.767510468214695e-06, "loss": 4.1684, "step": 17790 }, { "epoch": 0.846973734297678, "grad_norm": 4.75, "learning_rate": 5.765131328511611e-06, "loss": 4.1128, "step": 17800 }, { "epoch": 0.8474495622382946, "grad_norm": 4.875, "learning_rate": 5.762752188808527e-06, "loss": 4.0164, "step": 17810 }, { "epoch": 0.8479253901789113, "grad_norm": 4.53125, "learning_rate": 5.7603730491054445e-06, "loss": 4.0143, "step": 17820 }, { "epoch": 0.848401218119528, "grad_norm": 4.84375, "learning_rate": 5.7579939094023606e-06, "loss": 3.916, "step": 17830 }, { "epoch": 0.8488770460601447, "grad_norm": 5.0, "learning_rate": 5.755614769699277e-06, "loss": 4.02, "step": 17840 }, { "epoch": 0.8493528740007613, "grad_norm": 4.75, "learning_rate": 5.7532356299961935e-06, "loss": 3.9725, "step": 17850 }, { "epoch": 0.849828701941378, "grad_norm": 5.34375, "learning_rate": 5.7508564902931105e-06, "loss": 4.1663, "step": 17860 }, { "epoch": 0.8503045298819947, "grad_norm": 4.90625, "learning_rate": 5.748477350590027e-06, "loss": 4.0947, "step": 17870 }, { "epoch": 0.8507803578226113, "grad_norm": 4.8125, "learning_rate": 5.7460982108869434e-06, "loss": 4.1808, "step": 17880 }, { "epoch": 0.851256185763228, "grad_norm": 4.21875, "learning_rate": 5.743719071183861e-06, "loss": 3.8728, "step": 17890 }, { "epoch": 0.8517320137038447, "grad_norm": 4.78125, "learning_rate": 5.741339931480777e-06, "loss": 4.0157, "step": 17900 }, { "epoch": 0.8522078416444614, "grad_norm": 5.03125, "learning_rate": 5.738960791777693e-06, "loss": 4.0015, "step": 17910 }, { "epoch": 0.852683669585078, "grad_norm": 4.84375, "learning_rate": 5.736581652074609e-06, "loss": 4.0145, "step": 17920 }, { "epoch": 0.8531594975256948, "grad_norm": 4.46875, "learning_rate": 5.734202512371527e-06, "loss": 3.9407, "step": 17930 }, { "epoch": 0.8536353254663114, "grad_norm": 5.0, "learning_rate": 5.731823372668443e-06, "loss": 3.9121, "step": 17940 }, { "epoch": 0.854111153406928, "grad_norm": 5.25, "learning_rate": 5.72944423296536e-06, "loss": 4.0619, "step": 17950 }, { "epoch": 0.8545869813475447, "grad_norm": 5.5625, "learning_rate": 5.727065093262277e-06, "loss": 4.0239, "step": 17960 }, { "epoch": 0.8550628092881614, "grad_norm": 5.125, "learning_rate": 5.724685953559194e-06, "loss": 4.1221, "step": 17970 }, { "epoch": 0.8555386372287781, "grad_norm": 4.65625, "learning_rate": 5.72230681385611e-06, "loss": 4.1437, "step": 17980 }, { "epoch": 0.8560144651693947, "grad_norm": 4.65625, "learning_rate": 5.719927674153026e-06, "loss": 3.9595, "step": 17990 }, { "epoch": 0.8564902931100115, "grad_norm": 4.125, "learning_rate": 5.717548534449944e-06, "loss": 3.9899, "step": 18000 }, { "epoch": 0.8569661210506281, "grad_norm": 4.46875, "learning_rate": 5.71516939474686e-06, "loss": 3.9662, "step": 18010 }, { "epoch": 0.8574419489912448, "grad_norm": 4.78125, "learning_rate": 5.712790255043776e-06, "loss": 3.9381, "step": 18020 }, { "epoch": 0.8579177769318614, "grad_norm": 4.71875, "learning_rate": 5.710411115340694e-06, "loss": 4.0773, "step": 18030 }, { "epoch": 0.8583936048724781, "grad_norm": 4.8125, "learning_rate": 5.70803197563761e-06, "loss": 4.069, "step": 18040 }, { "epoch": 0.8588694328130948, "grad_norm": 4.65625, "learning_rate": 5.705652835934527e-06, "loss": 3.8333, "step": 18050 }, { "epoch": 0.8593452607537114, "grad_norm": 5.375, "learning_rate": 5.703273696231443e-06, "loss": 3.9678, "step": 18060 }, { "epoch": 0.8598210886943282, "grad_norm": 4.3125, "learning_rate": 5.700894556528361e-06, "loss": 4.0099, "step": 18070 }, { "epoch": 0.8602969166349448, "grad_norm": 4.5625, "learning_rate": 5.698515416825277e-06, "loss": 4.0024, "step": 18080 }, { "epoch": 0.8607727445755615, "grad_norm": 4.625, "learning_rate": 5.696136277122193e-06, "loss": 3.9976, "step": 18090 }, { "epoch": 0.8612485725161781, "grad_norm": 4.40625, "learning_rate": 5.693757137419109e-06, "loss": 4.0315, "step": 18100 }, { "epoch": 0.8617244004567948, "grad_norm": 4.78125, "learning_rate": 5.6913779977160266e-06, "loss": 3.9037, "step": 18110 }, { "epoch": 0.8622002283974115, "grad_norm": 4.75, "learning_rate": 5.688998858012943e-06, "loss": 3.9689, "step": 18120 }, { "epoch": 0.8626760563380281, "grad_norm": 4.65625, "learning_rate": 5.6866197183098595e-06, "loss": 4.1216, "step": 18130 }, { "epoch": 0.8631518842786449, "grad_norm": 4.625, "learning_rate": 5.6842405786067765e-06, "loss": 3.9823, "step": 18140 }, { "epoch": 0.8636277122192615, "grad_norm": 4.75, "learning_rate": 5.681861438903693e-06, "loss": 3.9964, "step": 18150 }, { "epoch": 0.8641035401598782, "grad_norm": 4.71875, "learning_rate": 5.6794822992006094e-06, "loss": 4.0815, "step": 18160 }, { "epoch": 0.8645793681004948, "grad_norm": 4.71875, "learning_rate": 5.6771031594975255e-06, "loss": 4.0017, "step": 18170 }, { "epoch": 0.8650551960411116, "grad_norm": 4.625, "learning_rate": 5.674724019794443e-06, "loss": 4.0681, "step": 18180 }, { "epoch": 0.8655310239817282, "grad_norm": 4.34375, "learning_rate": 5.672344880091359e-06, "loss": 3.9023, "step": 18190 }, { "epoch": 0.8660068519223448, "grad_norm": 4.5625, "learning_rate": 5.669965740388275e-06, "loss": 4.0863, "step": 18200 }, { "epoch": 0.8664826798629616, "grad_norm": 4.8125, "learning_rate": 5.667586600685193e-06, "loss": 4.1895, "step": 18210 }, { "epoch": 0.8669585078035782, "grad_norm": 4.78125, "learning_rate": 5.665207460982109e-06, "loss": 4.0314, "step": 18220 }, { "epoch": 0.8674343357441949, "grad_norm": 4.53125, "learning_rate": 5.662828321279026e-06, "loss": 3.932, "step": 18230 }, { "epoch": 0.8679101636848116, "grad_norm": 5.03125, "learning_rate": 5.660449181575942e-06, "loss": 4.1678, "step": 18240 }, { "epoch": 0.8683859916254283, "grad_norm": 4.59375, "learning_rate": 5.65807004187286e-06, "loss": 4.0731, "step": 18250 }, { "epoch": 0.8688618195660449, "grad_norm": 4.78125, "learning_rate": 5.655690902169776e-06, "loss": 4.1606, "step": 18260 }, { "epoch": 0.8693376475066615, "grad_norm": 4.8125, "learning_rate": 5.653311762466692e-06, "loss": 3.9196, "step": 18270 }, { "epoch": 0.8698134754472783, "grad_norm": 4.5, "learning_rate": 5.65093262276361e-06, "loss": 3.957, "step": 18280 }, { "epoch": 0.8702893033878949, "grad_norm": 4.65625, "learning_rate": 5.648553483060526e-06, "loss": 3.9674, "step": 18290 }, { "epoch": 0.8707651313285116, "grad_norm": 4.8125, "learning_rate": 5.646174343357442e-06, "loss": 3.9733, "step": 18300 }, { "epoch": 0.8712409592691283, "grad_norm": 4.71875, "learning_rate": 5.643795203654359e-06, "loss": 4.2591, "step": 18310 }, { "epoch": 0.871716787209745, "grad_norm": 4.78125, "learning_rate": 5.641416063951276e-06, "loss": 4.0184, "step": 18320 }, { "epoch": 0.8721926151503616, "grad_norm": 4.28125, "learning_rate": 5.639036924248193e-06, "loss": 4.0744, "step": 18330 }, { "epoch": 0.8726684430909784, "grad_norm": 5.1875, "learning_rate": 5.636657784545109e-06, "loss": 4.0404, "step": 18340 }, { "epoch": 0.873144271031595, "grad_norm": 4.59375, "learning_rate": 5.634278644842026e-06, "loss": 4.0413, "step": 18350 }, { "epoch": 0.8736200989722116, "grad_norm": 5.0, "learning_rate": 5.631899505138943e-06, "loss": 4.1686, "step": 18360 }, { "epoch": 0.8740959269128283, "grad_norm": 4.5625, "learning_rate": 5.629520365435859e-06, "loss": 3.8593, "step": 18370 }, { "epoch": 0.874571754853445, "grad_norm": 4.90625, "learning_rate": 5.627141225732775e-06, "loss": 3.9498, "step": 18380 }, { "epoch": 0.8750475827940617, "grad_norm": 5.3125, "learning_rate": 5.6247620860296926e-06, "loss": 4.1522, "step": 18390 }, { "epoch": 0.8755234107346783, "grad_norm": 4.9375, "learning_rate": 5.622382946326609e-06, "loss": 4.0869, "step": 18400 }, { "epoch": 0.875999238675295, "grad_norm": 4.5, "learning_rate": 5.6200038066235255e-06, "loss": 3.9124, "step": 18410 }, { "epoch": 0.8764750666159117, "grad_norm": 4.9375, "learning_rate": 5.617624666920442e-06, "loss": 3.9362, "step": 18420 }, { "epoch": 0.8769508945565283, "grad_norm": 5.40625, "learning_rate": 5.6152455272173585e-06, "loss": 4.1594, "step": 18430 }, { "epoch": 0.877426722497145, "grad_norm": 4.9375, "learning_rate": 5.6128663875142754e-06, "loss": 3.8338, "step": 18440 }, { "epoch": 0.8779025504377617, "grad_norm": 4.375, "learning_rate": 5.6104872478111915e-06, "loss": 4.0883, "step": 18450 }, { "epoch": 0.8783783783783784, "grad_norm": 4.5625, "learning_rate": 5.608108108108109e-06, "loss": 3.7992, "step": 18460 }, { "epoch": 0.878854206318995, "grad_norm": 4.625, "learning_rate": 5.605728968405025e-06, "loss": 3.9831, "step": 18470 }, { "epoch": 0.8793300342596118, "grad_norm": 5.15625, "learning_rate": 5.603349828701941e-06, "loss": 3.9027, "step": 18480 }, { "epoch": 0.8798058622002284, "grad_norm": 5.03125, "learning_rate": 5.600970688998858e-06, "loss": 4.0928, "step": 18490 }, { "epoch": 0.8802816901408451, "grad_norm": 4.625, "learning_rate": 5.598591549295775e-06, "loss": 3.9227, "step": 18500 }, { "epoch": 0.8807575180814617, "grad_norm": 4.84375, "learning_rate": 5.596212409592692e-06, "loss": 3.8896, "step": 18510 }, { "epoch": 0.8812333460220784, "grad_norm": 5.0, "learning_rate": 5.593833269889608e-06, "loss": 3.9778, "step": 18520 }, { "epoch": 0.8817091739626951, "grad_norm": 4.59375, "learning_rate": 5.591454130186525e-06, "loss": 3.8258, "step": 18530 }, { "epoch": 0.8821850019033117, "grad_norm": 5.71875, "learning_rate": 5.589074990483442e-06, "loss": 4.0172, "step": 18540 }, { "epoch": 0.8826608298439285, "grad_norm": 4.15625, "learning_rate": 5.586695850780358e-06, "loss": 4.0599, "step": 18550 }, { "epoch": 0.8831366577845451, "grad_norm": 4.84375, "learning_rate": 5.584316711077274e-06, "loss": 3.9039, "step": 18560 }, { "epoch": 0.8836124857251618, "grad_norm": 4.5, "learning_rate": 5.581937571374192e-06, "loss": 4.0019, "step": 18570 }, { "epoch": 0.8840883136657784, "grad_norm": 5.0625, "learning_rate": 5.579558431671108e-06, "loss": 3.9754, "step": 18580 }, { "epoch": 0.8845641416063952, "grad_norm": 4.875, "learning_rate": 5.577179291968025e-06, "loss": 4.1409, "step": 18590 }, { "epoch": 0.8850399695470118, "grad_norm": 4.96875, "learning_rate": 5.574800152264942e-06, "loss": 4.0809, "step": 18600 }, { "epoch": 0.8855157974876284, "grad_norm": 4.875, "learning_rate": 5.572421012561858e-06, "loss": 3.9937, "step": 18610 }, { "epoch": 0.8859916254282452, "grad_norm": 4.71875, "learning_rate": 5.570041872858775e-06, "loss": 3.9163, "step": 18620 }, { "epoch": 0.8864674533688618, "grad_norm": 4.96875, "learning_rate": 5.567662733155691e-06, "loss": 3.8326, "step": 18630 }, { "epoch": 0.8869432813094785, "grad_norm": 4.90625, "learning_rate": 5.565283593452609e-06, "loss": 4.053, "step": 18640 }, { "epoch": 0.8874191092500952, "grad_norm": 4.75, "learning_rate": 5.562904453749525e-06, "loss": 3.9752, "step": 18650 }, { "epoch": 0.8878949371907119, "grad_norm": 4.78125, "learning_rate": 5.560525314046441e-06, "loss": 3.9664, "step": 18660 }, { "epoch": 0.8883707651313285, "grad_norm": 4.96875, "learning_rate": 5.558146174343358e-06, "loss": 4.0824, "step": 18670 }, { "epoch": 0.8888465930719451, "grad_norm": 5.0, "learning_rate": 5.555767034640275e-06, "loss": 4.0217, "step": 18680 }, { "epoch": 0.8893224210125619, "grad_norm": 4.65625, "learning_rate": 5.553387894937191e-06, "loss": 3.9807, "step": 18690 }, { "epoch": 0.8897982489531785, "grad_norm": 4.75, "learning_rate": 5.551008755234108e-06, "loss": 4.0726, "step": 18700 }, { "epoch": 0.8902740768937952, "grad_norm": 4.625, "learning_rate": 5.5486296155310245e-06, "loss": 4.0508, "step": 18710 }, { "epoch": 0.8907499048344119, "grad_norm": 4.53125, "learning_rate": 5.5462504758279414e-06, "loss": 4.0408, "step": 18720 }, { "epoch": 0.8912257327750286, "grad_norm": 5.25, "learning_rate": 5.5438713361248575e-06, "loss": 4.1578, "step": 18730 }, { "epoch": 0.8917015607156452, "grad_norm": 5.125, "learning_rate": 5.5414921964217736e-06, "loss": 3.9476, "step": 18740 }, { "epoch": 0.892177388656262, "grad_norm": 4.96875, "learning_rate": 5.539113056718691e-06, "loss": 4.0527, "step": 18750 }, { "epoch": 0.8926532165968786, "grad_norm": 5.34375, "learning_rate": 5.536733917015607e-06, "loss": 4.1184, "step": 18760 }, { "epoch": 0.8931290445374952, "grad_norm": 4.96875, "learning_rate": 5.534354777312524e-06, "loss": 4.0244, "step": 18770 }, { "epoch": 0.8936048724781119, "grad_norm": 4.96875, "learning_rate": 5.531975637609441e-06, "loss": 3.8668, "step": 18780 }, { "epoch": 0.8940807004187286, "grad_norm": 5.03125, "learning_rate": 5.529596497906357e-06, "loss": 3.9898, "step": 18790 }, { "epoch": 0.8945565283593453, "grad_norm": 4.65625, "learning_rate": 5.527217358203274e-06, "loss": 4.2378, "step": 18800 }, { "epoch": 0.8950323562999619, "grad_norm": 5.03125, "learning_rate": 5.52483821850019e-06, "loss": 3.9769, "step": 18810 }, { "epoch": 0.8955081842405787, "grad_norm": 4.25, "learning_rate": 5.522459078797108e-06, "loss": 3.9733, "step": 18820 }, { "epoch": 0.8959840121811953, "grad_norm": 4.96875, "learning_rate": 5.520079939094024e-06, "loss": 4.018, "step": 18830 }, { "epoch": 0.8964598401218119, "grad_norm": 4.875, "learning_rate": 5.51770079939094e-06, "loss": 4.0489, "step": 18840 }, { "epoch": 0.8969356680624286, "grad_norm": 4.40625, "learning_rate": 5.515321659687858e-06, "loss": 3.941, "step": 18850 }, { "epoch": 0.8974114960030453, "grad_norm": 5.0625, "learning_rate": 5.512942519984774e-06, "loss": 4.1211, "step": 18860 }, { "epoch": 0.897887323943662, "grad_norm": 4.875, "learning_rate": 5.51056338028169e-06, "loss": 3.9091, "step": 18870 }, { "epoch": 0.8983631518842786, "grad_norm": 4.96875, "learning_rate": 5.508184240578607e-06, "loss": 4.0218, "step": 18880 }, { "epoch": 0.8988389798248954, "grad_norm": 4.59375, "learning_rate": 5.505805100875524e-06, "loss": 4.0355, "step": 18890 }, { "epoch": 0.899314807765512, "grad_norm": 4.71875, "learning_rate": 5.503425961172441e-06, "loss": 3.9932, "step": 18900 }, { "epoch": 0.8997906357061287, "grad_norm": 4.8125, "learning_rate": 5.501046821469357e-06, "loss": 3.9633, "step": 18910 }, { "epoch": 0.9002664636467453, "grad_norm": 4.15625, "learning_rate": 5.498667681766275e-06, "loss": 4.0495, "step": 18920 }, { "epoch": 0.900742291587362, "grad_norm": 4.875, "learning_rate": 5.496288542063191e-06, "loss": 3.9323, "step": 18930 }, { "epoch": 0.9012181195279787, "grad_norm": 4.78125, "learning_rate": 5.493909402360107e-06, "loss": 4.0956, "step": 18940 }, { "epoch": 0.9016939474685953, "grad_norm": 4.46875, "learning_rate": 5.491530262657023e-06, "loss": 4.1011, "step": 18950 }, { "epoch": 0.9021697754092121, "grad_norm": 4.65625, "learning_rate": 5.489151122953941e-06, "loss": 4.0171, "step": 18960 }, { "epoch": 0.9026456033498287, "grad_norm": 4.9375, "learning_rate": 5.486771983250857e-06, "loss": 4.0417, "step": 18970 }, { "epoch": 0.9031214312904454, "grad_norm": 4.8125, "learning_rate": 5.484392843547774e-06, "loss": 4.0409, "step": 18980 }, { "epoch": 0.903597259231062, "grad_norm": 4.90625, "learning_rate": 5.48201370384469e-06, "loss": 3.959, "step": 18990 }, { "epoch": 0.9040730871716787, "grad_norm": 5.0625, "learning_rate": 5.4796345641416074e-06, "loss": 4.0394, "step": 19000 }, { "epoch": 0.9045489151122954, "grad_norm": 4.5, "learning_rate": 5.4772554244385235e-06, "loss": 3.9874, "step": 19010 }, { "epoch": 0.905024743052912, "grad_norm": 5.25, "learning_rate": 5.4748762847354396e-06, "loss": 4.1134, "step": 19020 }, { "epoch": 0.9055005709935288, "grad_norm": 4.84375, "learning_rate": 5.472497145032357e-06, "loss": 4.0102, "step": 19030 }, { "epoch": 0.9059763989341454, "grad_norm": 4.96875, "learning_rate": 5.470118005329273e-06, "loss": 4.0776, "step": 19040 }, { "epoch": 0.9064522268747621, "grad_norm": 4.5625, "learning_rate": 5.4677388656261895e-06, "loss": 3.9838, "step": 19050 }, { "epoch": 0.9069280548153787, "grad_norm": 4.75, "learning_rate": 5.465359725923106e-06, "loss": 3.9564, "step": 19060 }, { "epoch": 0.9074038827559955, "grad_norm": 5.0, "learning_rate": 5.462980586220023e-06, "loss": 4.056, "step": 19070 }, { "epoch": 0.9078797106966121, "grad_norm": 5.21875, "learning_rate": 5.46060144651694e-06, "loss": 4.1182, "step": 19080 }, { "epoch": 0.9083555386372287, "grad_norm": 4.8125, "learning_rate": 5.458222306813856e-06, "loss": 3.9616, "step": 19090 }, { "epoch": 0.9088313665778455, "grad_norm": 4.84375, "learning_rate": 5.455843167110774e-06, "loss": 3.9172, "step": 19100 }, { "epoch": 0.9093071945184621, "grad_norm": 4.90625, "learning_rate": 5.45346402740769e-06, "loss": 3.8766, "step": 19110 }, { "epoch": 0.9097830224590788, "grad_norm": 4.65625, "learning_rate": 5.451084887704606e-06, "loss": 4.0938, "step": 19120 }, { "epoch": 0.9102588503996955, "grad_norm": 5.28125, "learning_rate": 5.448705748001522e-06, "loss": 4.1533, "step": 19130 }, { "epoch": 0.9107346783403122, "grad_norm": 4.65625, "learning_rate": 5.44632660829844e-06, "loss": 3.9989, "step": 19140 }, { "epoch": 0.9112105062809288, "grad_norm": 4.8125, "learning_rate": 5.443947468595356e-06, "loss": 3.973, "step": 19150 }, { "epoch": 0.9116863342215454, "grad_norm": 4.78125, "learning_rate": 5.441568328892273e-06, "loss": 4.0436, "step": 19160 }, { "epoch": 0.9121621621621622, "grad_norm": 4.71875, "learning_rate": 5.43918918918919e-06, "loss": 4.0696, "step": 19170 }, { "epoch": 0.9126379901027788, "grad_norm": 4.6875, "learning_rate": 5.436810049486107e-06, "loss": 3.9869, "step": 19180 }, { "epoch": 0.9131138180433955, "grad_norm": 4.59375, "learning_rate": 5.434430909783023e-06, "loss": 4.1548, "step": 19190 }, { "epoch": 0.9135896459840122, "grad_norm": 4.78125, "learning_rate": 5.432051770079939e-06, "loss": 4.0638, "step": 19200 }, { "epoch": 0.9140654739246289, "grad_norm": 5.125, "learning_rate": 5.429672630376857e-06, "loss": 4.0872, "step": 19210 }, { "epoch": 0.9145413018652455, "grad_norm": 4.96875, "learning_rate": 5.427293490673773e-06, "loss": 3.9486, "step": 19220 }, { "epoch": 0.9150171298058623, "grad_norm": 4.75, "learning_rate": 5.424914350970689e-06, "loss": 3.9803, "step": 19230 }, { "epoch": 0.9154929577464789, "grad_norm": 4.28125, "learning_rate": 5.422535211267607e-06, "loss": 4.1294, "step": 19240 }, { "epoch": 0.9159687856870955, "grad_norm": 4.96875, "learning_rate": 5.420156071564523e-06, "loss": 4.0959, "step": 19250 }, { "epoch": 0.9164446136277122, "grad_norm": 4.90625, "learning_rate": 5.41777693186144e-06, "loss": 3.8394, "step": 19260 }, { "epoch": 0.9169204415683289, "grad_norm": 5.09375, "learning_rate": 5.415397792158356e-06, "loss": 4.0611, "step": 19270 }, { "epoch": 0.9173962695089456, "grad_norm": 4.84375, "learning_rate": 5.4130186524552734e-06, "loss": 4.0932, "step": 19280 }, { "epoch": 0.9178720974495622, "grad_norm": 4.59375, "learning_rate": 5.4106395127521895e-06, "loss": 3.9954, "step": 19290 }, { "epoch": 0.918347925390179, "grad_norm": 4.96875, "learning_rate": 5.4082603730491056e-06, "loss": 4.0877, "step": 19300 }, { "epoch": 0.9188237533307956, "grad_norm": 5.96875, "learning_rate": 5.405881233346022e-06, "loss": 3.9829, "step": 19310 }, { "epoch": 0.9192995812714122, "grad_norm": 5.03125, "learning_rate": 5.403502093642939e-06, "loss": 3.9527, "step": 19320 }, { "epoch": 0.9197754092120289, "grad_norm": 5.125, "learning_rate": 5.4011229539398555e-06, "loss": 4.0393, "step": 19330 }, { "epoch": 0.9202512371526456, "grad_norm": 4.9375, "learning_rate": 5.398743814236772e-06, "loss": 3.9883, "step": 19340 }, { "epoch": 0.9207270650932623, "grad_norm": 4.71875, "learning_rate": 5.396364674533689e-06, "loss": 4.0804, "step": 19350 }, { "epoch": 0.9212028930338789, "grad_norm": 5.4375, "learning_rate": 5.393985534830606e-06, "loss": 4.2425, "step": 19360 }, { "epoch": 0.9216787209744957, "grad_norm": 4.65625, "learning_rate": 5.391606395127522e-06, "loss": 4.022, "step": 19370 }, { "epoch": 0.9221545489151123, "grad_norm": 4.875, "learning_rate": 5.389227255424438e-06, "loss": 4.1257, "step": 19380 }, { "epoch": 0.922630376855729, "grad_norm": 4.65625, "learning_rate": 5.386848115721356e-06, "loss": 4.073, "step": 19390 }, { "epoch": 0.9231062047963456, "grad_norm": 4.40625, "learning_rate": 5.384468976018272e-06, "loss": 3.9014, "step": 19400 }, { "epoch": 0.9235820327369623, "grad_norm": 5.125, "learning_rate": 5.382089836315188e-06, "loss": 3.9514, "step": 19410 }, { "epoch": 0.924057860677579, "grad_norm": 5.09375, "learning_rate": 5.379710696612106e-06, "loss": 4.003, "step": 19420 }, { "epoch": 0.9245336886181956, "grad_norm": 4.5, "learning_rate": 5.377331556909022e-06, "loss": 4.0233, "step": 19430 }, { "epoch": 0.9250095165588124, "grad_norm": 4.625, "learning_rate": 5.374952417205939e-06, "loss": 3.9191, "step": 19440 }, { "epoch": 0.925485344499429, "grad_norm": 4.71875, "learning_rate": 5.372573277502855e-06, "loss": 3.8592, "step": 19450 }, { "epoch": 0.9259611724400457, "grad_norm": 4.59375, "learning_rate": 5.370194137799772e-06, "loss": 4.0794, "step": 19460 }, { "epoch": 0.9264370003806623, "grad_norm": 5.15625, "learning_rate": 5.367814998096689e-06, "loss": 4.0792, "step": 19470 }, { "epoch": 0.9269128283212791, "grad_norm": 4.8125, "learning_rate": 5.365435858393605e-06, "loss": 3.9836, "step": 19480 }, { "epoch": 0.9273886562618957, "grad_norm": 5.09375, "learning_rate": 5.363056718690523e-06, "loss": 4.0187, "step": 19490 }, { "epoch": 0.9278644842025123, "grad_norm": 4.90625, "learning_rate": 5.360677578987439e-06, "loss": 3.923, "step": 19500 }, { "epoch": 0.9283403121431291, "grad_norm": 4.59375, "learning_rate": 5.358298439284355e-06, "loss": 3.9866, "step": 19510 }, { "epoch": 0.9288161400837457, "grad_norm": 4.5625, "learning_rate": 5.355919299581272e-06, "loss": 3.9189, "step": 19520 }, { "epoch": 0.9292919680243624, "grad_norm": 5.25, "learning_rate": 5.353540159878189e-06, "loss": 4.0412, "step": 19530 }, { "epoch": 0.929767795964979, "grad_norm": 4.6875, "learning_rate": 5.351161020175106e-06, "loss": 4.1265, "step": 19540 }, { "epoch": 0.9302436239055958, "grad_norm": 4.375, "learning_rate": 5.348781880472022e-06, "loss": 4.0193, "step": 19550 }, { "epoch": 0.9307194518462124, "grad_norm": 4.6875, "learning_rate": 5.346402740768938e-06, "loss": 4.0737, "step": 19560 }, { "epoch": 0.931195279786829, "grad_norm": 5.09375, "learning_rate": 5.3440236010658555e-06, "loss": 4.075, "step": 19570 }, { "epoch": 0.9316711077274458, "grad_norm": 5.0, "learning_rate": 5.3416444613627716e-06, "loss": 4.028, "step": 19580 }, { "epoch": 0.9321469356680624, "grad_norm": 4.71875, "learning_rate": 5.339265321659688e-06, "loss": 4.0638, "step": 19590 }, { "epoch": 0.9326227636086791, "grad_norm": 4.75, "learning_rate": 5.336886181956605e-06, "loss": 4.1315, "step": 19600 }, { "epoch": 0.9330985915492958, "grad_norm": 4.875, "learning_rate": 5.3345070422535215e-06, "loss": 4.031, "step": 19610 }, { "epoch": 0.9335744194899125, "grad_norm": 4.78125, "learning_rate": 5.332127902550438e-06, "loss": 3.8822, "step": 19620 }, { "epoch": 0.9340502474305291, "grad_norm": 5.09375, "learning_rate": 5.3297487628473544e-06, "loss": 3.8538, "step": 19630 }, { "epoch": 0.9345260753711458, "grad_norm": 4.71875, "learning_rate": 5.327369623144271e-06, "loss": 4.1151, "step": 19640 }, { "epoch": 0.9350019033117625, "grad_norm": 4.71875, "learning_rate": 5.324990483441188e-06, "loss": 4.0557, "step": 19650 }, { "epoch": 0.9354777312523791, "grad_norm": 4.3125, "learning_rate": 5.322611343738104e-06, "loss": 3.9549, "step": 19660 }, { "epoch": 0.9359535591929958, "grad_norm": 7.6875, "learning_rate": 5.320232204035022e-06, "loss": 4.0019, "step": 19670 }, { "epoch": 0.9364293871336125, "grad_norm": 5.03125, "learning_rate": 5.317853064331938e-06, "loss": 4.0657, "step": 19680 }, { "epoch": 0.9369052150742292, "grad_norm": 4.34375, "learning_rate": 5.315473924628854e-06, "loss": 3.9312, "step": 19690 }, { "epoch": 0.9373810430148458, "grad_norm": 4.90625, "learning_rate": 5.313094784925771e-06, "loss": 4.0952, "step": 19700 }, { "epoch": 0.9378568709554626, "grad_norm": 4.5, "learning_rate": 5.310715645222688e-06, "loss": 4.0797, "step": 19710 }, { "epoch": 0.9383326988960792, "grad_norm": 4.875, "learning_rate": 5.308336505519604e-06, "loss": 4.0892, "step": 19720 }, { "epoch": 0.9388085268366958, "grad_norm": 4.4375, "learning_rate": 5.305957365816521e-06, "loss": 4.1292, "step": 19730 }, { "epoch": 0.9392843547773125, "grad_norm": 4.65625, "learning_rate": 5.303578226113438e-06, "loss": 4.0745, "step": 19740 }, { "epoch": 0.9397601827179292, "grad_norm": 5.28125, "learning_rate": 5.301199086410355e-06, "loss": 4.0136, "step": 19750 }, { "epoch": 0.9402360106585459, "grad_norm": 4.65625, "learning_rate": 5.298819946707271e-06, "loss": 3.8929, "step": 19760 }, { "epoch": 0.9407118385991625, "grad_norm": 4.75, "learning_rate": 5.296440807004187e-06, "loss": 3.9804, "step": 19770 }, { "epoch": 0.9411876665397793, "grad_norm": 4.40625, "learning_rate": 5.294061667301105e-06, "loss": 4.0769, "step": 19780 }, { "epoch": 0.9416634944803959, "grad_norm": 5.0, "learning_rate": 5.291682527598021e-06, "loss": 4.0737, "step": 19790 }, { "epoch": 0.9421393224210126, "grad_norm": 4.78125, "learning_rate": 5.289303387894938e-06, "loss": 3.9989, "step": 19800 }, { "epoch": 0.9426151503616292, "grad_norm": 4.84375, "learning_rate": 5.286924248191855e-06, "loss": 4.0339, "step": 19810 }, { "epoch": 0.9430909783022459, "grad_norm": 4.8125, "learning_rate": 5.284545108488771e-06, "loss": 4.0341, "step": 19820 }, { "epoch": 0.9435668062428626, "grad_norm": 5.40625, "learning_rate": 5.282165968785688e-06, "loss": 4.0576, "step": 19830 }, { "epoch": 0.9440426341834792, "grad_norm": 4.625, "learning_rate": 5.279786829082604e-06, "loss": 4.09, "step": 19840 }, { "epoch": 0.944518462124096, "grad_norm": 5.25, "learning_rate": 5.2774076893795215e-06, "loss": 3.9835, "step": 19850 }, { "epoch": 0.9449942900647126, "grad_norm": 4.40625, "learning_rate": 5.2750285496764376e-06, "loss": 4.2125, "step": 19860 }, { "epoch": 0.9454701180053293, "grad_norm": 4.75, "learning_rate": 5.272649409973354e-06, "loss": 4.1094, "step": 19870 }, { "epoch": 0.9459459459459459, "grad_norm": 4.25, "learning_rate": 5.2702702702702705e-06, "loss": 3.9201, "step": 19880 }, { "epoch": 0.9464217738865626, "grad_norm": 4.90625, "learning_rate": 5.2678911305671875e-06, "loss": 4.0053, "step": 19890 }, { "epoch": 0.9468976018271793, "grad_norm": 4.90625, "learning_rate": 5.2655119908641035e-06, "loss": 4.0125, "step": 19900 }, { "epoch": 0.9473734297677959, "grad_norm": 4.90625, "learning_rate": 5.2631328511610204e-06, "loss": 4.1343, "step": 19910 }, { "epoch": 0.9478492577084127, "grad_norm": 4.96875, "learning_rate": 5.260753711457937e-06, "loss": 3.9436, "step": 19920 }, { "epoch": 0.9483250856490293, "grad_norm": 4.6875, "learning_rate": 5.258374571754854e-06, "loss": 4.141, "step": 19930 }, { "epoch": 0.948800913589646, "grad_norm": 4.28125, "learning_rate": 5.25599543205177e-06, "loss": 3.9409, "step": 19940 }, { "epoch": 0.9492767415302626, "grad_norm": 4.90625, "learning_rate": 5.253616292348686e-06, "loss": 4.1404, "step": 19950 }, { "epoch": 0.9497525694708794, "grad_norm": 4.6875, "learning_rate": 5.251237152645604e-06, "loss": 4.1091, "step": 19960 }, { "epoch": 0.950228397411496, "grad_norm": 5.09375, "learning_rate": 5.24885801294252e-06, "loss": 4.0123, "step": 19970 }, { "epoch": 0.9507042253521126, "grad_norm": 4.90625, "learning_rate": 5.246478873239436e-06, "loss": 4.1369, "step": 19980 }, { "epoch": 0.9511800532927294, "grad_norm": 4.53125, "learning_rate": 5.244099733536354e-06, "loss": 4.1234, "step": 19990 }, { "epoch": 0.951655881233346, "grad_norm": 4.21875, "learning_rate": 5.24172059383327e-06, "loss": 3.95, "step": 20000 }, { "epoch": 0.9521317091739627, "grad_norm": 4.9375, "learning_rate": 5.239341454130187e-06, "loss": 4.0017, "step": 20010 }, { "epoch": 0.9526075371145794, "grad_norm": 5.09375, "learning_rate": 5.236962314427103e-06, "loss": 4.0684, "step": 20020 }, { "epoch": 0.9530833650551961, "grad_norm": 4.9375, "learning_rate": 5.234583174724021e-06, "loss": 4.0877, "step": 20030 }, { "epoch": 0.9535591929958127, "grad_norm": 4.59375, "learning_rate": 5.232204035020937e-06, "loss": 3.9823, "step": 20040 }, { "epoch": 0.9540350209364293, "grad_norm": 8.0, "learning_rate": 5.229824895317853e-06, "loss": 4.0257, "step": 20050 }, { "epoch": 0.9545108488770461, "grad_norm": 4.9375, "learning_rate": 5.227445755614771e-06, "loss": 3.9853, "step": 20060 }, { "epoch": 0.9549866768176627, "grad_norm": 5.09375, "learning_rate": 5.225066615911687e-06, "loss": 4.0715, "step": 20070 }, { "epoch": 0.9554625047582794, "grad_norm": 4.4375, "learning_rate": 5.222687476208603e-06, "loss": 4.1002, "step": 20080 }, { "epoch": 0.955938332698896, "grad_norm": 4.84375, "learning_rate": 5.22030833650552e-06, "loss": 3.9691, "step": 20090 }, { "epoch": 0.9564141606395128, "grad_norm": 4.90625, "learning_rate": 5.217929196802437e-06, "loss": 3.9903, "step": 20100 }, { "epoch": 0.9568899885801294, "grad_norm": 4.125, "learning_rate": 5.215550057099354e-06, "loss": 4.1298, "step": 20110 }, { "epoch": 0.9573658165207461, "grad_norm": 5.0, "learning_rate": 5.21317091739627e-06, "loss": 4.0603, "step": 20120 }, { "epoch": 0.9578416444613628, "grad_norm": 4.5, "learning_rate": 5.2107917776931875e-06, "loss": 4.201, "step": 20130 }, { "epoch": 0.9583174724019794, "grad_norm": 5.0625, "learning_rate": 5.2084126379901036e-06, "loss": 3.9911, "step": 20140 }, { "epoch": 0.9587933003425961, "grad_norm": 5.15625, "learning_rate": 5.20603349828702e-06, "loss": 4.0497, "step": 20150 }, { "epoch": 0.9592691282832128, "grad_norm": 8.0, "learning_rate": 5.203654358583936e-06, "loss": 3.834, "step": 20160 }, { "epoch": 0.9597449562238295, "grad_norm": 4.5, "learning_rate": 5.2012752188808535e-06, "loss": 4.0842, "step": 20170 }, { "epoch": 0.9602207841644461, "grad_norm": 4.21875, "learning_rate": 5.1988960791777695e-06, "loss": 3.9284, "step": 20180 }, { "epoch": 0.9606966121050629, "grad_norm": 4.4375, "learning_rate": 5.1965169394746864e-06, "loss": 3.9495, "step": 20190 }, { "epoch": 0.9611724400456795, "grad_norm": 4.8125, "learning_rate": 5.1941377997716025e-06, "loss": 4.1183, "step": 20200 }, { "epoch": 0.9616482679862961, "grad_norm": 4.71875, "learning_rate": 5.19175866006852e-06, "loss": 4.078, "step": 20210 }, { "epoch": 0.9621240959269128, "grad_norm": 4.90625, "learning_rate": 5.189379520365436e-06, "loss": 4.0911, "step": 20220 }, { "epoch": 0.9625999238675295, "grad_norm": 4.75, "learning_rate": 5.187000380662352e-06, "loss": 3.9679, "step": 20230 }, { "epoch": 0.9630757518081462, "grad_norm": 4.875, "learning_rate": 5.18462124095927e-06, "loss": 4.0012, "step": 20240 }, { "epoch": 0.9635515797487628, "grad_norm": 4.59375, "learning_rate": 5.182242101256186e-06, "loss": 4.0061, "step": 20250 }, { "epoch": 0.9640274076893796, "grad_norm": 4.78125, "learning_rate": 5.179862961553102e-06, "loss": 4.1169, "step": 20260 }, { "epoch": 0.9645032356299962, "grad_norm": 4.78125, "learning_rate": 5.177483821850019e-06, "loss": 3.9984, "step": 20270 }, { "epoch": 0.9649790635706129, "grad_norm": 4.625, "learning_rate": 5.175104682146936e-06, "loss": 3.8713, "step": 20280 }, { "epoch": 0.9654548915112295, "grad_norm": 4.875, "learning_rate": 5.172725542443853e-06, "loss": 3.9944, "step": 20290 }, { "epoch": 0.9659307194518462, "grad_norm": 4.75, "learning_rate": 5.170346402740769e-06, "loss": 4.0014, "step": 20300 }, { "epoch": 0.9664065473924629, "grad_norm": 4.78125, "learning_rate": 5.167967263037687e-06, "loss": 3.9987, "step": 20310 }, { "epoch": 0.9668823753330795, "grad_norm": 5.21875, "learning_rate": 5.165588123334603e-06, "loss": 3.9802, "step": 20320 }, { "epoch": 0.9673582032736963, "grad_norm": 5.0, "learning_rate": 5.163208983631519e-06, "loss": 4.1073, "step": 20330 }, { "epoch": 0.9678340312143129, "grad_norm": 4.71875, "learning_rate": 5.160829843928435e-06, "loss": 3.8833, "step": 20340 }, { "epoch": 0.9683098591549296, "grad_norm": 4.71875, "learning_rate": 5.158450704225353e-06, "loss": 3.8909, "step": 20350 }, { "epoch": 0.9687856870955462, "grad_norm": 5.28125, "learning_rate": 5.156071564522269e-06, "loss": 4.0156, "step": 20360 }, { "epoch": 0.9692615150361629, "grad_norm": 4.4375, "learning_rate": 5.153692424819186e-06, "loss": 4.0298, "step": 20370 }, { "epoch": 0.9697373429767796, "grad_norm": 5.0625, "learning_rate": 5.151313285116103e-06, "loss": 3.7994, "step": 20380 }, { "epoch": 0.9702131709173962, "grad_norm": 5.03125, "learning_rate": 5.14893414541302e-06, "loss": 4.0858, "step": 20390 }, { "epoch": 0.970688998858013, "grad_norm": 4.625, "learning_rate": 5.146555005709936e-06, "loss": 3.8902, "step": 20400 }, { "epoch": 0.9711648267986296, "grad_norm": 5.375, "learning_rate": 5.144175866006852e-06, "loss": 3.9912, "step": 20410 }, { "epoch": 0.9716406547392463, "grad_norm": 4.71875, "learning_rate": 5.1417967263037696e-06, "loss": 4.0357, "step": 20420 }, { "epoch": 0.972116482679863, "grad_norm": 4.75, "learning_rate": 5.139417586600686e-06, "loss": 4.1135, "step": 20430 }, { "epoch": 0.9725923106204797, "grad_norm": 4.59375, "learning_rate": 5.137038446897602e-06, "loss": 4.0039, "step": 20440 }, { "epoch": 0.9730681385610963, "grad_norm": 4.8125, "learning_rate": 5.134659307194519e-06, "loss": 3.943, "step": 20450 }, { "epoch": 0.9735439665017129, "grad_norm": 4.96875, "learning_rate": 5.1322801674914355e-06, "loss": 4.0479, "step": 20460 }, { "epoch": 0.9740197944423297, "grad_norm": 5.03125, "learning_rate": 5.1299010277883524e-06, "loss": 4.0031, "step": 20470 }, { "epoch": 0.9744956223829463, "grad_norm": 5.0, "learning_rate": 5.1275218880852685e-06, "loss": 4.0293, "step": 20480 }, { "epoch": 0.974971450323563, "grad_norm": 5.21875, "learning_rate": 5.125142748382185e-06, "loss": 3.8232, "step": 20490 }, { "epoch": 0.9754472782641797, "grad_norm": 5.09375, "learning_rate": 5.122763608679102e-06, "loss": 4.221, "step": 20500 }, { "epoch": 0.9759231062047964, "grad_norm": 4.9375, "learning_rate": 5.120384468976018e-06, "loss": 4.0356, "step": 20510 }, { "epoch": 0.976398934145413, "grad_norm": 5.125, "learning_rate": 5.1180053292729345e-06, "loss": 4.0624, "step": 20520 }, { "epoch": 0.9768747620860297, "grad_norm": 4.625, "learning_rate": 5.115626189569852e-06, "loss": 4.0625, "step": 20530 }, { "epoch": 0.9773505900266464, "grad_norm": 4.96875, "learning_rate": 5.113247049866768e-06, "loss": 4.0374, "step": 20540 }, { "epoch": 0.977826417967263, "grad_norm": 4.53125, "learning_rate": 5.110867910163685e-06, "loss": 4.1268, "step": 20550 }, { "epoch": 0.9783022459078797, "grad_norm": 4.9375, "learning_rate": 5.108488770460602e-06, "loss": 4.0087, "step": 20560 }, { "epoch": 0.9787780738484964, "grad_norm": 4.8125, "learning_rate": 5.106109630757519e-06, "loss": 3.9586, "step": 20570 }, { "epoch": 0.9792539017891131, "grad_norm": 4.71875, "learning_rate": 5.103730491054435e-06, "loss": 3.9913, "step": 20580 }, { "epoch": 0.9797297297297297, "grad_norm": 4.71875, "learning_rate": 5.101351351351351e-06, "loss": 4.0459, "step": 20590 }, { "epoch": 0.9802055576703465, "grad_norm": 4.75, "learning_rate": 5.098972211648269e-06, "loss": 4.0418, "step": 20600 }, { "epoch": 0.9806813856109631, "grad_norm": 4.4375, "learning_rate": 5.096593071945185e-06, "loss": 3.9943, "step": 20610 }, { "epoch": 0.9811572135515797, "grad_norm": 5.0, "learning_rate": 5.094213932242101e-06, "loss": 4.1379, "step": 20620 }, { "epoch": 0.9816330414921964, "grad_norm": 4.84375, "learning_rate": 5.091834792539019e-06, "loss": 4.2219, "step": 20630 }, { "epoch": 0.9821088694328131, "grad_norm": 4.5625, "learning_rate": 5.089455652835935e-06, "loss": 4.0024, "step": 20640 }, { "epoch": 0.9825846973734298, "grad_norm": 4.90625, "learning_rate": 5.087076513132852e-06, "loss": 4.2266, "step": 20650 }, { "epoch": 0.9830605253140464, "grad_norm": 4.65625, "learning_rate": 5.084697373429768e-06, "loss": 4.0632, "step": 20660 }, { "epoch": 0.9835363532546632, "grad_norm": 4.8125, "learning_rate": 5.082318233726685e-06, "loss": 4.0177, "step": 20670 }, { "epoch": 0.9840121811952798, "grad_norm": 4.46875, "learning_rate": 5.079939094023602e-06, "loss": 4.1537, "step": 20680 }, { "epoch": 0.9844880091358965, "grad_norm": 4.71875, "learning_rate": 5.077559954320518e-06, "loss": 4.085, "step": 20690 }, { "epoch": 0.9849638370765131, "grad_norm": 4.78125, "learning_rate": 5.0751808146174356e-06, "loss": 4.0558, "step": 20700 }, { "epoch": 0.9854396650171298, "grad_norm": 4.34375, "learning_rate": 5.072801674914352e-06, "loss": 4.1291, "step": 20710 }, { "epoch": 0.9859154929577465, "grad_norm": 5.34375, "learning_rate": 5.070422535211268e-06, "loss": 3.9471, "step": 20720 }, { "epoch": 0.9863913208983631, "grad_norm": 4.5625, "learning_rate": 5.068043395508185e-06, "loss": 4.0796, "step": 20730 }, { "epoch": 0.9868671488389799, "grad_norm": 4.78125, "learning_rate": 5.0656642558051015e-06, "loss": 3.9623, "step": 20740 }, { "epoch": 0.9873429767795965, "grad_norm": 5.0, "learning_rate": 5.063285116102018e-06, "loss": 4.0, "step": 20750 }, { "epoch": 0.9878188047202132, "grad_norm": 5.15625, "learning_rate": 5.0609059763989345e-06, "loss": 4.1379, "step": 20760 }, { "epoch": 0.9882946326608298, "grad_norm": 4.71875, "learning_rate": 5.0585268366958506e-06, "loss": 4.1905, "step": 20770 }, { "epoch": 0.9887704606014465, "grad_norm": 4.65625, "learning_rate": 5.056147696992768e-06, "loss": 4.1477, "step": 20780 }, { "epoch": 0.9892462885420632, "grad_norm": 5.09375, "learning_rate": 5.053768557289684e-06, "loss": 4.1129, "step": 20790 }, { "epoch": 0.9897221164826798, "grad_norm": 4.6875, "learning_rate": 5.0513894175866005e-06, "loss": 4.0265, "step": 20800 }, { "epoch": 0.9901979444232966, "grad_norm": 4.625, "learning_rate": 5.049010277883518e-06, "loss": 3.959, "step": 20810 }, { "epoch": 0.9906737723639132, "grad_norm": 4.5, "learning_rate": 5.046631138180434e-06, "loss": 4.1251, "step": 20820 }, { "epoch": 0.9911496003045299, "grad_norm": 4.8125, "learning_rate": 5.044251998477351e-06, "loss": 4.0912, "step": 20830 }, { "epoch": 0.9916254282451465, "grad_norm": 5.875, "learning_rate": 5.041872858774267e-06, "loss": 4.1579, "step": 20840 }, { "epoch": 0.9921012561857633, "grad_norm": 4.71875, "learning_rate": 5.039493719071184e-06, "loss": 4.0696, "step": 20850 }, { "epoch": 0.9925770841263799, "grad_norm": 4.75, "learning_rate": 5.037114579368101e-06, "loss": 4.048, "step": 20860 }, { "epoch": 0.9930529120669965, "grad_norm": 4.71875, "learning_rate": 5.034735439665017e-06, "loss": 3.9512, "step": 20870 }, { "epoch": 0.9935287400076133, "grad_norm": 5.125, "learning_rate": 5.032356299961935e-06, "loss": 4.1219, "step": 20880 }, { "epoch": 0.9940045679482299, "grad_norm": 4.625, "learning_rate": 5.029977160258851e-06, "loss": 4.1952, "step": 20890 }, { "epoch": 0.9944803958888466, "grad_norm": 5.09375, "learning_rate": 5.027598020555767e-06, "loss": 3.9056, "step": 20900 }, { "epoch": 0.9949562238294632, "grad_norm": 4.84375, "learning_rate": 5.025218880852684e-06, "loss": 3.9348, "step": 20910 }, { "epoch": 0.99543205177008, "grad_norm": 4.875, "learning_rate": 5.022839741149601e-06, "loss": 4.0882, "step": 20920 }, { "epoch": 0.9959078797106966, "grad_norm": 4.6875, "learning_rate": 5.020460601446517e-06, "loss": 4.1227, "step": 20930 }, { "epoch": 0.9963837076513132, "grad_norm": 5.09375, "learning_rate": 5.018081461743434e-06, "loss": 4.0385, "step": 20940 }, { "epoch": 0.99685953559193, "grad_norm": 4.875, "learning_rate": 5.015702322040351e-06, "loss": 3.9661, "step": 20950 }, { "epoch": 0.9973353635325466, "grad_norm": 4.625, "learning_rate": 5.013323182337268e-06, "loss": 4.1853, "step": 20960 }, { "epoch": 0.9978111914731633, "grad_norm": 4.5625, "learning_rate": 5.010944042634184e-06, "loss": 4.0292, "step": 20970 }, { "epoch": 0.99828701941378, "grad_norm": 4.3125, "learning_rate": 5.0085649029311e-06, "loss": 4.0886, "step": 20980 }, { "epoch": 0.9987628473543967, "grad_norm": 4.625, "learning_rate": 5.006185763228018e-06, "loss": 3.9355, "step": 20990 }, { "epoch": 0.9992386752950133, "grad_norm": 4.75, "learning_rate": 5.003806623524934e-06, "loss": 4.1891, "step": 21000 }, { "epoch": 0.99971450323563, "grad_norm": 4.5, "learning_rate": 5.00142748382185e-06, "loss": 4.1802, "step": 21010 } ], "logging_steps": 10, "max_steps": 42032, "num_input_tokens_seen": 0, "num_train_epochs": 2, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 4.1203825328615424e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }