diff --git "a/trainer_state.json" "b/trainer_state.json"
new file mode 100644--- /dev/null
+++ "b/trainer_state.json"
@@ -0,0 +1,53128 @@
+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.9999406751171667,
+  "eval_steps": 500000,
+  "global_step": 75852,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.0003954992188890427,
+      "grad_norm": 3.7074936605397686,
+      "learning_rate": 1.318217769575534e-08,
+      "loss": 2.201,
+      "step": 10
+    },
+    {
+      "epoch": 0.0007909984377780854,
+      "grad_norm": 3.3109913373920854,
+      "learning_rate": 2.636435539151068e-08,
+      "loss": 2.1554,
+      "step": 20
+    },
+    {
+      "epoch": 0.0011864976566671281,
+      "grad_norm": 3.6800607569494184,
+      "learning_rate": 3.9546533087266016e-08,
+      "loss": 2.1944,
+      "step": 30
+    },
+    {
+      "epoch": 0.0015819968755561708,
+      "grad_norm": 3.743190421032586,
+      "learning_rate": 5.272871078302136e-08,
+      "loss": 2.2551,
+      "step": 40
+    },
+    {
+      "epoch": 0.0019774960944452134,
+      "grad_norm": 4.691004471985028,
+      "learning_rate": 6.59108884787767e-08,
+      "loss": 2.3473,
+      "step": 50
+    },
+    {
+      "epoch": 0.0023729953133342562,
+      "grad_norm": 3.8048532622701434,
+      "learning_rate": 7.909306617453203e-08,
+      "loss": 2.328,
+      "step": 60
+    },
+    {
+      "epoch": 0.0027684945322232987,
+      "grad_norm": 4.001415152354601,
+      "learning_rate": 9.227524387028737e-08,
+      "loss": 2.3177,
+      "step": 70
+    },
+    {
+      "epoch": 0.0031639937511123415,
+      "grad_norm": 4.032244142335064,
+      "learning_rate": 1.0545742156604272e-07,
+      "loss": 2.3234,
+      "step": 80
+    },
+    {
+      "epoch": 0.0035594929700013844,
+      "grad_norm": 3.7312290699451545,
+      "learning_rate": 1.1863959926179805e-07,
+      "loss": 2.3067,
+      "step": 90
+    },
+    {
+      "epoch": 0.003954992188890427,
+      "grad_norm": 4.153113538768762,
+      "learning_rate": 1.318217769575534e-07,
+      "loss": 2.3278,
+      "step": 100
+    },
+    {
+      "epoch": 0.00435049140777947,
+      "grad_norm": 4.096269099899376,
+      "learning_rate": 1.4500395465330874e-07,
+      "loss": 2.3194,
+      "step": 110
+    },
+    {
+      "epoch": 0.0047459906266685125,
+      "grad_norm": 3.235160888190004,
+      "learning_rate": 1.5818613234906406e-07,
+      "loss": 2.3154,
+      "step": 120
+    },
+    {
+      "epoch": 0.005141489845557555,
+      "grad_norm": 3.323526631193669,
+      "learning_rate": 1.7136831004481941e-07,
+      "loss": 2.2965,
+      "step": 130
+    },
+    {
+      "epoch": 0.005536989064446597,
+      "grad_norm": 3.6904397977464054,
+      "learning_rate": 1.8455048774057474e-07,
+      "loss": 2.2868,
+      "step": 140
+    },
+    {
+      "epoch": 0.00593248828333564,
+      "grad_norm": 3.602112215723258,
+      "learning_rate": 1.977326654363301e-07,
+      "loss": 2.2448,
+      "step": 150
+    },
+    {
+      "epoch": 0.006327987502224683,
+      "grad_norm": 3.2370179111059896,
+      "learning_rate": 2.1091484313208544e-07,
+      "loss": 2.2357,
+      "step": 160
+    },
+    {
+      "epoch": 0.006723486721113726,
+      "grad_norm": 3.3605614319330064,
+      "learning_rate": 2.2409702082784076e-07,
+      "loss": 2.1997,
+      "step": 170
+    },
+    {
+      "epoch": 0.007118985940002769,
+      "grad_norm": 3.0808008550864012,
+      "learning_rate": 2.372791985235961e-07,
+      "loss": 2.1638,
+      "step": 180
+    },
+    {
+      "epoch": 0.007514485158891812,
+      "grad_norm": 3.0129714869661623,
+      "learning_rate": 2.5046137621935146e-07,
+      "loss": 2.1419,
+      "step": 190
+    },
+    {
+      "epoch": 0.007909984377780854,
+      "grad_norm": 2.6405846741638626,
+      "learning_rate": 2.636435539151068e-07,
+      "loss": 2.1563,
+      "step": 200
+    },
+    {
+      "epoch": 0.008305483596669896,
+      "grad_norm": 2.643582511409625,
+      "learning_rate": 2.7682573161086216e-07,
+      "loss": 2.1003,
+      "step": 210
+    },
+    {
+      "epoch": 0.00870098281555894,
+      "grad_norm": 2.232571959869797,
+      "learning_rate": 2.900079093066175e-07,
+      "loss": 2.0861,
+      "step": 220
+    },
+    {
+      "epoch": 0.009096482034447982,
+      "grad_norm": 2.128301735621245,
+      "learning_rate": 3.031900870023728e-07,
+      "loss": 2.0055,
+      "step": 230
+    },
+    {
+      "epoch": 0.009491981253337025,
+      "grad_norm": 2.020636723569866,
+      "learning_rate": 3.1637226469812813e-07,
+      "loss": 1.9564,
+      "step": 240
+    },
+    {
+      "epoch": 0.009887480472226068,
+      "grad_norm": 2.1434864010939,
+      "learning_rate": 3.295544423938835e-07,
+      "loss": 1.9526,
+      "step": 250
+    },
+    {
+      "epoch": 0.01028297969111511,
+      "grad_norm": 2.1828048079576456,
+      "learning_rate": 3.4273662008963883e-07,
+      "loss": 1.9264,
+      "step": 260
+    },
+    {
+      "epoch": 0.010678478910004154,
+      "grad_norm": 1.9105029100185225,
+      "learning_rate": 3.5591879778539415e-07,
+      "loss": 1.8921,
+      "step": 270
+    },
+    {
+      "epoch": 0.011073978128893195,
+      "grad_norm": 2.4926551290834817,
+      "learning_rate": 3.691009754811495e-07,
+      "loss": 1.8819,
+      "step": 280
+    },
+    {
+      "epoch": 0.011469477347782238,
+      "grad_norm": 1.8847166285822843,
+      "learning_rate": 3.8228315317690485e-07,
+      "loss": 1.8367,
+      "step": 290
+    },
+    {
+      "epoch": 0.01186497656667128,
+      "grad_norm": 1.8019359837801003,
+      "learning_rate": 3.954653308726602e-07,
+      "loss": 1.8826,
+      "step": 300
+    },
+    {
+      "epoch": 0.012260475785560323,
+      "grad_norm": 1.7209235517173358,
+      "learning_rate": 4.086475085684155e-07,
+      "loss": 1.8866,
+      "step": 310
+    },
+    {
+      "epoch": 0.012655975004449366,
+      "grad_norm": 1.6203504074860473,
+      "learning_rate": 4.218296862641709e-07,
+      "loss": 1.8438,
+      "step": 320
+    },
+    {
+      "epoch": 0.013051474223338409,
+      "grad_norm": 1.6671035396275247,
+      "learning_rate": 4.350118639599262e-07,
+      "loss": 1.835,
+      "step": 330
+    },
+    {
+      "epoch": 0.013446973442227452,
+      "grad_norm": 1.5304792384555093,
+      "learning_rate": 4.481940416556815e-07,
+      "loss": 1.809,
+      "step": 340
+    },
+    {
+      "epoch": 0.013842472661116495,
+      "grad_norm": 1.5048385764472376,
+      "learning_rate": 4.613762193514369e-07,
+      "loss": 1.7621,
+      "step": 350
+    },
+    {
+      "epoch": 0.014237971880005537,
+      "grad_norm": 1.7833195775272743,
+      "learning_rate": 4.745583970471922e-07,
+      "loss": 1.7634,
+      "step": 360
+    },
+    {
+      "epoch": 0.01463347109889458,
+      "grad_norm": 1.6045933891412605,
+      "learning_rate": 4.877405747429475e-07,
+      "loss": 1.7578,
+      "step": 370
+    },
+    {
+      "epoch": 0.015028970317783623,
+      "grad_norm": 1.6923943685841414,
+      "learning_rate": 5.009227524387029e-07,
+      "loss": 1.7499,
+      "step": 380
+    },
+    {
+      "epoch": 0.015424469536672664,
+      "grad_norm": 1.746191195367448,
+      "learning_rate": 5.141049301344583e-07,
+      "loss": 1.7321,
+      "step": 390
+    },
+    {
+      "epoch": 0.015819968755561707,
+      "grad_norm": 1.5229493549326838,
+      "learning_rate": 5.272871078302136e-07,
+      "loss": 1.7276,
+      "step": 400
+    },
+    {
+      "epoch": 0.01621546797445075,
+      "grad_norm": 1.550582846508733,
+      "learning_rate": 5.404692855259689e-07,
+      "loss": 1.7101,
+      "step": 410
+    },
+    {
+      "epoch": 0.016610967193339793,
+      "grad_norm": 1.6232357312318062,
+      "learning_rate": 5.536514632217243e-07,
+      "loss": 1.6954,
+      "step": 420
+    },
+    {
+      "epoch": 0.017006466412228836,
+      "grad_norm": 1.5664773994327206,
+      "learning_rate": 5.668336409174796e-07,
+      "loss": 1.7404,
+      "step": 430
+    },
+    {
+      "epoch": 0.01740196563111788,
+      "grad_norm": 1.6713626953240466,
+      "learning_rate": 5.80015818613235e-07,
+      "loss": 1.7557,
+      "step": 440
+    },
+    {
+      "epoch": 0.01779746485000692,
+      "grad_norm": 1.5298539994905176,
+      "learning_rate": 5.931979963089903e-07,
+      "loss": 1.74,
+      "step": 450
+    },
+    {
+      "epoch": 0.018192964068895964,
+      "grad_norm": 1.5719731844258467,
+      "learning_rate": 6.063801740047456e-07,
+      "loss": 1.7228,
+      "step": 460
+    },
+    {
+      "epoch": 0.018588463287785007,
+      "grad_norm": 1.696690769626351,
+      "learning_rate": 6.19562351700501e-07,
+      "loss": 1.6842,
+      "step": 470
+    },
+    {
+      "epoch": 0.01898396250667405,
+      "grad_norm": 1.6477134561193705,
+      "learning_rate": 6.327445293962563e-07,
+      "loss": 1.6852,
+      "step": 480
+    },
+    {
+      "epoch": 0.019379461725563093,
+      "grad_norm": 1.4954021115012037,
+      "learning_rate": 6.459267070920116e-07,
+      "loss": 1.6848,
+      "step": 490
+    },
+    {
+      "epoch": 0.019774960944452136,
+      "grad_norm": 1.4543001068041868,
+      "learning_rate": 6.59108884787767e-07,
+      "loss": 1.6932,
+      "step": 500
+    },
+    {
+      "epoch": 0.02017046016334118,
+      "grad_norm": 1.5572647621051774,
+      "learning_rate": 6.722910624835223e-07,
+      "loss": 1.6939,
+      "step": 510
+    },
+    {
+      "epoch": 0.02056595938223022,
+      "grad_norm": 1.5060407384324919,
+      "learning_rate": 6.854732401792777e-07,
+      "loss": 1.665,
+      "step": 520
+    },
+    {
+      "epoch": 0.020961458601119264,
+      "grad_norm": 1.5473694995949017,
+      "learning_rate": 6.98655417875033e-07,
+      "loss": 1.6816,
+      "step": 530
+    },
+    {
+      "epoch": 0.021356957820008307,
+      "grad_norm": 1.585877480822181,
+      "learning_rate": 7.118375955707883e-07,
+      "loss": 1.6502,
+      "step": 540
+    },
+    {
+      "epoch": 0.021752457038897346,
+      "grad_norm": 1.552409095115036,
+      "learning_rate": 7.250197732665437e-07,
+      "loss": 1.6307,
+      "step": 550
+    },
+    {
+      "epoch": 0.02214795625778639,
+      "grad_norm": 1.4684053904227856,
+      "learning_rate": 7.38201950962299e-07,
+      "loss": 1.6293,
+      "step": 560
+    },
+    {
+      "epoch": 0.022543455476675432,
+      "grad_norm": 1.5474705238190347,
+      "learning_rate": 7.513841286580543e-07,
+      "loss": 1.6238,
+      "step": 570
+    },
+    {
+      "epoch": 0.022938954695564475,
+      "grad_norm": 1.4126204821167332,
+      "learning_rate": 7.645663063538097e-07,
+      "loss": 1.6373,
+      "step": 580
+    },
+    {
+      "epoch": 0.023334453914453518,
+      "grad_norm": 1.566399471829619,
+      "learning_rate": 7.77748484049565e-07,
+      "loss": 1.6068,
+      "step": 590
+    },
+    {
+      "epoch": 0.02372995313334256,
+      "grad_norm": 1.4763703195584525,
+      "learning_rate": 7.909306617453203e-07,
+      "loss": 1.6294,
+      "step": 600
+    },
+    {
+      "epoch": 0.024125452352231604,
+      "grad_norm": 1.4214745635789,
+      "learning_rate": 8.041128394410757e-07,
+      "loss": 1.6015,
+      "step": 610
+    },
+    {
+      "epoch": 0.024520951571120646,
+      "grad_norm": 1.4712408124656606,
+      "learning_rate": 8.17295017136831e-07,
+      "loss": 1.6183,
+      "step": 620
+    },
+    {
+      "epoch": 0.02491645079000969,
+      "grad_norm": 1.3914568736563913,
+      "learning_rate": 8.304771948325864e-07,
+      "loss": 1.5748,
+      "step": 630
+    },
+    {
+      "epoch": 0.025311950008898732,
+      "grad_norm": 1.628242307981156,
+      "learning_rate": 8.436593725283417e-07,
+      "loss": 1.5899,
+      "step": 640
+    },
+    {
+      "epoch": 0.025707449227787775,
+      "grad_norm": 1.5522926075709969,
+      "learning_rate": 8.56841550224097e-07,
+      "loss": 1.6057,
+      "step": 650
+    },
+    {
+      "epoch": 0.026102948446676818,
+      "grad_norm": 1.3563714916369782,
+      "learning_rate": 8.700237279198524e-07,
+      "loss": 1.5756,
+      "step": 660
+    },
+    {
+      "epoch": 0.02649844766556586,
+      "grad_norm": 1.5473797209639122,
+      "learning_rate": 8.832059056156078e-07,
+      "loss": 1.5848,
+      "step": 670
+    },
+    {
+      "epoch": 0.026893946884454904,
+      "grad_norm": 1.3448232230490351,
+      "learning_rate": 8.96388083311363e-07,
+      "loss": 1.5797,
+      "step": 680
+    },
+    {
+      "epoch": 0.027289446103343946,
+      "grad_norm": 1.4678885776783204,
+      "learning_rate": 9.095702610071184e-07,
+      "loss": 1.5598,
+      "step": 690
+    },
+    {
+      "epoch": 0.02768494532223299,
+      "grad_norm": 1.4395122891633945,
+      "learning_rate": 9.227524387028738e-07,
+      "loss": 1.5448,
+      "step": 700
+    },
+    {
+      "epoch": 0.028080444541122032,
+      "grad_norm": 1.3091642457498578,
+      "learning_rate": 9.359346163986291e-07,
+      "loss": 1.5719,
+      "step": 710
+    },
+    {
+      "epoch": 0.028475943760011075,
+      "grad_norm": 1.4465270166087172,
+      "learning_rate": 9.491167940943844e-07,
+      "loss": 1.5232,
+      "step": 720
+    },
+    {
+      "epoch": 0.028871442978900118,
+      "grad_norm": 1.413997577563463,
+      "learning_rate": 9.6229897179014e-07,
+      "loss": 1.5481,
+      "step": 730
+    },
+    {
+      "epoch": 0.02926694219778916,
+      "grad_norm": 1.4180765994412476,
+      "learning_rate": 9.75481149485895e-07,
+      "loss": 1.5304,
+      "step": 740
+    },
+    {
+      "epoch": 0.029662441416678204,
+      "grad_norm": 1.374935810420744,
+      "learning_rate": 9.886633271816505e-07,
+      "loss": 1.5182,
+      "step": 750
+    },
+    {
+      "epoch": 0.030057940635567246,
+      "grad_norm": 1.4614624968528402,
+      "learning_rate": 1.0018455048774058e-06,
+      "loss": 1.5315,
+      "step": 760
+    },
+    {
+      "epoch": 0.030453439854456286,
+      "grad_norm": 1.5654133110212314,
+      "learning_rate": 1.0150276825731612e-06,
+      "loss": 1.5604,
+      "step": 770
+    },
+    {
+      "epoch": 0.03084893907334533,
+      "grad_norm": 1.3406860820189042,
+      "learning_rate": 1.0282098602689166e-06,
+      "loss": 1.5199,
+      "step": 780
+    },
+    {
+      "epoch": 0.03124443829223437,
+      "grad_norm": 1.415493720966217,
+      "learning_rate": 1.0413920379646718e-06,
+      "loss": 1.5086,
+      "step": 790
+    },
+    {
+      "epoch": 0.031639937511123414,
+      "grad_norm": 1.3992787320863422,
+      "learning_rate": 1.0545742156604271e-06,
+      "loss": 1.5212,
+      "step": 800
+    },
+    {
+      "epoch": 0.03203543673001246,
+      "grad_norm": 1.403735804574894,
+      "learning_rate": 1.0677563933561825e-06,
+      "loss": 1.513,
+      "step": 810
+    },
+    {
+      "epoch": 0.0324309359489015,
+      "grad_norm": 1.3853535238209604,
+      "learning_rate": 1.0809385710519379e-06,
+      "loss": 1.5152,
+      "step": 820
+    },
+    {
+      "epoch": 0.032826435167790546,
+      "grad_norm": 1.3750744422194516,
+      "learning_rate": 1.0941207487476933e-06,
+      "loss": 1.5273,
+      "step": 830
+    },
+    {
+      "epoch": 0.033221934386679586,
+      "grad_norm": 1.33200650080592,
+      "learning_rate": 1.1073029264434486e-06,
+      "loss": 1.5034,
+      "step": 840
+    },
+    {
+      "epoch": 0.03361743360556863,
+      "grad_norm": 1.410768669372819,
+      "learning_rate": 1.1204851041392038e-06,
+      "loss": 1.516,
+      "step": 850
+    },
+    {
+      "epoch": 0.03401293282445767,
+      "grad_norm": 1.3774153439612187,
+      "learning_rate": 1.1336672818349592e-06,
+      "loss": 1.5088,
+      "step": 860
+    },
+    {
+      "epoch": 0.03440843204334672,
+      "grad_norm": 1.3779966259760223,
+      "learning_rate": 1.1468494595307146e-06,
+      "loss": 1.5183,
+      "step": 870
+    },
+    {
+      "epoch": 0.03480393126223576,
+      "grad_norm": 1.344217420218594,
+      "learning_rate": 1.16003163722647e-06,
+      "loss": 1.4865,
+      "step": 880
+    },
+    {
+      "epoch": 0.0351994304811248,
+      "grad_norm": 1.432525298396332,
+      "learning_rate": 1.1732138149222253e-06,
+      "loss": 1.4758,
+      "step": 890
+    },
+    {
+      "epoch": 0.03559492970001384,
+      "grad_norm": 1.3306163364600878,
+      "learning_rate": 1.1863959926179807e-06,
+      "loss": 1.4826,
+      "step": 900
+    },
+    {
+      "epoch": 0.03599042891890288,
+      "grad_norm": 1.5235420655487502,
+      "learning_rate": 1.1995781703137358e-06,
+      "loss": 1.4898,
+      "step": 910
+    },
+    {
+      "epoch": 0.03638592813779193,
+      "grad_norm": 1.346509814724087,
+      "learning_rate": 1.2127603480094912e-06,
+      "loss": 1.505,
+      "step": 920
+    },
+    {
+      "epoch": 0.03678142735668097,
+      "grad_norm": 1.3877406779619121,
+      "learning_rate": 1.2259425257052466e-06,
+      "loss": 1.4767,
+      "step": 930
+    },
+    {
+      "epoch": 0.037176926575570014,
+      "grad_norm": 1.3846214860230308,
+      "learning_rate": 1.239124703401002e-06,
+      "loss": 1.4724,
+      "step": 940
+    },
+    {
+      "epoch": 0.037572425794459054,
+      "grad_norm": 1.2817258137618222,
+      "learning_rate": 1.2523068810967574e-06,
+      "loss": 1.4713,
+      "step": 950
+    },
+    {
+      "epoch": 0.0379679250133481,
+      "grad_norm": 1.3439675775454174,
+      "learning_rate": 1.2654890587925125e-06,
+      "loss": 1.4752,
+      "step": 960
+    },
+    {
+      "epoch": 0.03836342423223714,
+      "grad_norm": 1.3554867357811966,
+      "learning_rate": 1.278671236488268e-06,
+      "loss": 1.4562,
+      "step": 970
+    },
+    {
+      "epoch": 0.038758923451126186,
+      "grad_norm": 1.2790133713776188,
+      "learning_rate": 1.2918534141840233e-06,
+      "loss": 1.4674,
+      "step": 980
+    },
+    {
+      "epoch": 0.039154422670015225,
+      "grad_norm": 1.343329302614488,
+      "learning_rate": 1.3050355918797786e-06,
+      "loss": 1.4666,
+      "step": 990
+    },
+    {
+      "epoch": 0.03954992188890427,
+      "grad_norm": 1.2672292495398485,
+      "learning_rate": 1.318217769575534e-06,
+      "loss": 1.4556,
+      "step": 1000
+    },
+    {
+      "epoch": 0.03994542110779331,
+      "grad_norm": 1.3037813733806745,
+      "learning_rate": 1.3313999472712894e-06,
+      "loss": 1.4497,
+      "step": 1010
+    },
+    {
+      "epoch": 0.04034092032668236,
+      "grad_norm": 1.345660418942053,
+      "learning_rate": 1.3445821249670446e-06,
+      "loss": 1.4426,
+      "step": 1020
+    },
+    {
+      "epoch": 0.040736419545571396,
+      "grad_norm": 1.2609132287515321,
+      "learning_rate": 1.3577643026628001e-06,
+      "loss": 1.4565,
+      "step": 1030
+    },
+    {
+      "epoch": 0.04113191876446044,
+      "grad_norm": 1.374665502512752,
+      "learning_rate": 1.3709464803585553e-06,
+      "loss": 1.4412,
+      "step": 1040
+    },
+    {
+      "epoch": 0.04152741798334948,
+      "grad_norm": 1.3081072490504966,
+      "learning_rate": 1.3841286580543107e-06,
+      "loss": 1.4402,
+      "step": 1050
+    },
+    {
+      "epoch": 0.04192291720223853,
+      "grad_norm": 1.4330687455032025,
+      "learning_rate": 1.397310835750066e-06,
+      "loss": 1.4683,
+      "step": 1060
+    },
+    {
+      "epoch": 0.04231841642112757,
+      "grad_norm": 1.3606849988116267,
+      "learning_rate": 1.4104930134458214e-06,
+      "loss": 1.4333,
+      "step": 1070
+    },
+    {
+      "epoch": 0.042713915640016614,
+      "grad_norm": 1.3074383876921958,
+      "learning_rate": 1.4236751911415766e-06,
+      "loss": 1.4498,
+      "step": 1080
+    },
+    {
+      "epoch": 0.043109414858905654,
+      "grad_norm": 1.4042081366074939,
+      "learning_rate": 1.4368573688373322e-06,
+      "loss": 1.4528,
+      "step": 1090
+    },
+    {
+      "epoch": 0.04350491407779469,
+      "grad_norm": 1.272696004817707,
+      "learning_rate": 1.4500395465330874e-06,
+      "loss": 1.4277,
+      "step": 1100
+    },
+    {
+      "epoch": 0.04390041329668374,
+      "grad_norm": 1.3590575435554115,
+      "learning_rate": 1.4632217242288427e-06,
+      "loss": 1.4233,
+      "step": 1110
+    },
+    {
+      "epoch": 0.04429591251557278,
+      "grad_norm": 1.2769709382668435,
+      "learning_rate": 1.476403901924598e-06,
+      "loss": 1.4244,
+      "step": 1120
+    },
+    {
+      "epoch": 0.044691411734461825,
+      "grad_norm": 1.2345551348610775,
+      "learning_rate": 1.4895860796203535e-06,
+      "loss": 1.4428,
+      "step": 1130
+    },
+    {
+      "epoch": 0.045086910953350864,
+      "grad_norm": 1.4051433004667393,
+      "learning_rate": 1.5027682573161087e-06,
+      "loss": 1.4464,
+      "step": 1140
+    },
+    {
+      "epoch": 0.04548241017223991,
+      "grad_norm": 1.2673234929231048,
+      "learning_rate": 1.515950435011864e-06,
+      "loss": 1.4083,
+      "step": 1150
+    },
+    {
+      "epoch": 0.04587790939112895,
+      "grad_norm": 1.2619921688980378,
+      "learning_rate": 1.5291326127076194e-06,
+      "loss": 1.4198,
+      "step": 1160
+    },
+    {
+      "epoch": 0.046273408610017996,
+      "grad_norm": 1.3402764138765177,
+      "learning_rate": 1.5423147904033748e-06,
+      "loss": 1.4147,
+      "step": 1170
+    },
+    {
+      "epoch": 0.046668907828907036,
+      "grad_norm": 1.3856239712083365,
+      "learning_rate": 1.55549696809913e-06,
+      "loss": 1.4184,
+      "step": 1180
+    },
+    {
+      "epoch": 0.04706440704779608,
+      "grad_norm": 1.3189510305969174,
+      "learning_rate": 1.5686791457948855e-06,
+      "loss": 1.4032,
+      "step": 1190
+    },
+    {
+      "epoch": 0.04745990626668512,
+      "grad_norm": 1.3714820128166048,
+      "learning_rate": 1.5818613234906407e-06,
+      "loss": 1.4088,
+      "step": 1200
+    },
+    {
+      "epoch": 0.04785540548557417,
+      "grad_norm": 1.1741079276066009,
+      "learning_rate": 1.595043501186396e-06,
+      "loss": 1.4101,
+      "step": 1210
+    },
+    {
+      "epoch": 0.04825090470446321,
+      "grad_norm": 1.2891130628308234,
+      "learning_rate": 1.6082256788821515e-06,
+      "loss": 1.3912,
+      "step": 1220
+    },
+    {
+      "epoch": 0.048646403923352254,
+      "grad_norm": 1.269826134186319,
+      "learning_rate": 1.6214078565779068e-06,
+      "loss": 1.3908,
+      "step": 1230
+    },
+    {
+      "epoch": 0.04904190314224129,
+      "grad_norm": 1.2966310224708233,
+      "learning_rate": 1.634590034273662e-06,
+      "loss": 1.4013,
+      "step": 1240
+    },
+    {
+      "epoch": 0.04943740236113034,
+      "grad_norm": 1.2623601906553965,
+      "learning_rate": 1.6477722119694176e-06,
+      "loss": 1.3954,
+      "step": 1250
+    },
+    {
+      "epoch": 0.04983290158001938,
+      "grad_norm": 1.2093054401404613,
+      "learning_rate": 1.6609543896651727e-06,
+      "loss": 1.3933,
+      "step": 1260
+    },
+    {
+      "epoch": 0.050228400798908425,
+      "grad_norm": 1.312509373739013,
+      "learning_rate": 1.6741365673609281e-06,
+      "loss": 1.4016,
+      "step": 1270
+    },
+    {
+      "epoch": 0.050623900017797464,
+      "grad_norm": 1.2875671531479649,
+      "learning_rate": 1.6873187450566835e-06,
+      "loss": 1.3783,
+      "step": 1280
+    },
+    {
+      "epoch": 0.05101939923668651,
+      "grad_norm": 1.2478098054962192,
+      "learning_rate": 1.7005009227524389e-06,
+      "loss": 1.4164,
+      "step": 1290
+    },
+    {
+      "epoch": 0.05141489845557555,
+      "grad_norm": 1.2796969742474207,
+      "learning_rate": 1.713683100448194e-06,
+      "loss": 1.4055,
+      "step": 1300
+    },
+    {
+      "epoch": 0.051810397674464596,
+      "grad_norm": 1.2046753206553034,
+      "learning_rate": 1.7268652781439496e-06,
+      "loss": 1.3803,
+      "step": 1310
+    },
+    {
+      "epoch": 0.052205896893353636,
+      "grad_norm": 1.2212394785406553,
+      "learning_rate": 1.7400474558397048e-06,
+      "loss": 1.3904,
+      "step": 1320
+    },
+    {
+      "epoch": 0.052601396112242675,
+      "grad_norm": 1.2646871837332208,
+      "learning_rate": 1.7532296335354602e-06,
+      "loss": 1.3713,
+      "step": 1330
+    },
+    {
+      "epoch": 0.05299689533113172,
+      "grad_norm": 1.1867445931970122,
+      "learning_rate": 1.7664118112312155e-06,
+      "loss": 1.4019,
+      "step": 1340
+    },
+    {
+      "epoch": 0.05339239455002076,
+      "grad_norm": 1.3679672932922804,
+      "learning_rate": 1.779593988926971e-06,
+      "loss": 1.4007,
+      "step": 1350
+    },
+    {
+      "epoch": 0.05378789376890981,
+      "grad_norm": 1.2031507878416894,
+      "learning_rate": 1.792776166622726e-06,
+      "loss": 1.4041,
+      "step": 1360
+    },
+    {
+      "epoch": 0.05418339298779885,
+      "grad_norm": 1.3930446030305885,
+      "learning_rate": 1.8059583443184817e-06,
+      "loss": 1.3709,
+      "step": 1370
+    },
+    {
+      "epoch": 0.05457889220668789,
+      "grad_norm": 1.3165081578311295,
+      "learning_rate": 1.8191405220142368e-06,
+      "loss": 1.3929,
+      "step": 1380
+    },
+    {
+      "epoch": 0.05497439142557693,
+      "grad_norm": 1.3804310114347087,
+      "learning_rate": 1.8323226997099922e-06,
+      "loss": 1.371,
+      "step": 1390
+    },
+    {
+      "epoch": 0.05536989064446598,
+      "grad_norm": 1.2659266021240543,
+      "learning_rate": 1.8455048774057476e-06,
+      "loss": 1.3599,
+      "step": 1400
+    },
+    {
+      "epoch": 0.05576538986335502,
+      "grad_norm": 1.1998894502716475,
+      "learning_rate": 1.858687055101503e-06,
+      "loss": 1.3698,
+      "step": 1410
+    },
+    {
+      "epoch": 0.056160889082244064,
+      "grad_norm": 1.1797459236151298,
+      "learning_rate": 1.8718692327972581e-06,
+      "loss": 1.3725,
+      "step": 1420
+    },
+    {
+      "epoch": 0.056556388301133104,
+      "grad_norm": 1.3850200456812771,
+      "learning_rate": 1.8850514104930137e-06,
+      "loss": 1.3904,
+      "step": 1430
+    },
+    {
+      "epoch": 0.05695188752002215,
+      "grad_norm": 1.284926362127649,
+      "learning_rate": 1.8982335881887689e-06,
+      "loss": 1.3617,
+      "step": 1440
+    },
+    {
+      "epoch": 0.05734738673891119,
+      "grad_norm": 1.3130108102859457,
+      "learning_rate": 1.9114157658845243e-06,
+      "loss": 1.387,
+      "step": 1450
+    },
+    {
+      "epoch": 0.057742885957800236,
+      "grad_norm": 1.2515641080831983,
+      "learning_rate": 1.92459794358028e-06,
+      "loss": 1.3434,
+      "step": 1460
+    },
+    {
+      "epoch": 0.058138385176689275,
+      "grad_norm": 1.2855780839226325,
+      "learning_rate": 1.937780121276035e-06,
+      "loss": 1.3818,
+      "step": 1470
+    },
+    {
+      "epoch": 0.05853388439557832,
+      "grad_norm": 1.3951421747758692,
+      "learning_rate": 1.95096229897179e-06,
+      "loss": 1.3525,
+      "step": 1480
+    },
+    {
+      "epoch": 0.05892938361446736,
+      "grad_norm": 1.319151820190548,
+      "learning_rate": 1.9641444766675458e-06,
+      "loss": 1.369,
+      "step": 1490
+    },
+    {
+      "epoch": 0.05932488283335641,
+      "grad_norm": 1.283286982030512,
+      "learning_rate": 1.977326654363301e-06,
+      "loss": 1.3727,
+      "step": 1500
+    },
+    {
+      "epoch": 0.059720382052245446,
+      "grad_norm": 1.225386210337589,
+      "learning_rate": 1.990508832059056e-06,
+      "loss": 1.3847,
+      "step": 1510
+    },
+    {
+      "epoch": 0.06011588127113449,
+      "grad_norm": 1.3323436425897397,
+      "learning_rate": 2.0036910097548117e-06,
+      "loss": 1.368,
+      "step": 1520
+    },
+    {
+      "epoch": 0.06051138049002353,
+      "grad_norm": 1.2114254618537965,
+      "learning_rate": 2.016873187450567e-06,
+      "loss": 1.3842,
+      "step": 1530
+    },
+    {
+      "epoch": 0.06090687970891257,
+      "grad_norm": 1.174176153009977,
+      "learning_rate": 2.0300553651463224e-06,
+      "loss": 1.382,
+      "step": 1540
+    },
+    {
+      "epoch": 0.06130237892780162,
+      "grad_norm": 1.2213823235440902,
+      "learning_rate": 2.043237542842078e-06,
+      "loss": 1.3775,
+      "step": 1550
+    },
+    {
+      "epoch": 0.06169787814669066,
+      "grad_norm": 1.3067532689182895,
+      "learning_rate": 2.056419720537833e-06,
+      "loss": 1.3546,
+      "step": 1560
+    },
+    {
+      "epoch": 0.062093377365579704,
+      "grad_norm": 1.2405561143563724,
+      "learning_rate": 2.0696018982335883e-06,
+      "loss": 1.3793,
+      "step": 1570
+    },
+    {
+      "epoch": 0.06248887658446874,
+      "grad_norm": 1.3444825192839442,
+      "learning_rate": 2.0827840759293435e-06,
+      "loss": 1.3568,
+      "step": 1580
+    },
+    {
+      "epoch": 0.06288437580335779,
+      "grad_norm": 1.3190960946200965,
+      "learning_rate": 2.095966253625099e-06,
+      "loss": 1.359,
+      "step": 1590
+    },
+    {
+      "epoch": 0.06327987502224683,
+      "grad_norm": 1.2444707895977416,
+      "learning_rate": 2.1091484313208543e-06,
+      "loss": 1.3519,
+      "step": 1600
+    },
+    {
+      "epoch": 0.06367537424113587,
+      "grad_norm": 1.2858248835895856,
+      "learning_rate": 2.1223306090166094e-06,
+      "loss": 1.3542,
+      "step": 1610
+    },
+    {
+      "epoch": 0.06407087346002492,
+      "grad_norm": 1.2093456894989756,
+      "learning_rate": 2.135512786712365e-06,
+      "loss": 1.3507,
+      "step": 1620
+    },
+    {
+      "epoch": 0.06446637267891396,
+      "grad_norm": 1.2013316230182205,
+      "learning_rate": 2.14869496440812e-06,
+      "loss": 1.3818,
+      "step": 1630
+    },
+    {
+      "epoch": 0.064861871897803,
+      "grad_norm": 1.2980141671010164,
+      "learning_rate": 2.1618771421038758e-06,
+      "loss": 1.3641,
+      "step": 1640
+    },
+    {
+      "epoch": 0.06525737111669204,
+      "grad_norm": 1.2346815137921718,
+      "learning_rate": 2.175059319799631e-06,
+      "loss": 1.3461,
+      "step": 1650
+    },
+    {
+      "epoch": 0.06565287033558109,
+      "grad_norm": 1.2096389907928575,
+      "learning_rate": 2.1882414974953865e-06,
+      "loss": 1.3343,
+      "step": 1660
+    },
+    {
+      "epoch": 0.06604836955447013,
+      "grad_norm": 1.3952439518632846,
+      "learning_rate": 2.2014236751911417e-06,
+      "loss": 1.3375,
+      "step": 1670
+    },
+    {
+      "epoch": 0.06644386877335917,
+      "grad_norm": 1.174174309336657,
+      "learning_rate": 2.2146058528868973e-06,
+      "loss": 1.3411,
+      "step": 1680
+    },
+    {
+      "epoch": 0.06683936799224821,
+      "grad_norm": 1.2765076663404167,
+      "learning_rate": 2.2277880305826524e-06,
+      "loss": 1.3432,
+      "step": 1690
+    },
+    {
+      "epoch": 0.06723486721113726,
+      "grad_norm": 1.3370795855751043,
+      "learning_rate": 2.2409702082784076e-06,
+      "loss": 1.3489,
+      "step": 1700
+    },
+    {
+      "epoch": 0.0676303664300263,
+      "grad_norm": 1.2366580352436853,
+      "learning_rate": 2.254152385974163e-06,
+      "loss": 1.3262,
+      "step": 1710
+    },
+    {
+      "epoch": 0.06802586564891534,
+      "grad_norm": 1.2943364201255696,
+      "learning_rate": 2.2673345636699184e-06,
+      "loss": 1.3339,
+      "step": 1720
+    },
+    {
+      "epoch": 0.06842136486780438,
+      "grad_norm": 1.196691594127321,
+      "learning_rate": 2.2805167413656735e-06,
+      "loss": 1.3379,
+      "step": 1730
+    },
+    {
+      "epoch": 0.06881686408669344,
+      "grad_norm": 1.2142181703444799,
+      "learning_rate": 2.293698919061429e-06,
+      "loss": 1.3515,
+      "step": 1740
+    },
+    {
+      "epoch": 0.06921236330558247,
+      "grad_norm": 1.172172263971748,
+      "learning_rate": 2.3068810967571843e-06,
+      "loss": 1.3424,
+      "step": 1750
+    },
+    {
+      "epoch": 0.06960786252447151,
+      "grad_norm": 1.1981525906535253,
+      "learning_rate": 2.32006327445294e-06,
+      "loss": 1.3359,
+      "step": 1760
+    },
+    {
+      "epoch": 0.07000336174336055,
+      "grad_norm": 1.1121572319368689,
+      "learning_rate": 2.3332454521486954e-06,
+      "loss": 1.3492,
+      "step": 1770
+    },
+    {
+      "epoch": 0.0703988609622496,
+      "grad_norm": 1.3609407003058103,
+      "learning_rate": 2.3464276298444506e-06,
+      "loss": 1.3494,
+      "step": 1780
+    },
+    {
+      "epoch": 0.07079436018113865,
+      "grad_norm": 1.195358728366932,
+      "learning_rate": 2.3596098075402058e-06,
+      "loss": 1.3133,
+      "step": 1790
+    },
+    {
+      "epoch": 0.07118985940002769,
+      "grad_norm": 1.2660297564882381,
+      "learning_rate": 2.3727919852359614e-06,
+      "loss": 1.3286,
+      "step": 1800
+    },
+    {
+      "epoch": 0.07158535861891673,
+      "grad_norm": 1.1846403707730422,
+      "learning_rate": 2.3859741629317165e-06,
+      "loss": 1.3337,
+      "step": 1810
+    },
+    {
+      "epoch": 0.07198085783780576,
+      "grad_norm": 1.2419714851604897,
+      "learning_rate": 2.3991563406274717e-06,
+      "loss": 1.3444,
+      "step": 1820
+    },
+    {
+      "epoch": 0.07237635705669482,
+      "grad_norm": 1.2247587012064896,
+      "learning_rate": 2.4123385183232273e-06,
+      "loss": 1.3297,
+      "step": 1830
+    },
+    {
+      "epoch": 0.07277185627558386,
+      "grad_norm": 1.2367202399241926,
+      "learning_rate": 2.4255206960189824e-06,
+      "loss": 1.3336,
+      "step": 1840
+    },
+    {
+      "epoch": 0.0731673554944729,
+      "grad_norm": 1.15162704981469,
+      "learning_rate": 2.4387028737147376e-06,
+      "loss": 1.3304,
+      "step": 1850
+    },
+    {
+      "epoch": 0.07356285471336194,
+      "grad_norm": 1.2725548856923554,
+      "learning_rate": 2.451885051410493e-06,
+      "loss": 1.3135,
+      "step": 1860
+    },
+    {
+      "epoch": 0.07395835393225099,
+      "grad_norm": 1.2219938763619118,
+      "learning_rate": 2.4650672291062484e-06,
+      "loss": 1.3318,
+      "step": 1870
+    },
+    {
+      "epoch": 0.07435385315114003,
+      "grad_norm": 1.1414783727229223,
+      "learning_rate": 2.478249406802004e-06,
+      "loss": 1.3201,
+      "step": 1880
+    },
+    {
+      "epoch": 0.07474935237002907,
+      "grad_norm": 1.261774884225301,
+      "learning_rate": 2.4914315844977595e-06,
+      "loss": 1.3163,
+      "step": 1890
+    },
+    {
+      "epoch": 0.07514485158891811,
+      "grad_norm": 1.3685126623521886,
+      "learning_rate": 2.5046137621935147e-06,
+      "loss": 1.3393,
+      "step": 1900
+    },
+    {
+      "epoch": 0.07554035080780716,
+      "grad_norm": 1.2255559190916123,
+      "learning_rate": 2.51779593988927e-06,
+      "loss": 1.3089,
+      "step": 1910
+    },
+    {
+      "epoch": 0.0759358500266962,
+      "grad_norm": 1.1766220763492885,
+      "learning_rate": 2.530978117585025e-06,
+      "loss": 1.3395,
+      "step": 1920
+    },
+    {
+      "epoch": 0.07633134924558524,
+      "grad_norm": 1.2164090577880697,
+      "learning_rate": 2.54416029528078e-06,
+      "loss": 1.3402,
+      "step": 1930
+    },
+    {
+      "epoch": 0.07672684846447428,
+      "grad_norm": 1.1656840897879306,
+      "learning_rate": 2.557342472976536e-06,
+      "loss": 1.3193,
+      "step": 1940
+    },
+    {
+      "epoch": 0.07712234768336333,
+      "grad_norm": 1.2524143523318216,
+      "learning_rate": 2.5705246506722914e-06,
+      "loss": 1.3132,
+      "step": 1950
+    },
+    {
+      "epoch": 0.07751784690225237,
+      "grad_norm": 1.2740067789155567,
+      "learning_rate": 2.5837068283680465e-06,
+      "loss": 1.3409,
+      "step": 1960
+    },
+    {
+      "epoch": 0.07791334612114141,
+      "grad_norm": 1.2041116051593224,
+      "learning_rate": 2.5968890060638017e-06,
+      "loss": 1.3129,
+      "step": 1970
+    },
+    {
+      "epoch": 0.07830884534003045,
+      "grad_norm": 1.297301623510751,
+      "learning_rate": 2.6100711837595573e-06,
+      "loss": 1.3177,
+      "step": 1980
+    },
+    {
+      "epoch": 0.07870434455891949,
+      "grad_norm": 1.1837368447180556,
+      "learning_rate": 2.6232533614553125e-06,
+      "loss": 1.3111,
+      "step": 1990
+    },
+    {
+      "epoch": 0.07909984377780854,
+      "grad_norm": 1.2251227411035477,
+      "learning_rate": 2.636435539151068e-06,
+      "loss": 1.3142,
+      "step": 2000
+    },
+    {
+      "epoch": 0.07949534299669758,
+      "grad_norm": 1.225277517784227,
+      "learning_rate": 2.6496177168468236e-06,
+      "loss": 1.3097,
+      "step": 2010
+    },
+    {
+      "epoch": 0.07989084221558662,
+      "grad_norm": 1.2877850891977878,
+      "learning_rate": 2.662799894542579e-06,
+      "loss": 1.3242,
+      "step": 2020
+    },
+    {
+      "epoch": 0.08028634143447566,
+      "grad_norm": 1.1227169091442835,
+      "learning_rate": 2.675982072238334e-06,
+      "loss": 1.3134,
+      "step": 2030
+    },
+    {
+      "epoch": 0.08068184065336471,
+      "grad_norm": 1.233621741322885,
+      "learning_rate": 2.689164249934089e-06,
+      "loss": 1.3349,
+      "step": 2040
+    },
+    {
+      "epoch": 0.08107733987225375,
+      "grad_norm": 1.1777945722776684,
+      "learning_rate": 2.7023464276298443e-06,
+      "loss": 1.321,
+      "step": 2050
+    },
+    {
+      "epoch": 0.08147283909114279,
+      "grad_norm": 1.2902735762570336,
+      "learning_rate": 2.7155286053256003e-06,
+      "loss": 1.3025,
+      "step": 2060
+    },
+    {
+      "epoch": 0.08186833831003183,
+      "grad_norm": 1.214252890627205,
+      "learning_rate": 2.7287107830213555e-06,
+      "loss": 1.309,
+      "step": 2070
+    },
+    {
+      "epoch": 0.08226383752892089,
+      "grad_norm": 1.157680702032122,
+      "learning_rate": 2.7418929607171106e-06,
+      "loss": 1.295,
+      "step": 2080
+    },
+    {
+      "epoch": 0.08265933674780993,
+      "grad_norm": 1.057546069052866,
+      "learning_rate": 2.755075138412866e-06,
+      "loss": 1.3222,
+      "step": 2090
+    },
+    {
+      "epoch": 0.08305483596669896,
+      "grad_norm": 1.235513698983506,
+      "learning_rate": 2.7682573161086214e-06,
+      "loss": 1.3249,
+      "step": 2100
+    },
+    {
+      "epoch": 0.083450335185588,
+      "grad_norm": 1.1229256349512073,
+      "learning_rate": 2.7814394938043765e-06,
+      "loss": 1.3007,
+      "step": 2110
+    },
+    {
+      "epoch": 0.08384583440447706,
+      "grad_norm": 1.2496919968109956,
+      "learning_rate": 2.794621671500132e-06,
+      "loss": 1.3014,
+      "step": 2120
+    },
+    {
+      "epoch": 0.0842413336233661,
+      "grad_norm": 1.1054196498776563,
+      "learning_rate": 2.8078038491958877e-06,
+      "loss": 1.305,
+      "step": 2130
+    },
+    {
+      "epoch": 0.08463683284225514,
+      "grad_norm": 1.2328726354969368,
+      "learning_rate": 2.820986026891643e-06,
+      "loss": 1.2916,
+      "step": 2140
+    },
+    {
+      "epoch": 0.08503233206114418,
+      "grad_norm": 1.289009845738552,
+      "learning_rate": 2.834168204587398e-06,
+      "loss": 1.3008,
+      "step": 2150
+    },
+    {
+      "epoch": 0.08542783128003323,
+      "grad_norm": 1.2111929582849519,
+      "learning_rate": 2.8473503822831532e-06,
+      "loss": 1.3128,
+      "step": 2160
+    },
+    {
+      "epoch": 0.08582333049892227,
+      "grad_norm": 1.2033677687792834,
+      "learning_rate": 2.8605325599789084e-06,
+      "loss": 1.3147,
+      "step": 2170
+    },
+    {
+      "epoch": 0.08621882971781131,
+      "grad_norm": 1.261854934115352,
+      "learning_rate": 2.8737147376746644e-06,
+      "loss": 1.3069,
+      "step": 2180
+    },
+    {
+      "epoch": 0.08661432893670035,
+      "grad_norm": 1.3012047565607785,
+      "learning_rate": 2.8868969153704196e-06,
+      "loss": 1.2983,
+      "step": 2190
+    },
+    {
+      "epoch": 0.08700982815558939,
+      "grad_norm": 1.2549646960874847,
+      "learning_rate": 2.9000790930661747e-06,
+      "loss": 1.2962,
+      "step": 2200
+    },
+    {
+      "epoch": 0.08740532737447844,
+      "grad_norm": 1.10280154543323,
+      "learning_rate": 2.9132612707619303e-06,
+      "loss": 1.2977,
+      "step": 2210
+    },
+    {
+      "epoch": 0.08780082659336748,
+      "grad_norm": 1.1953750453391896,
+      "learning_rate": 2.9264434484576855e-06,
+      "loss": 1.3066,
+      "step": 2220
+    },
+    {
+      "epoch": 0.08819632581225652,
+      "grad_norm": 1.173115521592014,
+      "learning_rate": 2.9396256261534406e-06,
+      "loss": 1.3035,
+      "step": 2230
+    },
+    {
+      "epoch": 0.08859182503114556,
+      "grad_norm": 1.3534880527534277,
+      "learning_rate": 2.952807803849196e-06,
+      "loss": 1.307,
+      "step": 2240
+    },
+    {
+      "epoch": 0.08898732425003461,
+      "grad_norm": 1.2387759451415656,
+      "learning_rate": 2.965989981544952e-06,
+      "loss": 1.2956,
+      "step": 2250
+    },
+    {
+      "epoch": 0.08938282346892365,
+      "grad_norm": 1.2677066907321448,
+      "learning_rate": 2.979172159240707e-06,
+      "loss": 1.2928,
+      "step": 2260
+    },
+    {
+      "epoch": 0.08977832268781269,
+      "grad_norm": 1.1372495129274685,
+      "learning_rate": 2.992354336936462e-06,
+      "loss": 1.302,
+      "step": 2270
+    },
+    {
+      "epoch": 0.09017382190670173,
+      "grad_norm": 1.1894113283549337,
+      "learning_rate": 3.0055365146322173e-06,
+      "loss": 1.2806,
+      "step": 2280
+    },
+    {
+      "epoch": 0.09056932112559078,
+      "grad_norm": 1.1347442967062604,
+      "learning_rate": 3.0187186923279725e-06,
+      "loss": 1.3028,
+      "step": 2290
+    },
+    {
+      "epoch": 0.09096482034447982,
+      "grad_norm": 1.2641426738557546,
+      "learning_rate": 3.031900870023728e-06,
+      "loss": 1.3049,
+      "step": 2300
+    },
+    {
+      "epoch": 0.09136031956336886,
+      "grad_norm": 1.2912543277413953,
+      "learning_rate": 3.0450830477194836e-06,
+      "loss": 1.2942,
+      "step": 2310
+    },
+    {
+      "epoch": 0.0917558187822579,
+      "grad_norm": 1.1640861851973205,
+      "learning_rate": 3.058265225415239e-06,
+      "loss": 1.2676,
+      "step": 2320
+    },
+    {
+      "epoch": 0.09215131800114695,
+      "grad_norm": 1.2761397336573672,
+      "learning_rate": 3.0714474031109944e-06,
+      "loss": 1.2743,
+      "step": 2330
+    },
+    {
+      "epoch": 0.09254681722003599,
+      "grad_norm": 1.3336958425243306,
+      "learning_rate": 3.0846295808067496e-06,
+      "loss": 1.3095,
+      "step": 2340
+    },
+    {
+      "epoch": 0.09294231643892503,
+      "grad_norm": 1.2293364502627973,
+      "learning_rate": 3.0978117585025047e-06,
+      "loss": 1.2689,
+      "step": 2350
+    },
+    {
+      "epoch": 0.09333781565781407,
+      "grad_norm": 1.1964016901569907,
+      "learning_rate": 3.11099393619826e-06,
+      "loss": 1.2988,
+      "step": 2360
+    },
+    {
+      "epoch": 0.09373331487670312,
+      "grad_norm": 1.1652688763465373,
+      "learning_rate": 3.124176113894016e-06,
+      "loss": 1.2951,
+      "step": 2370
+    },
+    {
+      "epoch": 0.09412881409559216,
+      "grad_norm": 1.1857748450845993,
+      "learning_rate": 3.137358291589771e-06,
+      "loss": 1.2822,
+      "step": 2380
+    },
+    {
+      "epoch": 0.0945243133144812,
+      "grad_norm": 1.1673509054243105,
+      "learning_rate": 3.1505404692855262e-06,
+      "loss": 1.2938,
+      "step": 2390
+    },
+    {
+      "epoch": 0.09491981253337024,
+      "grad_norm": 1.2296260905483272,
+      "learning_rate": 3.1637226469812814e-06,
+      "loss": 1.2611,
+      "step": 2400
+    },
+    {
+      "epoch": 0.09531531175225928,
+      "grad_norm": 1.218062373578883,
+      "learning_rate": 3.1769048246770366e-06,
+      "loss": 1.2714,
+      "step": 2410
+    },
+    {
+      "epoch": 0.09571081097114834,
+      "grad_norm": 1.2430563615302073,
+      "learning_rate": 3.190087002372792e-06,
+      "loss": 1.2868,
+      "step": 2420
+    },
+    {
+      "epoch": 0.09610631019003738,
+      "grad_norm": 1.2681172308291901,
+      "learning_rate": 3.2032691800685477e-06,
+      "loss": 1.2853,
+      "step": 2430
+    },
+    {
+      "epoch": 0.09650180940892641,
+      "grad_norm": 1.1547689215385586,
+      "learning_rate": 3.216451357764303e-06,
+      "loss": 1.2862,
+      "step": 2440
+    },
+    {
+      "epoch": 0.09689730862781545,
+      "grad_norm": 1.1768931021697404,
+      "learning_rate": 3.2296335354600585e-06,
+      "loss": 1.2818,
+      "step": 2450
+    },
+    {
+      "epoch": 0.09729280784670451,
+      "grad_norm": 1.190143004164276,
+      "learning_rate": 3.2428157131558137e-06,
+      "loss": 1.2917,
+      "step": 2460
+    },
+    {
+      "epoch": 0.09768830706559355,
+      "grad_norm": 1.2271609371437073,
+      "learning_rate": 3.255997890851569e-06,
+      "loss": 1.2966,
+      "step": 2470
+    },
+    {
+      "epoch": 0.09808380628448259,
+      "grad_norm": 1.30224620720194,
+      "learning_rate": 3.269180068547324e-06,
+      "loss": 1.2738,
+      "step": 2480
+    },
+    {
+      "epoch": 0.09847930550337163,
+      "grad_norm": 1.2072012030104826,
+      "learning_rate": 3.28236224624308e-06,
+      "loss": 1.2857,
+      "step": 2490
+    },
+    {
+      "epoch": 0.09887480472226068,
+      "grad_norm": 1.245746983758924,
+      "learning_rate": 3.295544423938835e-06,
+      "loss": 1.2744,
+      "step": 2500
+    },
+    {
+      "epoch": 0.09927030394114972,
+      "grad_norm": 1.232464505799779,
+      "learning_rate": 3.3087266016345903e-06,
+      "loss": 1.2845,
+      "step": 2510
+    },
+    {
+      "epoch": 0.09966580316003876,
+      "grad_norm": 1.1776225450844722,
+      "learning_rate": 3.3219087793303455e-06,
+      "loss": 1.2704,
+      "step": 2520
+    },
+    {
+      "epoch": 0.1000613023789278,
+      "grad_norm": 1.2070461495279086,
+      "learning_rate": 3.3350909570261007e-06,
+      "loss": 1.2858,
+      "step": 2530
+    },
+    {
+      "epoch": 0.10045680159781685,
+      "grad_norm": 1.2873546275622396,
+      "learning_rate": 3.3482731347218562e-06,
+      "loss": 1.2686,
+      "step": 2540
+    },
+    {
+      "epoch": 0.10085230081670589,
+      "grad_norm": 1.2436617312073135,
+      "learning_rate": 3.361455312417612e-06,
+      "loss": 1.2816,
+      "step": 2550
+    },
+    {
+      "epoch": 0.10124780003559493,
+      "grad_norm": 1.2016848948693586,
+      "learning_rate": 3.374637490113367e-06,
+      "loss": 1.2871,
+      "step": 2560
+    },
+    {
+      "epoch": 0.10164329925448397,
+      "grad_norm": 1.0536038308096831,
+      "learning_rate": 3.3878196678091226e-06,
+      "loss": 1.2754,
+      "step": 2570
+    },
+    {
+      "epoch": 0.10203879847337302,
+      "grad_norm": 1.270637142538822,
+      "learning_rate": 3.4010018455048777e-06,
+      "loss": 1.2931,
+      "step": 2580
+    },
+    {
+      "epoch": 0.10243429769226206,
+      "grad_norm": 1.2705181067783509,
+      "learning_rate": 3.414184023200633e-06,
+      "loss": 1.2826,
+      "step": 2590
+    },
+    {
+      "epoch": 0.1028297969111511,
+      "grad_norm": 1.1622496688215964,
+      "learning_rate": 3.427366200896388e-06,
+      "loss": 1.2753,
+      "step": 2600
+    },
+    {
+      "epoch": 0.10322529613004014,
+      "grad_norm": 1.2243854649828334,
+      "learning_rate": 3.440548378592144e-06,
+      "loss": 1.2853,
+      "step": 2610
+    },
+    {
+      "epoch": 0.10362079534892919,
+      "grad_norm": 1.1921833219835056,
+      "learning_rate": 3.4537305562878992e-06,
+      "loss": 1.2947,
+      "step": 2620
+    },
+    {
+      "epoch": 0.10401629456781823,
+      "grad_norm": 1.1049571202442656,
+      "learning_rate": 3.4669127339836544e-06,
+      "loss": 1.2621,
+      "step": 2630
+    },
+    {
+      "epoch": 0.10441179378670727,
+      "grad_norm": 1.1671222554569785,
+      "learning_rate": 3.4800949116794096e-06,
+      "loss": 1.275,
+      "step": 2640
+    },
+    {
+      "epoch": 0.10480729300559631,
+      "grad_norm": 1.2064194503502172,
+      "learning_rate": 3.493277089375165e-06,
+      "loss": 1.2849,
+      "step": 2650
+    },
+    {
+      "epoch": 0.10520279222448535,
+      "grad_norm": 1.2154560517149036,
+      "learning_rate": 3.5064592670709203e-06,
+      "loss": 1.254,
+      "step": 2660
+    },
+    {
+      "epoch": 0.1055982914433744,
+      "grad_norm": 1.1269630172231502,
+      "learning_rate": 3.519641444766676e-06,
+      "loss": 1.2795,
+      "step": 2670
+    },
+    {
+      "epoch": 0.10599379066226344,
+      "grad_norm": 1.2081274932682922,
+      "learning_rate": 3.532823622462431e-06,
+      "loss": 1.2752,
+      "step": 2680
+    },
+    {
+      "epoch": 0.10638928988115248,
+      "grad_norm": 1.1368132761943728,
+      "learning_rate": 3.5460058001581867e-06,
+      "loss": 1.267,
+      "step": 2690
+    },
+    {
+      "epoch": 0.10678478910004152,
+      "grad_norm": 1.1938542758614383,
+      "learning_rate": 3.559187977853942e-06,
+      "loss": 1.2695,
+      "step": 2700
+    },
+    {
+      "epoch": 0.10718028831893057,
+      "grad_norm": 1.227569725090285,
+      "learning_rate": 3.572370155549697e-06,
+      "loss": 1.2877,
+      "step": 2710
+    },
+    {
+      "epoch": 0.10757578753781961,
+      "grad_norm": 1.0635833460383113,
+      "learning_rate": 3.585552333245452e-06,
+      "loss": 1.2722,
+      "step": 2720
+    },
+    {
+      "epoch": 0.10797128675670865,
+      "grad_norm": 1.1446922334420235,
+      "learning_rate": 3.5987345109412073e-06,
+      "loss": 1.2797,
+      "step": 2730
+    },
+    {
+      "epoch": 0.1083667859755977,
+      "grad_norm": 1.2130529429189103,
+      "learning_rate": 3.6119166886369633e-06,
+      "loss": 1.2748,
+      "step": 2740
+    },
+    {
+      "epoch": 0.10876228519448675,
+      "grad_norm": 1.1964236814556763,
+      "learning_rate": 3.6250988663327185e-06,
+      "loss": 1.2702,
+      "step": 2750
+    },
+    {
+      "epoch": 0.10915778441337579,
+      "grad_norm": 1.1949833709572786,
+      "learning_rate": 3.6382810440284737e-06,
+      "loss": 1.2882,
+      "step": 2760
+    },
+    {
+      "epoch": 0.10955328363226483,
+      "grad_norm": 1.09709150025736,
+      "learning_rate": 3.6514632217242293e-06,
+      "loss": 1.2605,
+      "step": 2770
+    },
+    {
+      "epoch": 0.10994878285115386,
+      "grad_norm": 1.1515072477043464,
+      "learning_rate": 3.6646453994199844e-06,
+      "loss": 1.2753,
+      "step": 2780
+    },
+    {
+      "epoch": 0.11034428207004292,
+      "grad_norm": 1.2458919888173228,
+      "learning_rate": 3.6778275771157396e-06,
+      "loss": 1.2696,
+      "step": 2790
+    },
+    {
+      "epoch": 0.11073978128893196,
+      "grad_norm": 1.1302128174527677,
+      "learning_rate": 3.691009754811495e-06,
+      "loss": 1.2605,
+      "step": 2800
+    },
+    {
+      "epoch": 0.111135280507821,
+      "grad_norm": 1.24222385009568,
+      "learning_rate": 3.7041919325072508e-06,
+      "loss": 1.2729,
+      "step": 2810
+    },
+    {
+      "epoch": 0.11153077972671004,
+      "grad_norm": 1.102787713183318,
+      "learning_rate": 3.717374110203006e-06,
+      "loss": 1.2607,
+      "step": 2820
+    },
+    {
+      "epoch": 0.11192627894559909,
+      "grad_norm": 1.2023360978677924,
+      "learning_rate": 3.730556287898761e-06,
+      "loss": 1.2615,
+      "step": 2830
+    },
+    {
+      "epoch": 0.11232177816448813,
+      "grad_norm": 1.2724137347654667,
+      "learning_rate": 3.7437384655945163e-06,
+      "loss": 1.2615,
+      "step": 2840
+    },
+    {
+      "epoch": 0.11271727738337717,
+      "grad_norm": 1.093152579494928,
+      "learning_rate": 3.7569206432902714e-06,
+      "loss": 1.2631,
+      "step": 2850
+    },
+    {
+      "epoch": 0.11311277660226621,
+      "grad_norm": 1.1704547463612633,
+      "learning_rate": 3.7701028209860274e-06,
+      "loss": 1.2644,
+      "step": 2860
+    },
+    {
+      "epoch": 0.11350827582115525,
+      "grad_norm": 1.161424150089614,
+      "learning_rate": 3.7832849986817826e-06,
+      "loss": 1.2767,
+      "step": 2870
+    },
+    {
+      "epoch": 0.1139037750400443,
+      "grad_norm": 1.1278205431526822,
+      "learning_rate": 3.7964671763775378e-06,
+      "loss": 1.2665,
+      "step": 2880
+    },
+    {
+      "epoch": 0.11429927425893334,
+      "grad_norm": 1.1528269166892364,
+      "learning_rate": 3.8096493540732933e-06,
+      "loss": 1.2755,
+      "step": 2890
+    },
+    {
+      "epoch": 0.11469477347782238,
+      "grad_norm": 1.1067252929827938,
+      "learning_rate": 3.8228315317690485e-06,
+      "loss": 1.2871,
+      "step": 2900
+    },
+    {
+      "epoch": 0.11509027269671142,
+      "grad_norm": 1.1120209246517228,
+      "learning_rate": 3.836013709464803e-06,
+      "loss": 1.2626,
+      "step": 2910
+    },
+    {
+      "epoch": 0.11548577191560047,
+      "grad_norm": 1.122877948452319,
+      "learning_rate": 3.84919588716056e-06,
+      "loss": 1.263,
+      "step": 2920
+    },
+    {
+      "epoch": 0.11588127113448951,
+      "grad_norm": 1.124640519597798,
+      "learning_rate": 3.8623780648563144e-06,
+      "loss": 1.2731,
+      "step": 2930
+    },
+    {
+      "epoch": 0.11627677035337855,
+      "grad_norm": 1.125024599583401,
+      "learning_rate": 3.87556024255207e-06,
+      "loss": 1.2691,
+      "step": 2940
+    },
+    {
+      "epoch": 0.11667226957226759,
+      "grad_norm": 1.2126790331792352,
+      "learning_rate": 3.888742420247826e-06,
+      "loss": 1.2426,
+      "step": 2950
+    },
+    {
+      "epoch": 0.11706776879115664,
+      "grad_norm": 1.0611422258130283,
+      "learning_rate": 3.90192459794358e-06,
+      "loss": 1.2551,
+      "step": 2960
+    },
+    {
+      "epoch": 0.11746326801004568,
+      "grad_norm": 1.1931586393046836,
+      "learning_rate": 3.915106775639336e-06,
+      "loss": 1.262,
+      "step": 2970
+    },
+    {
+      "epoch": 0.11785876722893472,
+      "grad_norm": 1.1209628020676263,
+      "learning_rate": 3.9282889533350915e-06,
+      "loss": 1.2688,
+      "step": 2980
+    },
+    {
+      "epoch": 0.11825426644782376,
+      "grad_norm": 1.397330863918019,
+      "learning_rate": 3.941471131030847e-06,
+      "loss": 1.2564,
+      "step": 2990
+    },
+    {
+      "epoch": 0.11864976566671281,
+      "grad_norm": 1.0906431181507632,
+      "learning_rate": 3.954653308726602e-06,
+      "loss": 1.2706,
+      "step": 3000
+    },
+    {
+      "epoch": 0.11904526488560185,
+      "grad_norm": 1.1586328470171683,
+      "learning_rate": 3.9678354864223574e-06,
+      "loss": 1.2626,
+      "step": 3010
+    },
+    {
+      "epoch": 0.11944076410449089,
+      "grad_norm": 1.0967133685495674,
+      "learning_rate": 3.981017664118112e-06,
+      "loss": 1.2548,
+      "step": 3020
+    },
+    {
+      "epoch": 0.11983626332337993,
+      "grad_norm": 1.1269755343665413,
+      "learning_rate": 3.994199841813868e-06,
+      "loss": 1.2643,
+      "step": 3030
+    },
+    {
+      "epoch": 0.12023176254226899,
+      "grad_norm": 1.091934409490822,
+      "learning_rate": 4.007382019509623e-06,
+      "loss": 1.2662,
+      "step": 3040
+    },
+    {
+      "epoch": 0.12062726176115803,
+      "grad_norm": 1.1440701382940623,
+      "learning_rate": 4.020564197205379e-06,
+      "loss": 1.2468,
+      "step": 3050
+    },
+    {
+      "epoch": 0.12102276098004706,
+      "grad_norm": 1.1751888661402632,
+      "learning_rate": 4.033746374901134e-06,
+      "loss": 1.2512,
+      "step": 3060
+    },
+    {
+      "epoch": 0.1214182601989361,
+      "grad_norm": 1.1501843407322958,
+      "learning_rate": 4.046928552596889e-06,
+      "loss": 1.2601,
+      "step": 3070
+    },
+    {
+      "epoch": 0.12181375941782514,
+      "grad_norm": 1.1762906184019515,
+      "learning_rate": 4.060110730292645e-06,
+      "loss": 1.2637,
+      "step": 3080
+    },
+    {
+      "epoch": 0.1222092586367142,
+      "grad_norm": 1.2032163341674906,
+      "learning_rate": 4.0732929079884e-06,
+      "loss": 1.2741,
+      "step": 3090
+    },
+    {
+      "epoch": 0.12260475785560324,
+      "grad_norm": 1.290646553414388,
+      "learning_rate": 4.086475085684156e-06,
+      "loss": 1.2365,
+      "step": 3100
+    },
+    {
+      "epoch": 0.12300025707449228,
+      "grad_norm": 1.1601572556541933,
+      "learning_rate": 4.099657263379911e-06,
+      "loss": 1.2392,
+      "step": 3110
+    },
+    {
+      "epoch": 0.12339575629338131,
+      "grad_norm": 1.0964699034764527,
+      "learning_rate": 4.112839441075666e-06,
+      "loss": 1.2577,
+      "step": 3120
+    },
+    {
+      "epoch": 0.12379125551227037,
+      "grad_norm": 1.1539770797614612,
+      "learning_rate": 4.126021618771421e-06,
+      "loss": 1.2573,
+      "step": 3130
+    },
+    {
+      "epoch": 0.12418675473115941,
+      "grad_norm": 1.0985466764198102,
+      "learning_rate": 4.139203796467177e-06,
+      "loss": 1.244,
+      "step": 3140
+    },
+    {
+      "epoch": 0.12458225395004845,
+      "grad_norm": 1.2184945405951828,
+      "learning_rate": 4.152385974162932e-06,
+      "loss": 1.2554,
+      "step": 3150
+    },
+    {
+      "epoch": 0.12497775316893749,
+      "grad_norm": 1.187418233396473,
+      "learning_rate": 4.165568151858687e-06,
+      "loss": 1.263,
+      "step": 3160
+    },
+    {
+      "epoch": 0.12537325238782654,
+      "grad_norm": 1.1833230618056227,
+      "learning_rate": 4.178750329554443e-06,
+      "loss": 1.2533,
+      "step": 3170
+    },
+    {
+      "epoch": 0.12576875160671558,
+      "grad_norm": 1.1355097475879699,
+      "learning_rate": 4.191932507250198e-06,
+      "loss": 1.256,
+      "step": 3180
+    },
+    {
+      "epoch": 0.12616425082560462,
+      "grad_norm": 1.253125308018465,
+      "learning_rate": 4.205114684945954e-06,
+      "loss": 1.2464,
+      "step": 3190
+    },
+    {
+      "epoch": 0.12655975004449366,
+      "grad_norm": 1.2331745099422928,
+      "learning_rate": 4.2182968626417085e-06,
+      "loss": 1.2574,
+      "step": 3200
+    },
+    {
+      "epoch": 0.1269552492633827,
+      "grad_norm": 1.247226535246477,
+      "learning_rate": 4.231479040337464e-06,
+      "loss": 1.26,
+      "step": 3210
+    },
+    {
+      "epoch": 0.12735074848227174,
+      "grad_norm": 1.1038916765400215,
+      "learning_rate": 4.244661218033219e-06,
+      "loss": 1.2382,
+      "step": 3220
+    },
+    {
+      "epoch": 0.1277462477011608,
+      "grad_norm": 1.092556264313893,
+      "learning_rate": 4.257843395728975e-06,
+      "loss": 1.2496,
+      "step": 3230
+    },
+    {
+      "epoch": 0.12814174692004984,
+      "grad_norm": 1.1264358388022202,
+      "learning_rate": 4.27102557342473e-06,
+      "loss": 1.2501,
+      "step": 3240
+    },
+    {
+      "epoch": 0.12853724613893888,
+      "grad_norm": 1.1184361409466503,
+      "learning_rate": 4.284207751120486e-06,
+      "loss": 1.2366,
+      "step": 3250
+    },
+    {
+      "epoch": 0.12893274535782792,
+      "grad_norm": 1.2406981039602194,
+      "learning_rate": 4.29738992881624e-06,
+      "loss": 1.2231,
+      "step": 3260
+    },
+    {
+      "epoch": 0.12932824457671696,
+      "grad_norm": 1.2567060350031107,
+      "learning_rate": 4.310572106511996e-06,
+      "loss": 1.2443,
+      "step": 3270
+    },
+    {
+      "epoch": 0.129723743795606,
+      "grad_norm": 1.23262189454401,
+      "learning_rate": 4.3237542842077515e-06,
+      "loss": 1.2428,
+      "step": 3280
+    },
+    {
+      "epoch": 0.13011924301449504,
+      "grad_norm": 1.1496110749546276,
+      "learning_rate": 4.336936461903507e-06,
+      "loss": 1.2519,
+      "step": 3290
+    },
+    {
+      "epoch": 0.13051474223338408,
+      "grad_norm": 1.1173408155113762,
+      "learning_rate": 4.350118639599262e-06,
+      "loss": 1.2586,
+      "step": 3300
+    },
+    {
+      "epoch": 0.13091024145227312,
+      "grad_norm": 1.1907311153321274,
+      "learning_rate": 4.3633008172950175e-06,
+      "loss": 1.2376,
+      "step": 3310
+    },
+    {
+      "epoch": 0.13130574067116219,
+      "grad_norm": 1.1642841053506983,
+      "learning_rate": 4.376482994990773e-06,
+      "loss": 1.2278,
+      "step": 3320
+    },
+    {
+      "epoch": 0.13170123989005122,
+      "grad_norm": 1.217803181818625,
+      "learning_rate": 4.389665172686528e-06,
+      "loss": 1.2473,
+      "step": 3330
+    },
+    {
+      "epoch": 0.13209673910894026,
+      "grad_norm": 1.1601491220146136,
+      "learning_rate": 4.402847350382283e-06,
+      "loss": 1.2615,
+      "step": 3340
+    },
+    {
+      "epoch": 0.1324922383278293,
+      "grad_norm": 1.2383436434314843,
+      "learning_rate": 4.416029528078039e-06,
+      "loss": 1.24,
+      "step": 3350
+    },
+    {
+      "epoch": 0.13288773754671834,
+      "grad_norm": 1.1692938600694844,
+      "learning_rate": 4.4292117057737945e-06,
+      "loss": 1.2541,
+      "step": 3360
+    },
+    {
+      "epoch": 0.13328323676560738,
+      "grad_norm": 1.1778183496481327,
+      "learning_rate": 4.442393883469549e-06,
+      "loss": 1.258,
+      "step": 3370
+    },
+    {
+      "epoch": 0.13367873598449642,
+      "grad_norm": 1.1351108705524025,
+      "learning_rate": 4.455576061165305e-06,
+      "loss": 1.2586,
+      "step": 3380
+    },
+    {
+      "epoch": 0.13407423520338546,
+      "grad_norm": 1.1603484288394463,
+      "learning_rate": 4.4687582388610605e-06,
+      "loss": 1.2253,
+      "step": 3390
+    },
+    {
+      "epoch": 0.13446973442227453,
+      "grad_norm": 1.144547572424092,
+      "learning_rate": 4.481940416556815e-06,
+      "loss": 1.2353,
+      "step": 3400
+    },
+    {
+      "epoch": 0.13486523364116357,
+      "grad_norm": 1.180522473535395,
+      "learning_rate": 4.495122594252571e-06,
+      "loss": 1.2623,
+      "step": 3410
+    },
+    {
+      "epoch": 0.1352607328600526,
+      "grad_norm": 1.1237416464133951,
+      "learning_rate": 4.508304771948326e-06,
+      "loss": 1.2661,
+      "step": 3420
+    },
+    {
+      "epoch": 0.13565623207894165,
+      "grad_norm": 1.0952310861427315,
+      "learning_rate": 4.521486949644082e-06,
+      "loss": 1.2602,
+      "step": 3430
+    },
+    {
+      "epoch": 0.13605173129783069,
+      "grad_norm": 1.2519540981327684,
+      "learning_rate": 4.534669127339837e-06,
+      "loss": 1.2486,
+      "step": 3440
+    },
+    {
+      "epoch": 0.13644723051671973,
+      "grad_norm": 1.1453176445124462,
+      "learning_rate": 4.547851305035592e-06,
+      "loss": 1.2511,
+      "step": 3450
+    },
+    {
+      "epoch": 0.13684272973560876,
+      "grad_norm": 1.2492970933494332,
+      "learning_rate": 4.561033482731347e-06,
+      "loss": 1.2512,
+      "step": 3460
+    },
+    {
+      "epoch": 0.1372382289544978,
+      "grad_norm": 1.1183370717472112,
+      "learning_rate": 4.5742156604271035e-06,
+      "loss": 1.2683,
+      "step": 3470
+    },
+    {
+      "epoch": 0.13763372817338687,
+      "grad_norm": 1.3102018394220485,
+      "learning_rate": 4.587397838122858e-06,
+      "loss": 1.2332,
+      "step": 3480
+    },
+    {
+      "epoch": 0.1380292273922759,
+      "grad_norm": 1.1125088730002428,
+      "learning_rate": 4.600580015818614e-06,
+      "loss": 1.2393,
+      "step": 3490
+    },
+    {
+      "epoch": 0.13842472661116495,
+      "grad_norm": 1.0788463422691676,
+      "learning_rate": 4.6137621935143685e-06,
+      "loss": 1.2572,
+      "step": 3500
+    },
+    {
+      "epoch": 0.138820225830054,
+      "grad_norm": 1.1148975227432092,
+      "learning_rate": 4.626944371210124e-06,
+      "loss": 1.25,
+      "step": 3510
+    },
+    {
+      "epoch": 0.13921572504894303,
+      "grad_norm": 1.1501918101905848,
+      "learning_rate": 4.64012654890588e-06,
+      "loss": 1.2405,
+      "step": 3520
+    },
+    {
+      "epoch": 0.13961122426783207,
+      "grad_norm": 1.1965273666353122,
+      "learning_rate": 4.653308726601635e-06,
+      "loss": 1.2366,
+      "step": 3530
+    },
+    {
+      "epoch": 0.1400067234867211,
+      "grad_norm": 1.1368756204903232,
+      "learning_rate": 4.666490904297391e-06,
+      "loss": 1.2338,
+      "step": 3540
+    },
+    {
+      "epoch": 0.14040222270561015,
+      "grad_norm": 1.1082768252138382,
+      "learning_rate": 4.679673081993146e-06,
+      "loss": 1.2136,
+      "step": 3550
+    },
+    {
+      "epoch": 0.1407977219244992,
+      "grad_norm": 1.2668650584687497,
+      "learning_rate": 4.692855259688901e-06,
+      "loss": 1.2224,
+      "step": 3560
+    },
+    {
+      "epoch": 0.14119322114338825,
+      "grad_norm": 1.2404601026210604,
+      "learning_rate": 4.706037437384656e-06,
+      "loss": 1.2406,
+      "step": 3570
+    },
+    {
+      "epoch": 0.1415887203622773,
+      "grad_norm": 1.0846643375311202,
+      "learning_rate": 4.7192196150804116e-06,
+      "loss": 1.2368,
+      "step": 3580
+    },
+    {
+      "epoch": 0.14198421958116633,
+      "grad_norm": 1.0060315583009074,
+      "learning_rate": 4.732401792776167e-06,
+      "loss": 1.2437,
+      "step": 3590
+    },
+    {
+      "epoch": 0.14237971880005537,
+      "grad_norm": 1.2564332345588882,
+      "learning_rate": 4.745583970471923e-06,
+      "loss": 1.2504,
+      "step": 3600
+    },
+    {
+      "epoch": 0.1427752180189444,
+      "grad_norm": 1.036375529178967,
+      "learning_rate": 4.7587661481676775e-06,
+      "loss": 1.2411,
+      "step": 3610
+    },
+    {
+      "epoch": 0.14317071723783345,
+      "grad_norm": 1.196949606969891,
+      "learning_rate": 4.771948325863433e-06,
+      "loss": 1.2264,
+      "step": 3620
+    },
+    {
+      "epoch": 0.1435662164567225,
+      "grad_norm": 1.1286077565195762,
+      "learning_rate": 4.785130503559189e-06,
+      "loss": 1.2403,
+      "step": 3630
+    },
+    {
+      "epoch": 0.14396171567561153,
+      "grad_norm": 1.0885858043496695,
+      "learning_rate": 4.798312681254943e-06,
+      "loss": 1.2409,
+      "step": 3640
+    },
+    {
+      "epoch": 0.1443572148945006,
+      "grad_norm": 1.1046744042088779,
+      "learning_rate": 4.811494858950699e-06,
+      "loss": 1.2259,
+      "step": 3650
+    },
+    {
+      "epoch": 0.14475271411338964,
+      "grad_norm": 1.073761934784075,
+      "learning_rate": 4.8246770366464546e-06,
+      "loss": 1.2302,
+      "step": 3660
+    },
+    {
+      "epoch": 0.14514821333227867,
+      "grad_norm": 1.0442273647850029,
+      "learning_rate": 4.83785921434221e-06,
+      "loss": 1.2201,
+      "step": 3670
+    },
+    {
+      "epoch": 0.14554371255116771,
+      "grad_norm": 1.14133687841234,
+      "learning_rate": 4.851041392037965e-06,
+      "loss": 1.2402,
+      "step": 3680
+    },
+    {
+      "epoch": 0.14593921177005675,
+      "grad_norm": 1.0424041206172552,
+      "learning_rate": 4.8642235697337205e-06,
+      "loss": 1.2392,
+      "step": 3690
+    },
+    {
+      "epoch": 0.1463347109889458,
+      "grad_norm": 1.1722935358179387,
+      "learning_rate": 4.877405747429475e-06,
+      "loss": 1.2292,
+      "step": 3700
+    },
+    {
+      "epoch": 0.14673021020783483,
+      "grad_norm": 1.2177101061453444,
+      "learning_rate": 4.890587925125231e-06,
+      "loss": 1.2374,
+      "step": 3710
+    },
+    {
+      "epoch": 0.14712570942672387,
+      "grad_norm": 1.2342369323304698,
+      "learning_rate": 4.903770102820986e-06,
+      "loss": 1.2426,
+      "step": 3720
+    },
+    {
+      "epoch": 0.1475212086456129,
+      "grad_norm": 1.1921504558647251,
+      "learning_rate": 4.916952280516742e-06,
+      "loss": 1.2274,
+      "step": 3730
+    },
+    {
+      "epoch": 0.14791670786450198,
+      "grad_norm": 1.233202677746452,
+      "learning_rate": 4.930134458212497e-06,
+      "loss": 1.2214,
+      "step": 3740
+    },
+    {
+      "epoch": 0.14831220708339102,
+      "grad_norm": 1.1818383564644324,
+      "learning_rate": 4.943316635908252e-06,
+      "loss": 1.2134,
+      "step": 3750
+    },
+    {
+      "epoch": 0.14870770630228006,
+      "grad_norm": 1.1654870018883667,
+      "learning_rate": 4.956498813604008e-06,
+      "loss": 1.2258,
+      "step": 3760
+    },
+    {
+      "epoch": 0.1491032055211691,
+      "grad_norm": 1.2636489076159825,
+      "learning_rate": 4.969680991299763e-06,
+      "loss": 1.2333,
+      "step": 3770
+    },
+    {
+      "epoch": 0.14949870474005814,
+      "grad_norm": 1.1597012593172427,
+      "learning_rate": 4.982863168995519e-06,
+      "loss": 1.2265,
+      "step": 3780
+    },
+    {
+      "epoch": 0.14989420395894718,
+      "grad_norm": 1.1245543623413068,
+      "learning_rate": 4.996045346691274e-06,
+      "loss": 1.2336,
+      "step": 3790
+    },
+    {
+      "epoch": 0.15028970317783621,
+      "grad_norm": 1.1486476968811328,
+      "learning_rate": 5.009227524387029e-06,
+      "loss": 1.2262,
+      "step": 3800
+    },
+    {
+      "epoch": 0.15068520239672525,
+      "grad_norm": 1.0923697401437276,
+      "learning_rate": 5.022409702082784e-06,
+      "loss": 1.2258,
+      "step": 3810
+    },
+    {
+      "epoch": 0.15108070161561432,
+      "grad_norm": 1.26754825813355,
+      "learning_rate": 5.03559187977854e-06,
+      "loss": 1.241,
+      "step": 3820
+    },
+    {
+      "epoch": 0.15147620083450336,
+      "grad_norm": 1.168185364886432,
+      "learning_rate": 5.048774057474295e-06,
+      "loss": 1.2071,
+      "step": 3830
+    },
+    {
+      "epoch": 0.1518717000533924,
+      "grad_norm": 1.0582271434621335,
+      "learning_rate": 5.06195623517005e-06,
+      "loss": 1.2364,
+      "step": 3840
+    },
+    {
+      "epoch": 0.15226719927228144,
+      "grad_norm": 1.1643566454453338,
+      "learning_rate": 5.075138412865806e-06,
+      "loss": 1.2491,
+      "step": 3850
+    },
+    {
+      "epoch": 0.15266269849117048,
+      "grad_norm": 1.220011945064443,
+      "learning_rate": 5.08832059056156e-06,
+      "loss": 1.2099,
+      "step": 3860
+    },
+    {
+      "epoch": 0.15305819771005952,
+      "grad_norm": 1.2105087817617946,
+      "learning_rate": 5.101502768257317e-06,
+      "loss": 1.2241,
+      "step": 3870
+    },
+    {
+      "epoch": 0.15345369692894856,
+      "grad_norm": 1.1908599952093613,
+      "learning_rate": 5.114684945953072e-06,
+      "loss": 1.2354,
+      "step": 3880
+    },
+    {
+      "epoch": 0.1538491961478376,
+      "grad_norm": 1.200200710315934,
+      "learning_rate": 5.127867123648827e-06,
+      "loss": 1.225,
+      "step": 3890
+    },
+    {
+      "epoch": 0.15424469536672666,
+      "grad_norm": 1.2302439849161493,
+      "learning_rate": 5.141049301344583e-06,
+      "loss": 1.2262,
+      "step": 3900
+    },
+    {
+      "epoch": 0.1546401945856157,
+      "grad_norm": 1.1004820662407016,
+      "learning_rate": 5.154231479040338e-06,
+      "loss": 1.2088,
+      "step": 3910
+    },
+    {
+      "epoch": 0.15503569380450474,
+      "grad_norm": 1.102235460054769,
+      "learning_rate": 5.167413656736093e-06,
+      "loss": 1.2433,
+      "step": 3920
+    },
+    {
+      "epoch": 0.15543119302339378,
+      "grad_norm": 1.23789374465247,
+      "learning_rate": 5.180595834431849e-06,
+      "loss": 1.2357,
+      "step": 3930
+    },
+    {
+      "epoch": 0.15582669224228282,
+      "grad_norm": 1.161533174136612,
+      "learning_rate": 5.193778012127603e-06,
+      "loss": 1.2317,
+      "step": 3940
+    },
+    {
+      "epoch": 0.15622219146117186,
+      "grad_norm": 1.0642322136109936,
+      "learning_rate": 5.206960189823359e-06,
+      "loss": 1.2224,
+      "step": 3950
+    },
+    {
+      "epoch": 0.1566176906800609,
+      "grad_norm": 1.2087215287034732,
+      "learning_rate": 5.220142367519115e-06,
+      "loss": 1.2448,
+      "step": 3960
+    },
+    {
+      "epoch": 0.15701318989894994,
+      "grad_norm": 1.212885228606204,
+      "learning_rate": 5.233324545214869e-06,
+      "loss": 1.2208,
+      "step": 3970
+    },
+    {
+      "epoch": 0.15740868911783898,
+      "grad_norm": 1.1495875886075297,
+      "learning_rate": 5.246506722910625e-06,
+      "loss": 1.2342,
+      "step": 3980
+    },
+    {
+      "epoch": 0.15780418833672805,
+      "grad_norm": 1.2816638398485318,
+      "learning_rate": 5.259688900606381e-06,
+      "loss": 1.2223,
+      "step": 3990
+    },
+    {
+      "epoch": 0.15819968755561709,
+      "grad_norm": 1.1259679640570786,
+      "learning_rate": 5.272871078302136e-06,
+      "loss": 1.2163,
+      "step": 4000
+    },
+    {
+      "epoch": 0.15859518677450613,
+      "grad_norm": 1.1365047394495493,
+      "learning_rate": 5.286053255997892e-06,
+      "loss": 1.2343,
+      "step": 4010
+    },
+    {
+      "epoch": 0.15899068599339516,
+      "grad_norm": 1.1549244055048389,
+      "learning_rate": 5.299235433693647e-06,
+      "loss": 1.2231,
+      "step": 4020
+    },
+    {
+      "epoch": 0.1593861852122842,
+      "grad_norm": 1.1536226134710523,
+      "learning_rate": 5.312417611389402e-06,
+      "loss": 1.2235,
+      "step": 4030
+    },
+    {
+      "epoch": 0.15978168443117324,
+      "grad_norm": 1.221526078235549,
+      "learning_rate": 5.325599789085158e-06,
+      "loss": 1.2211,
+      "step": 4040
+    },
+    {
+      "epoch": 0.16017718365006228,
+      "grad_norm": 1.0183863925295793,
+      "learning_rate": 5.338781966780912e-06,
+      "loss": 1.2279,
+      "step": 4050
+    },
+    {
+      "epoch": 0.16057268286895132,
+      "grad_norm": 1.189482207312474,
+      "learning_rate": 5.351964144476668e-06,
+      "loss": 1.2224,
+      "step": 4060
+    },
+    {
+      "epoch": 0.1609681820878404,
+      "grad_norm": 1.147959951272284,
+      "learning_rate": 5.3651463221724235e-06,
+      "loss": 1.2194,
+      "step": 4070
+    },
+    {
+      "epoch": 0.16136368130672943,
+      "grad_norm": 1.216570820756994,
+      "learning_rate": 5.378328499868178e-06,
+      "loss": 1.2115,
+      "step": 4080
+    },
+    {
+      "epoch": 0.16175918052561847,
+      "grad_norm": 1.1023582438938562,
+      "learning_rate": 5.391510677563934e-06,
+      "loss": 1.2118,
+      "step": 4090
+    },
+    {
+      "epoch": 0.1621546797445075,
+      "grad_norm": 1.1847962397819922,
+      "learning_rate": 5.4046928552596886e-06,
+      "loss": 1.2191,
+      "step": 4100
+    },
+    {
+      "epoch": 0.16255017896339655,
+      "grad_norm": 1.1004471914767329,
+      "learning_rate": 5.417875032955444e-06,
+      "loss": 1.2318,
+      "step": 4110
+    },
+    {
+      "epoch": 0.16294567818228559,
+      "grad_norm": 1.1649211722871002,
+      "learning_rate": 5.431057210651201e-06,
+      "loss": 1.2229,
+      "step": 4120
+    },
+    {
+      "epoch": 0.16334117740117463,
+      "grad_norm": 1.215868544802769,
+      "learning_rate": 5.444239388346955e-06,
+      "loss": 1.2097,
+      "step": 4130
+    },
+    {
+      "epoch": 0.16373667662006366,
+      "grad_norm": 1.1889771147762935,
+      "learning_rate": 5.457421566042711e-06,
+      "loss": 1.2382,
+      "step": 4140
+    },
+    {
+      "epoch": 0.16413217583895273,
+      "grad_norm": 1.2546811487400502,
+      "learning_rate": 5.4706037437384665e-06,
+      "loss": 1.1969,
+      "step": 4150
+    },
+    {
+      "epoch": 0.16452767505784177,
+      "grad_norm": 1.0644584064909797,
+      "learning_rate": 5.483785921434221e-06,
+      "loss": 1.2298,
+      "step": 4160
+    },
+    {
+      "epoch": 0.1649231742767308,
+      "grad_norm": 1.1063298464696862,
+      "learning_rate": 5.496968099129977e-06,
+      "loss": 1.2201,
+      "step": 4170
+    },
+    {
+      "epoch": 0.16531867349561985,
+      "grad_norm": 1.0658384818742714,
+      "learning_rate": 5.510150276825732e-06,
+      "loss": 1.2275,
+      "step": 4180
+    },
+    {
+      "epoch": 0.1657141727145089,
+      "grad_norm": 1.0683881098515082,
+      "learning_rate": 5.523332454521487e-06,
+      "loss": 1.2182,
+      "step": 4190
+    },
+    {
+      "epoch": 0.16610967193339793,
+      "grad_norm": 1.2241782663996508,
+      "learning_rate": 5.536514632217243e-06,
+      "loss": 1.2104,
+      "step": 4200
+    },
+    {
+      "epoch": 0.16650517115228697,
+      "grad_norm": 1.101634265395167,
+      "learning_rate": 5.5496968099129975e-06,
+      "loss": 1.2083,
+      "step": 4210
+    },
+    {
+      "epoch": 0.166900670371176,
+      "grad_norm": 1.1178755199397847,
+      "learning_rate": 5.562878987608753e-06,
+      "loss": 1.2241,
+      "step": 4220
+    },
+    {
+      "epoch": 0.16729616959006505,
+      "grad_norm": 1.1098609400973727,
+      "learning_rate": 5.576061165304508e-06,
+      "loss": 1.2124,
+      "step": 4230
+    },
+    {
+      "epoch": 0.16769166880895411,
+      "grad_norm": 1.1189088715468434,
+      "learning_rate": 5.589243343000264e-06,
+      "loss": 1.2234,
+      "step": 4240
+    },
+    {
+      "epoch": 0.16808716802784315,
+      "grad_norm": 1.1016650861823671,
+      "learning_rate": 5.60242552069602e-06,
+      "loss": 1.2152,
+      "step": 4250
+    },
+    {
+      "epoch": 0.1684826672467322,
+      "grad_norm": 1.0871078425460847,
+      "learning_rate": 5.6156076983917754e-06,
+      "loss": 1.2048,
+      "step": 4260
+    },
+    {
+      "epoch": 0.16887816646562123,
+      "grad_norm": 1.2015510695753984,
+      "learning_rate": 5.62878987608753e-06,
+      "loss": 1.2052,
+      "step": 4270
+    },
+    {
+      "epoch": 0.16927366568451027,
+      "grad_norm": 1.1087395721866484,
+      "learning_rate": 5.641972053783286e-06,
+      "loss": 1.2116,
+      "step": 4280
+    },
+    {
+      "epoch": 0.1696691649033993,
+      "grad_norm": 1.1957481980714524,
+      "learning_rate": 5.6551542314790405e-06,
+      "loss": 1.204,
+      "step": 4290
+    },
+    {
+      "epoch": 0.17006466412228835,
+      "grad_norm": 1.220248337474719,
+      "learning_rate": 5.668336409174796e-06,
+      "loss": 1.234,
+      "step": 4300
+    },
+    {
+      "epoch": 0.1704601633411774,
+      "grad_norm": 1.1288978870452016,
+      "learning_rate": 5.681518586870552e-06,
+      "loss": 1.2226,
+      "step": 4310
+    },
+    {
+      "epoch": 0.17085566256006646,
+      "grad_norm": 1.1583623676766133,
+      "learning_rate": 5.6947007645663064e-06,
+      "loss": 1.2078,
+      "step": 4320
+    },
+    {
+      "epoch": 0.1712511617789555,
+      "grad_norm": 1.0820854303411411,
+      "learning_rate": 5.707882942262062e-06,
+      "loss": 1.2275,
+      "step": 4330
+    },
+    {
+      "epoch": 0.17164666099784454,
+      "grad_norm": 1.0436724693021422,
+      "learning_rate": 5.721065119957817e-06,
+      "loss": 1.2208,
+      "step": 4340
+    },
+    {
+      "epoch": 0.17204216021673358,
+      "grad_norm": 1.0826207101520977,
+      "learning_rate": 5.734247297653572e-06,
+      "loss": 1.2259,
+      "step": 4350
+    },
+    {
+      "epoch": 0.17243765943562261,
+      "grad_norm": 1.0062420188231134,
+      "learning_rate": 5.747429475349329e-06,
+      "loss": 1.2152,
+      "step": 4360
+    },
+    {
+      "epoch": 0.17283315865451165,
+      "grad_norm": 1.1625476486327295,
+      "learning_rate": 5.760611653045084e-06,
+      "loss": 1.2126,
+      "step": 4370
+    },
+    {
+      "epoch": 0.1732286578734007,
+      "grad_norm": 1.0850920716468246,
+      "learning_rate": 5.773793830740839e-06,
+      "loss": 1.2063,
+      "step": 4380
+    },
+    {
+      "epoch": 0.17362415709228973,
+      "grad_norm": 1.1230610081802372,
+      "learning_rate": 5.786976008436595e-06,
+      "loss": 1.1971,
+      "step": 4390
+    },
+    {
+      "epoch": 0.17401965631117877,
+      "grad_norm": 1.1151659191207979,
+      "learning_rate": 5.8001581861323494e-06,
+      "loss": 1.2067,
+      "step": 4400
+    },
+    {
+      "epoch": 0.17441515553006784,
+      "grad_norm": 1.230022261272906,
+      "learning_rate": 5.813340363828105e-06,
+      "loss": 1.2075,
+      "step": 4410
+    },
+    {
+      "epoch": 0.17481065474895688,
+      "grad_norm": 1.1273517804475939,
+      "learning_rate": 5.826522541523861e-06,
+      "loss": 1.2277,
+      "step": 4420
+    },
+    {
+      "epoch": 0.17520615396784592,
+      "grad_norm": 1.033521339125641,
+      "learning_rate": 5.839704719219615e-06,
+      "loss": 1.2031,
+      "step": 4430
+    },
+    {
+      "epoch": 0.17560165318673496,
+      "grad_norm": 1.184836766845481,
+      "learning_rate": 5.852886896915371e-06,
+      "loss": 1.2166,
+      "step": 4440
+    },
+    {
+      "epoch": 0.175997152405624,
+      "grad_norm": 1.0961490896095614,
+      "learning_rate": 5.866069074611126e-06,
+      "loss": 1.2072,
+      "step": 4450
+    },
+    {
+      "epoch": 0.17639265162451304,
+      "grad_norm": 1.0861106050149547,
+      "learning_rate": 5.879251252306881e-06,
+      "loss": 1.2063,
+      "step": 4460
+    },
+    {
+      "epoch": 0.17678815084340208,
+      "grad_norm": 1.1315583725036726,
+      "learning_rate": 5.892433430002637e-06,
+      "loss": 1.209,
+      "step": 4470
+    },
+    {
+      "epoch": 0.17718365006229111,
+      "grad_norm": 1.2140546103918646,
+      "learning_rate": 5.905615607698392e-06,
+      "loss": 1.2076,
+      "step": 4480
+    },
+    {
+      "epoch": 0.17757914928118018,
+      "grad_norm": 1.0819422364079176,
+      "learning_rate": 5.918797785394148e-06,
+      "loss": 1.2201,
+      "step": 4490
+    },
+    {
+      "epoch": 0.17797464850006922,
+      "grad_norm": 1.0987791821906259,
+      "learning_rate": 5.931979963089904e-06,
+      "loss": 1.2229,
+      "step": 4500
+    },
+    {
+      "epoch": 0.17837014771895826,
+      "grad_norm": 1.069938546339024,
+      "learning_rate": 5.945162140785658e-06,
+      "loss": 1.213,
+      "step": 4510
+    },
+    {
+      "epoch": 0.1787656469378473,
+      "grad_norm": 1.0815335186123936,
+      "learning_rate": 5.958344318481414e-06,
+      "loss": 1.2106,
+      "step": 4520
+    },
+    {
+      "epoch": 0.17916114615673634,
+      "grad_norm": 1.1135705456815699,
+      "learning_rate": 5.971526496177169e-06,
+      "loss": 1.2102,
+      "step": 4530
+    },
+    {
+      "epoch": 0.17955664537562538,
+      "grad_norm": 1.16647050216788,
+      "learning_rate": 5.984708673872924e-06,
+      "loss": 1.2037,
+      "step": 4540
+    },
+    {
+      "epoch": 0.17995214459451442,
+      "grad_norm": 1.1292284579138845,
+      "learning_rate": 5.99789085156868e-06,
+      "loss": 1.1978,
+      "step": 4550
+    },
+    {
+      "epoch": 0.18034764381340346,
+      "grad_norm": 1.14664276573325,
+      "learning_rate": 6.011073029264435e-06,
+      "loss": 1.2139,
+      "step": 4560
+    },
+    {
+      "epoch": 0.18074314303229252,
+      "grad_norm": 1.1601126947258054,
+      "learning_rate": 6.02425520696019e-06,
+      "loss": 1.2129,
+      "step": 4570
+    },
+    {
+      "epoch": 0.18113864225118156,
+      "grad_norm": 1.1305462606121826,
+      "learning_rate": 6.037437384655945e-06,
+      "loss": 1.2017,
+      "step": 4580
+    },
+    {
+      "epoch": 0.1815341414700706,
+      "grad_norm": 1.2025591824974453,
+      "learning_rate": 6.0506195623517005e-06,
+      "loss": 1.1961,
+      "step": 4590
+    },
+    {
+      "epoch": 0.18192964068895964,
+      "grad_norm": 1.107927753760213,
+      "learning_rate": 6.063801740047456e-06,
+      "loss": 1.2043,
+      "step": 4600
+    },
+    {
+      "epoch": 0.18232513990784868,
+      "grad_norm": 1.059282958384036,
+      "learning_rate": 6.0769839177432125e-06,
+      "loss": 1.2237,
+      "step": 4610
+    },
+    {
+      "epoch": 0.18272063912673772,
+      "grad_norm": 1.1293359737449702,
+      "learning_rate": 6.090166095438967e-06,
+      "loss": 1.2178,
+      "step": 4620
+    },
+    {
+      "epoch": 0.18311613834562676,
+      "grad_norm": 1.1003580895342506,
+      "learning_rate": 6.103348273134723e-06,
+      "loss": 1.2172,
+      "step": 4630
+    },
+    {
+      "epoch": 0.1835116375645158,
+      "grad_norm": 1.1129213322032758,
+      "learning_rate": 6.116530450830478e-06,
+      "loss": 1.2067,
+      "step": 4640
+    },
+    {
+      "epoch": 0.18390713678340484,
+      "grad_norm": 1.1370152734015335,
+      "learning_rate": 6.129712628526233e-06,
+      "loss": 1.2212,
+      "step": 4650
+    },
+    {
+      "epoch": 0.1843026360022939,
+      "grad_norm": 0.9871263897368611,
+      "learning_rate": 6.142894806221989e-06,
+      "loss": 1.1978,
+      "step": 4660
+    },
+    {
+      "epoch": 0.18469813522118295,
+      "grad_norm": 1.1226188070149417,
+      "learning_rate": 6.1560769839177435e-06,
+      "loss": 1.2064,
+      "step": 4670
+    },
+    {
+      "epoch": 0.18509363444007199,
+      "grad_norm": 1.005155023841371,
+      "learning_rate": 6.169259161613499e-06,
+      "loss": 1.215,
+      "step": 4680
+    },
+    {
+      "epoch": 0.18548913365896103,
+      "grad_norm": 1.1970691027193183,
+      "learning_rate": 6.182441339309254e-06,
+      "loss": 1.2262,
+      "step": 4690
+    },
+    {
+      "epoch": 0.18588463287785006,
+      "grad_norm": 1.1480500843386499,
+      "learning_rate": 6.1956235170050095e-06,
+      "loss": 1.2021,
+      "step": 4700
+    },
+    {
+      "epoch": 0.1862801320967391,
+      "grad_norm": 1.1316159070674807,
+      "learning_rate": 6.208805694700765e-06,
+      "loss": 1.1915,
+      "step": 4710
+    },
+    {
+      "epoch": 0.18667563131562814,
+      "grad_norm": 1.0970783022183737,
+      "learning_rate": 6.22198787239652e-06,
+      "loss": 1.1908,
+      "step": 4720
+    },
+    {
+      "epoch": 0.18707113053451718,
+      "grad_norm": 1.2322937340277362,
+      "learning_rate": 6.235170050092276e-06,
+      "loss": 1.1937,
+      "step": 4730
+    },
+    {
+      "epoch": 0.18746662975340625,
+      "grad_norm": 1.1562788085977769,
+      "learning_rate": 6.248352227788032e-06,
+      "loss": 1.1975,
+      "step": 4740
+    },
+    {
+      "epoch": 0.1878621289722953,
+      "grad_norm": 1.0617214350153394,
+      "learning_rate": 6.2615344054837865e-06,
+      "loss": 1.2073,
+      "step": 4750
+    },
+    {
+      "epoch": 0.18825762819118433,
+      "grad_norm": 1.1267659232258125,
+      "learning_rate": 6.274716583179542e-06,
+      "loss": 1.2082,
+      "step": 4760
+    },
+    {
+      "epoch": 0.18865312741007337,
+      "grad_norm": 1.0953367195241055,
+      "learning_rate": 6.287898760875297e-06,
+      "loss": 1.218,
+      "step": 4770
+    },
+    {
+      "epoch": 0.1890486266289624,
+      "grad_norm": 1.233493824395055,
+      "learning_rate": 6.3010809385710525e-06,
+      "loss": 1.1995,
+      "step": 4780
+    },
+    {
+      "epoch": 0.18944412584785145,
+      "grad_norm": 1.1672051427671783,
+      "learning_rate": 6.314263116266808e-06,
+      "loss": 1.2104,
+      "step": 4790
+    },
+    {
+      "epoch": 0.18983962506674049,
+      "grad_norm": 1.130357352957549,
+      "learning_rate": 6.327445293962563e-06,
+      "loss": 1.1854,
+      "step": 4800
+    },
+    {
+      "epoch": 0.19023512428562953,
+      "grad_norm": 1.1876610829152676,
+      "learning_rate": 6.340627471658318e-06,
+      "loss": 1.2025,
+      "step": 4810
+    },
+    {
+      "epoch": 0.19063062350451856,
+      "grad_norm": 1.1432838733532447,
+      "learning_rate": 6.353809649354073e-06,
+      "loss": 1.2002,
+      "step": 4820
+    },
+    {
+      "epoch": 0.19102612272340763,
+      "grad_norm": 1.1064655435369426,
+      "learning_rate": 6.366991827049829e-06,
+      "loss": 1.2054,
+      "step": 4830
+    },
+    {
+      "epoch": 0.19142162194229667,
+      "grad_norm": 1.0369588688987386,
+      "learning_rate": 6.380174004745584e-06,
+      "loss": 1.2098,
+      "step": 4840
+    },
+    {
+      "epoch": 0.1918171211611857,
+      "grad_norm": 1.1406236057045094,
+      "learning_rate": 6.393356182441341e-06,
+      "loss": 1.1847,
+      "step": 4850
+    },
+    {
+      "epoch": 0.19221262038007475,
+      "grad_norm": 1.1058165241374485,
+      "learning_rate": 6.4065383601370955e-06,
+      "loss": 1.1937,
+      "step": 4860
+    },
+    {
+      "epoch": 0.1926081195989638,
+      "grad_norm": 1.0992793605595896,
+      "learning_rate": 6.419720537832851e-06,
+      "loss": 1.1932,
+      "step": 4870
+    },
+    {
+      "epoch": 0.19300361881785283,
+      "grad_norm": 1.1127877999481255,
+      "learning_rate": 6.432902715528606e-06,
+      "loss": 1.2048,
+      "step": 4880
+    },
+    {
+      "epoch": 0.19339911803674187,
+      "grad_norm": 1.125752676847193,
+      "learning_rate": 6.446084893224361e-06,
+      "loss": 1.2097,
+      "step": 4890
+    },
+    {
+      "epoch": 0.1937946172556309,
+      "grad_norm": 1.1381813091957724,
+      "learning_rate": 6.459267070920117e-06,
+      "loss": 1.2113,
+      "step": 4900
+    },
+    {
+      "epoch": 0.19419011647451997,
+      "grad_norm": 1.1318617892323126,
+      "learning_rate": 6.472449248615872e-06,
+      "loss": 1.2025,
+      "step": 4910
+    },
+    {
+      "epoch": 0.19458561569340901,
+      "grad_norm": 1.1590990307599618,
+      "learning_rate": 6.485631426311627e-06,
+      "loss": 1.2255,
+      "step": 4920
+    },
+    {
+      "epoch": 0.19498111491229805,
+      "grad_norm": 1.0728884274758368,
+      "learning_rate": 6.498813604007382e-06,
+      "loss": 1.2192,
+      "step": 4930
+    },
+    {
+      "epoch": 0.1953766141311871,
+      "grad_norm": 1.1590073488967672,
+      "learning_rate": 6.511995781703138e-06,
+      "loss": 1.1997,
+      "step": 4940
+    },
+    {
+      "epoch": 0.19577211335007613,
+      "grad_norm": 1.0720531676701768,
+      "learning_rate": 6.525177959398893e-06,
+      "loss": 1.2053,
+      "step": 4950
+    },
+    {
+      "epoch": 0.19616761256896517,
+      "grad_norm": 1.102380456447985,
+      "learning_rate": 6.538360137094648e-06,
+      "loss": 1.1884,
+      "step": 4960
+    },
+    {
+      "epoch": 0.1965631117878542,
+      "grad_norm": 1.2196827142160622,
+      "learning_rate": 6.5515423147904036e-06,
+      "loss": 1.1909,
+      "step": 4970
+    },
+    {
+      "epoch": 0.19695861100674325,
+      "grad_norm": 1.2097402651174276,
+      "learning_rate": 6.56472449248616e-06,
+      "loss": 1.1928,
+      "step": 4980
+    },
+    {
+      "epoch": 0.19735411022563232,
+      "grad_norm": 1.196417983790882,
+      "learning_rate": 6.577906670181915e-06,
+      "loss": 1.212,
+      "step": 4990
+    },
+    {
+      "epoch": 0.19774960944452136,
+      "grad_norm": 1.1301826828351647,
+      "learning_rate": 6.59108884787767e-06,
+      "loss": 1.2082,
+      "step": 5000
+    },
+    {
+      "epoch": 0.1981451086634104,
+      "grad_norm": 1.205324899611935,
+      "learning_rate": 6.604271025573425e-06,
+      "loss": 1.2016,
+      "step": 5010
+    },
+    {
+      "epoch": 0.19854060788229944,
+      "grad_norm": 1.0418298360414089,
+      "learning_rate": 6.617453203269181e-06,
+      "loss": 1.1904,
+      "step": 5020
+    },
+    {
+      "epoch": 0.19893610710118848,
+      "grad_norm": 1.1056732498997823,
+      "learning_rate": 6.630635380964936e-06,
+      "loss": 1.1847,
+      "step": 5030
+    },
+    {
+      "epoch": 0.19933160632007751,
+      "grad_norm": 1.1723411776579258,
+      "learning_rate": 6.643817558660691e-06,
+      "loss": 1.2013,
+      "step": 5040
+    },
+    {
+      "epoch": 0.19972710553896655,
+      "grad_norm": 1.0725587178293903,
+      "learning_rate": 6.6569997363564466e-06,
+      "loss": 1.1906,
+      "step": 5050
+    },
+    {
+      "epoch": 0.2001226047578556,
+      "grad_norm": 1.182205126075706,
+      "learning_rate": 6.670181914052201e-06,
+      "loss": 1.2293,
+      "step": 5060
+    },
+    {
+      "epoch": 0.20051810397674463,
+      "grad_norm": 1.1706906166684679,
+      "learning_rate": 6.683364091747957e-06,
+      "loss": 1.1839,
+      "step": 5070
+    },
+    {
+      "epoch": 0.2009136031956337,
+      "grad_norm": 1.0606264241886505,
+      "learning_rate": 6.6965462694437125e-06,
+      "loss": 1.1898,
+      "step": 5080
+    },
+    {
+      "epoch": 0.20130910241452274,
+      "grad_norm": 1.102577300984322,
+      "learning_rate": 6.709728447139467e-06,
+      "loss": 1.1931,
+      "step": 5090
+    },
+    {
+      "epoch": 0.20170460163341178,
+      "grad_norm": 1.0740894576699729,
+      "learning_rate": 6.722910624835224e-06,
+      "loss": 1.19,
+      "step": 5100
+    },
+    {
+      "epoch": 0.20210010085230082,
+      "grad_norm": 1.031551191036934,
+      "learning_rate": 6.736092802530979e-06,
+      "loss": 1.1873,
+      "step": 5110
+    },
+    {
+      "epoch": 0.20249560007118986,
+      "grad_norm": 1.1076310763618626,
+      "learning_rate": 6.749274980226734e-06,
+      "loss": 1.1972,
+      "step": 5120
+    },
+    {
+      "epoch": 0.2028910992900789,
+      "grad_norm": 1.166132111346991,
+      "learning_rate": 6.7624571579224896e-06,
+      "loss": 1.1901,
+      "step": 5130
+    },
+    {
+      "epoch": 0.20328659850896794,
+      "grad_norm": 1.132853692988871,
+      "learning_rate": 6.775639335618245e-06,
+      "loss": 1.2034,
+      "step": 5140
+    },
+    {
+      "epoch": 0.20368209772785698,
+      "grad_norm": 0.9725479759881043,
+      "learning_rate": 6.788821513314e-06,
+      "loss": 1.1833,
+      "step": 5150
+    },
+    {
+      "epoch": 0.20407759694674604,
+      "grad_norm": 1.0332235948730206,
+      "learning_rate": 6.8020036910097555e-06,
+      "loss": 1.1982,
+      "step": 5160
+    },
+    {
+      "epoch": 0.20447309616563508,
+      "grad_norm": 1.0583171205475557,
+      "learning_rate": 6.81518586870551e-06,
+      "loss": 1.2041,
+      "step": 5170
+    },
+    {
+      "epoch": 0.20486859538452412,
+      "grad_norm": 1.109138409419143,
+      "learning_rate": 6.828368046401266e-06,
+      "loss": 1.187,
+      "step": 5180
+    },
+    {
+      "epoch": 0.20526409460341316,
+      "grad_norm": 1.0705760872339762,
+      "learning_rate": 6.841550224097021e-06,
+      "loss": 1.1962,
+      "step": 5190
+    },
+    {
+      "epoch": 0.2056595938223022,
+      "grad_norm": 1.1911369148569064,
+      "learning_rate": 6.854732401792776e-06,
+      "loss": 1.1849,
+      "step": 5200
+    },
+    {
+      "epoch": 0.20605509304119124,
+      "grad_norm": 1.230097996524188,
+      "learning_rate": 6.867914579488532e-06,
+      "loss": 1.1891,
+      "step": 5210
+    },
+    {
+      "epoch": 0.20645059226008028,
+      "grad_norm": 1.0580823525273977,
+      "learning_rate": 6.881096757184288e-06,
+      "loss": 1.1863,
+      "step": 5220
+    },
+    {
+      "epoch": 0.20684609147896932,
+      "grad_norm": 1.0052600867365868,
+      "learning_rate": 6.894278934880043e-06,
+      "loss": 1.2044,
+      "step": 5230
+    },
+    {
+      "epoch": 0.20724159069785839,
+      "grad_norm": 1.1017939365565552,
+      "learning_rate": 6.9074611125757985e-06,
+      "loss": 1.1684,
+      "step": 5240
+    },
+    {
+      "epoch": 0.20763708991674742,
+      "grad_norm": 1.2056077201045035,
+      "learning_rate": 6.920643290271554e-06,
+      "loss": 1.2266,
+      "step": 5250
+    },
+    {
+      "epoch": 0.20803258913563646,
+      "grad_norm": 1.181079240370204,
+      "learning_rate": 6.933825467967309e-06,
+      "loss": 1.186,
+      "step": 5260
+    },
+    {
+      "epoch": 0.2084280883545255,
+      "grad_norm": 1.2195669073367532,
+      "learning_rate": 6.947007645663064e-06,
+      "loss": 1.1817,
+      "step": 5270
+    },
+    {
+      "epoch": 0.20882358757341454,
+      "grad_norm": 1.1235748020775331,
+      "learning_rate": 6.960189823358819e-06,
+      "loss": 1.2001,
+      "step": 5280
+    },
+    {
+      "epoch": 0.20921908679230358,
+      "grad_norm": 1.1521378141517005,
+      "learning_rate": 6.973372001054575e-06,
+      "loss": 1.1924,
+      "step": 5290
+    },
+    {
+      "epoch": 0.20961458601119262,
+      "grad_norm": 1.0166028358003851,
+      "learning_rate": 6.98655417875033e-06,
+      "loss": 1.1897,
+      "step": 5300
+    },
+    {
+      "epoch": 0.21001008523008166,
+      "grad_norm": 1.0869474511568988,
+      "learning_rate": 6.999736356446085e-06,
+      "loss": 1.1823,
+      "step": 5310
+    },
+    {
+      "epoch": 0.2104055844489707,
+      "grad_norm": 1.1130187118253254,
+      "learning_rate": 7.012918534141841e-06,
+      "loss": 1.2134,
+      "step": 5320
+    },
+    {
+      "epoch": 0.21080108366785977,
+      "grad_norm": 1.1469650571130527,
+      "learning_rate": 7.026100711837595e-06,
+      "loss": 1.188,
+      "step": 5330
+    },
+    {
+      "epoch": 0.2111965828867488,
+      "grad_norm": 1.1300566228826372,
+      "learning_rate": 7.039282889533352e-06,
+      "loss": 1.1725,
+      "step": 5340
+    },
+    {
+      "epoch": 0.21159208210563785,
+      "grad_norm": 1.274414479671289,
+      "learning_rate": 7.052465067229107e-06,
+      "loss": 1.1988,
+      "step": 5350
+    },
+    {
+      "epoch": 0.21198758132452689,
+      "grad_norm": 1.1850161637671603,
+      "learning_rate": 7.065647244924862e-06,
+      "loss": 1.1975,
+      "step": 5360
+    },
+    {
+      "epoch": 0.21238308054341593,
+      "grad_norm": 1.0612621577904042,
+      "learning_rate": 7.078829422620618e-06,
+      "loss": 1.1728,
+      "step": 5370
+    },
+    {
+      "epoch": 0.21277857976230496,
+      "grad_norm": 1.1124533314306806,
+      "learning_rate": 7.092011600316373e-06,
+      "loss": 1.1843,
+      "step": 5380
+    },
+    {
+      "epoch": 0.213174078981194,
+      "grad_norm": 1.0477804215375393,
+      "learning_rate": 7.105193778012128e-06,
+      "loss": 1.1811,
+      "step": 5390
+    },
+    {
+      "epoch": 0.21356957820008304,
+      "grad_norm": 1.0372335636733403,
+      "learning_rate": 7.118375955707884e-06,
+      "loss": 1.1951,
+      "step": 5400
+    },
+    {
+      "epoch": 0.2139650774189721,
+      "grad_norm": 1.1481534276974998,
+      "learning_rate": 7.131558133403638e-06,
+      "loss": 1.1947,
+      "step": 5410
+    },
+    {
+      "epoch": 0.21436057663786115,
+      "grad_norm": 1.1697544411860625,
+      "learning_rate": 7.144740311099394e-06,
+      "loss": 1.1953,
+      "step": 5420
+    },
+    {
+      "epoch": 0.2147560758567502,
+      "grad_norm": 1.1311784869070132,
+      "learning_rate": 7.15792248879515e-06,
+      "loss": 1.1787,
+      "step": 5430
+    },
+    {
+      "epoch": 0.21515157507563923,
+      "grad_norm": 1.2282071696512284,
+      "learning_rate": 7.171104666490904e-06,
+      "loss": 1.1759,
+      "step": 5440
+    },
+    {
+      "epoch": 0.21554707429452827,
+      "grad_norm": 1.177619393772581,
+      "learning_rate": 7.18428684418666e-06,
+      "loss": 1.1885,
+      "step": 5450
+    },
+    {
+      "epoch": 0.2159425735134173,
+      "grad_norm": 1.090265552981861,
+      "learning_rate": 7.197469021882415e-06,
+      "loss": 1.1939,
+      "step": 5460
+    },
+    {
+      "epoch": 0.21633807273230635,
+      "grad_norm": 1.142632763613256,
+      "learning_rate": 7.210651199578171e-06,
+      "loss": 1.1822,
+      "step": 5470
+    },
+    {
+      "epoch": 0.2167335719511954,
+      "grad_norm": 1.0718320903292262,
+      "learning_rate": 7.223833377273927e-06,
+      "loss": 1.1939,
+      "step": 5480
+    },
+    {
+      "epoch": 0.21712907117008443,
+      "grad_norm": 1.176301180636614,
+      "learning_rate": 7.237015554969682e-06,
+      "loss": 1.1907,
+      "step": 5490
+    },
+    {
+      "epoch": 0.2175245703889735,
+      "grad_norm": 1.1886012961434531,
+      "learning_rate": 7.250197732665437e-06,
+      "loss": 1.1826,
+      "step": 5500
+    },
+    {
+      "epoch": 0.21792006960786253,
+      "grad_norm": 1.0984046476010765,
+      "learning_rate": 7.263379910361193e-06,
+      "loss": 1.1889,
+      "step": 5510
+    },
+    {
+      "epoch": 0.21831556882675157,
+      "grad_norm": 1.1404654332642885,
+      "learning_rate": 7.276562088056947e-06,
+      "loss": 1.1849,
+      "step": 5520
+    },
+    {
+      "epoch": 0.2187110680456406,
+      "grad_norm": 1.4914170937444111,
+      "learning_rate": 7.289744265752703e-06,
+      "loss": 1.2077,
+      "step": 5530
+    },
+    {
+      "epoch": 0.21910656726452965,
+      "grad_norm": 1.0116134814623248,
+      "learning_rate": 7.3029264434484585e-06,
+      "loss": 1.1896,
+      "step": 5540
+    },
+    {
+      "epoch": 0.2195020664834187,
+      "grad_norm": 1.0576683251568944,
+      "learning_rate": 7.316108621144213e-06,
+      "loss": 1.1669,
+      "step": 5550
+    },
+    {
+      "epoch": 0.21989756570230773,
+      "grad_norm": 1.176212516636309,
+      "learning_rate": 7.329290798839969e-06,
+      "loss": 1.1856,
+      "step": 5560
+    },
+    {
+      "epoch": 0.22029306492119677,
+      "grad_norm": 1.0528687783871176,
+      "learning_rate": 7.342472976535724e-06,
+      "loss": 1.1827,
+      "step": 5570
+    },
+    {
+      "epoch": 0.22068856414008584,
+      "grad_norm": 1.0778721372671816,
+      "learning_rate": 7.355655154231479e-06,
+      "loss": 1.1782,
+      "step": 5580
+    },
+    {
+      "epoch": 0.22108406335897487,
+      "grad_norm": 1.1596046837857599,
+      "learning_rate": 7.368837331927236e-06,
+      "loss": 1.1887,
+      "step": 5590
+    },
+    {
+      "epoch": 0.22147956257786391,
+      "grad_norm": 1.1386802424246674,
+      "learning_rate": 7.38201950962299e-06,
+      "loss": 1.183,
+      "step": 5600
+    },
+    {
+      "epoch": 0.22187506179675295,
+      "grad_norm": 1.1391482737850553,
+      "learning_rate": 7.395201687318746e-06,
+      "loss": 1.1697,
+      "step": 5610
+    },
+    {
+      "epoch": 0.222270561015642,
+      "grad_norm": 1.1660626439813442,
+      "learning_rate": 7.4083838650145015e-06,
+      "loss": 1.1932,
+      "step": 5620
+    },
+    {
+      "epoch": 0.22266606023453103,
+      "grad_norm": 1.1555188872700974,
+      "learning_rate": 7.421566042710256e-06,
+      "loss": 1.1763,
+      "step": 5630
+    },
+    {
+      "epoch": 0.22306155945342007,
+      "grad_norm": 1.0991845739762487,
+      "learning_rate": 7.434748220406012e-06,
+      "loss": 1.1853,
+      "step": 5640
+    },
+    {
+      "epoch": 0.2234570586723091,
+      "grad_norm": 1.0182843488423188,
+      "learning_rate": 7.447930398101767e-06,
+      "loss": 1.1988,
+      "step": 5650
+    },
+    {
+      "epoch": 0.22385255789119818,
+      "grad_norm": 1.1130398385272844,
+      "learning_rate": 7.461112575797522e-06,
+      "loss": 1.1913,
+      "step": 5660
+    },
+    {
+      "epoch": 0.22424805711008722,
+      "grad_norm": 0.9908546857530702,
+      "learning_rate": 7.474294753493278e-06,
+      "loss": 1.1704,
+      "step": 5670
+    },
+    {
+      "epoch": 0.22464355632897626,
+      "grad_norm": 1.067589359758871,
+      "learning_rate": 7.4874769311890325e-06,
+      "loss": 1.1913,
+      "step": 5680
+    },
+    {
+      "epoch": 0.2250390555478653,
+      "grad_norm": 1.1105660597118736,
+      "learning_rate": 7.500659108884788e-06,
+      "loss": 1.1759,
+      "step": 5690
+    },
+    {
+      "epoch": 0.22543455476675434,
+      "grad_norm": 1.041106831265699,
+      "learning_rate": 7.513841286580543e-06,
+      "loss": 1.1701,
+      "step": 5700
+    },
+    {
+      "epoch": 0.22583005398564338,
+      "grad_norm": 1.2642404375824563,
+      "learning_rate": 7.527023464276299e-06,
+      "loss": 1.1876,
+      "step": 5710
+    },
+    {
+      "epoch": 0.22622555320453241,
+      "grad_norm": 1.0686131672242405,
+      "learning_rate": 7.540205641972055e-06,
+      "loss": 1.1987,
+      "step": 5720
+    },
+    {
+      "epoch": 0.22662105242342145,
+      "grad_norm": 1.0928903190558983,
+      "learning_rate": 7.5533878196678104e-06,
+      "loss": 1.1769,
+      "step": 5730
+    },
+    {
+      "epoch": 0.2270165516423105,
+      "grad_norm": 1.1249045038505108,
+      "learning_rate": 7.566569997363565e-06,
+      "loss": 1.1777,
+      "step": 5740
+    },
+    {
+      "epoch": 0.22741205086119956,
+      "grad_norm": 1.053302381764884,
+      "learning_rate": 7.579752175059321e-06,
+      "loss": 1.1815,
+      "step": 5750
+    },
+    {
+      "epoch": 0.2278075500800886,
+      "grad_norm": 1.06009376731742,
+      "learning_rate": 7.5929343527550755e-06,
+      "loss": 1.1806,
+      "step": 5760
+    },
+    {
+      "epoch": 0.22820304929897764,
+      "grad_norm": 1.1668357715197877,
+      "learning_rate": 7.606116530450831e-06,
+      "loss": 1.1827,
+      "step": 5770
+    },
+    {
+      "epoch": 0.22859854851786668,
+      "grad_norm": 1.1374322414862652,
+      "learning_rate": 7.619298708146587e-06,
+      "loss": 1.1772,
+      "step": 5780
+    },
+    {
+      "epoch": 0.22899404773675572,
+      "grad_norm": 1.1515430768494321,
+      "learning_rate": 7.632480885842342e-06,
+      "loss": 1.1741,
+      "step": 5790
+    },
+    {
+      "epoch": 0.22938954695564476,
+      "grad_norm": 1.0747868397069782,
+      "learning_rate": 7.645663063538097e-06,
+      "loss": 1.1721,
+      "step": 5800
+    },
+    {
+      "epoch": 0.2297850461745338,
+      "grad_norm": 1.0802652471530219,
+      "learning_rate": 7.658845241233852e-06,
+      "loss": 1.1897,
+      "step": 5810
+    },
+    {
+      "epoch": 0.23018054539342284,
+      "grad_norm": 1.0518838605645595,
+      "learning_rate": 7.672027418929607e-06,
+      "loss": 1.1908,
+      "step": 5820
+    },
+    {
+      "epoch": 0.2305760446123119,
+      "grad_norm": 1.2135760341685462,
+      "learning_rate": 7.685209596625363e-06,
+      "loss": 1.1836,
+      "step": 5830
+    },
+    {
+      "epoch": 0.23097154383120094,
+      "grad_norm": 0.9999715659492281,
+      "learning_rate": 7.69839177432112e-06,
+      "loss": 1.1811,
+      "step": 5840
+    },
+    {
+      "epoch": 0.23136704305008998,
+      "grad_norm": 1.1261489154359952,
+      "learning_rate": 7.711573952016874e-06,
+      "loss": 1.1908,
+      "step": 5850
+    },
+    {
+      "epoch": 0.23176254226897902,
+      "grad_norm": 1.126828046943543,
+      "learning_rate": 7.724756129712629e-06,
+      "loss": 1.1793,
+      "step": 5860
+    },
+    {
+      "epoch": 0.23215804148786806,
+      "grad_norm": 1.0651831460117613,
+      "learning_rate": 7.737938307408385e-06,
+      "loss": 1.1766,
+      "step": 5870
+    },
+    {
+      "epoch": 0.2325535407067571,
+      "grad_norm": 1.106690753227979,
+      "learning_rate": 7.75112048510414e-06,
+      "loss": 1.1867,
+      "step": 5880
+    },
+    {
+      "epoch": 0.23294903992564614,
+      "grad_norm": 1.101915469968922,
+      "learning_rate": 7.764302662799895e-06,
+      "loss": 1.1716,
+      "step": 5890
+    },
+    {
+      "epoch": 0.23334453914453518,
+      "grad_norm": 1.0570199268648146,
+      "learning_rate": 7.777484840495651e-06,
+      "loss": 1.1711,
+      "step": 5900
+    },
+    {
+      "epoch": 0.23374003836342422,
+      "grad_norm": 1.1084856382867536,
+      "learning_rate": 7.790667018191406e-06,
+      "loss": 1.1812,
+      "step": 5910
+    },
+    {
+      "epoch": 0.23413553758231329,
+      "grad_norm": 1.0572418455319958,
+      "learning_rate": 7.80384919588716e-06,
+      "loss": 1.1689,
+      "step": 5920
+    },
+    {
+      "epoch": 0.23453103680120232,
+      "grad_norm": 1.2009413694561577,
+      "learning_rate": 7.817031373582915e-06,
+      "loss": 1.1739,
+      "step": 5930
+    },
+    {
+      "epoch": 0.23492653602009136,
+      "grad_norm": 1.1712303243934858,
+      "learning_rate": 7.830213551278672e-06,
+      "loss": 1.1737,
+      "step": 5940
+    },
+    {
+      "epoch": 0.2353220352389804,
+      "grad_norm": 1.1495272290453256,
+      "learning_rate": 7.843395728974427e-06,
+      "loss": 1.1746,
+      "step": 5950
+    },
+    {
+      "epoch": 0.23571753445786944,
+      "grad_norm": 1.0896631247253787,
+      "learning_rate": 7.856577906670183e-06,
+      "loss": 1.16,
+      "step": 5960
+    },
+    {
+      "epoch": 0.23611303367675848,
+      "grad_norm": 1.1222862469161627,
+      "learning_rate": 7.869760084365938e-06,
+      "loss": 1.1838,
+      "step": 5970
+    },
+    {
+      "epoch": 0.23650853289564752,
+      "grad_norm": 1.1028633684686153,
+      "learning_rate": 7.882942262061694e-06,
+      "loss": 1.1867,
+      "step": 5980
+    },
+    {
+      "epoch": 0.23690403211453656,
+      "grad_norm": 1.081945129117165,
+      "learning_rate": 7.896124439757449e-06,
+      "loss": 1.1637,
+      "step": 5990
+    },
+    {
+      "epoch": 0.23729953133342563,
+      "grad_norm": 1.0328786633600346,
+      "learning_rate": 7.909306617453204e-06,
+      "loss": 1.1871,
+      "step": 6000
+    },
+    {
+      "epoch": 0.23769503055231467,
+      "grad_norm": 1.1079988408281052,
+      "learning_rate": 7.92248879514896e-06,
+      "loss": 1.158,
+      "step": 6010
+    },
+    {
+      "epoch": 0.2380905297712037,
+      "grad_norm": 1.1122837017014315,
+      "learning_rate": 7.935670972844715e-06,
+      "loss": 1.1892,
+      "step": 6020
+    },
+    {
+      "epoch": 0.23848602899009275,
+      "grad_norm": 1.085944017501584,
+      "learning_rate": 7.94885315054047e-06,
+      "loss": 1.1792,
+      "step": 6030
+    },
+    {
+      "epoch": 0.23888152820898179,
+      "grad_norm": 1.1582566764845728,
+      "learning_rate": 7.962035328236224e-06,
+      "loss": 1.1914,
+      "step": 6040
+    },
+    {
+      "epoch": 0.23927702742787083,
+      "grad_norm": 0.9941134424761493,
+      "learning_rate": 7.97521750593198e-06,
+      "loss": 1.1674,
+      "step": 6050
+    },
+    {
+      "epoch": 0.23967252664675986,
+      "grad_norm": 1.1210103484952265,
+      "learning_rate": 7.988399683627736e-06,
+      "loss": 1.1527,
+      "step": 6060
+    },
+    {
+      "epoch": 0.2400680258656489,
+      "grad_norm": 0.9956069141554965,
+      "learning_rate": 8.00158186132349e-06,
+      "loss": 1.1825,
+      "step": 6070
+    },
+    {
+      "epoch": 0.24046352508453797,
+      "grad_norm": 1.0546877411905147,
+      "learning_rate": 8.014764039019247e-06,
+      "loss": 1.1588,
+      "step": 6080
+    },
+    {
+      "epoch": 0.240859024303427,
+      "grad_norm": 1.0720512777738658,
+      "learning_rate": 8.027946216715003e-06,
+      "loss": 1.1608,
+      "step": 6090
+    },
+    {
+      "epoch": 0.24125452352231605,
+      "grad_norm": 1.1327424990841461,
+      "learning_rate": 8.041128394410758e-06,
+      "loss": 1.18,
+      "step": 6100
+    },
+    {
+      "epoch": 0.2416500227412051,
+      "grad_norm": 1.0246150412127613,
+      "learning_rate": 8.054310572106513e-06,
+      "loss": 1.1727,
+      "step": 6110
+    },
+    {
+      "epoch": 0.24204552196009413,
+      "grad_norm": 1.0547704761882348,
+      "learning_rate": 8.067492749802267e-06,
+      "loss": 1.2026,
+      "step": 6120
+    },
+    {
+      "epoch": 0.24244102117898317,
+      "grad_norm": 1.1540055379752396,
+      "learning_rate": 8.080674927498024e-06,
+      "loss": 1.1724,
+      "step": 6130
+    },
+    {
+      "epoch": 0.2428365203978722,
+      "grad_norm": 1.2501221411947008,
+      "learning_rate": 8.093857105193779e-06,
+      "loss": 1.1785,
+      "step": 6140
+    },
+    {
+      "epoch": 0.24323201961676125,
+      "grad_norm": 1.0614325320732845,
+      "learning_rate": 8.107039282889533e-06,
+      "loss": 1.1658,
+      "step": 6150
+    },
+    {
+      "epoch": 0.2436275188356503,
+      "grad_norm": 1.015507074925763,
+      "learning_rate": 8.12022146058529e-06,
+      "loss": 1.1764,
+      "step": 6160
+    },
+    {
+      "epoch": 0.24402301805453935,
+      "grad_norm": 1.0628681347777855,
+      "learning_rate": 8.133403638281044e-06,
+      "loss": 1.1675,
+      "step": 6170
+    },
+    {
+      "epoch": 0.2444185172734284,
+      "grad_norm": 1.0932486532840522,
+      "learning_rate": 8.1465858159768e-06,
+      "loss": 1.1709,
+      "step": 6180
+    },
+    {
+      "epoch": 0.24481401649231743,
+      "grad_norm": 1.1263862596026384,
+      "learning_rate": 8.159767993672556e-06,
+      "loss": 1.1597,
+      "step": 6190
+    },
+    {
+      "epoch": 0.24520951571120647,
+      "grad_norm": 1.0770805734265192,
+      "learning_rate": 8.172950171368312e-06,
+      "loss": 1.174,
+      "step": 6200
+    },
+    {
+      "epoch": 0.2456050149300955,
+      "grad_norm": 1.0603848160715135,
+      "learning_rate": 8.186132349064067e-06,
+      "loss": 1.1768,
+      "step": 6210
+    },
+    {
+      "epoch": 0.24600051414898455,
+      "grad_norm": 1.253670238853359,
+      "learning_rate": 8.199314526759822e-06,
+      "loss": 1.1777,
+      "step": 6220
+    },
+    {
+      "epoch": 0.2463960133678736,
+      "grad_norm": 1.0934642808612727,
+      "learning_rate": 8.212496704455576e-06,
+      "loss": 1.1915,
+      "step": 6230
+    },
+    {
+      "epoch": 0.24679151258676263,
+      "grad_norm": 0.9847850117555382,
+      "learning_rate": 8.225678882151333e-06,
+      "loss": 1.1767,
+      "step": 6240
+    },
+    {
+      "epoch": 0.2471870118056517,
+      "grad_norm": 1.211193544162175,
+      "learning_rate": 8.238861059847087e-06,
+      "loss": 1.1665,
+      "step": 6250
+    },
+    {
+      "epoch": 0.24758251102454074,
+      "grad_norm": 1.0847733181321655,
+      "learning_rate": 8.252043237542842e-06,
+      "loss": 1.1619,
+      "step": 6260
+    },
+    {
+      "epoch": 0.24797801024342978,
+      "grad_norm": 1.1025174074927875,
+      "learning_rate": 8.265225415238599e-06,
+      "loss": 1.1701,
+      "step": 6270
+    },
+    {
+      "epoch": 0.24837350946231881,
+      "grad_norm": 1.1692208486659628,
+      "learning_rate": 8.278407592934353e-06,
+      "loss": 1.1697,
+      "step": 6280
+    },
+    {
+      "epoch": 0.24876900868120785,
+      "grad_norm": 1.0073475785556216,
+      "learning_rate": 8.291589770630108e-06,
+      "loss": 1.1734,
+      "step": 6290
+    },
+    {
+      "epoch": 0.2491645079000969,
+      "grad_norm": 1.0234624670140824,
+      "learning_rate": 8.304771948325865e-06,
+      "loss": 1.1854,
+      "step": 6300
+    },
+    {
+      "epoch": 0.24956000711898593,
+      "grad_norm": 1.0951585384234093,
+      "learning_rate": 8.31795412602162e-06,
+      "loss": 1.1762,
+      "step": 6310
+    },
+    {
+      "epoch": 0.24995550633787497,
+      "grad_norm": 1.0873306151039657,
+      "learning_rate": 8.331136303717374e-06,
+      "loss": 1.1552,
+      "step": 6320
+    },
+    {
+      "epoch": 0.25035100555676404,
+      "grad_norm": 1.0240120393841852,
+      "learning_rate": 8.34431848141313e-06,
+      "loss": 1.1802,
+      "step": 6330
+    },
+    {
+      "epoch": 0.2507465047756531,
+      "grad_norm": 1.034552069138358,
+      "learning_rate": 8.357500659108885e-06,
+      "loss": 1.1668,
+      "step": 6340
+    },
+    {
+      "epoch": 0.2511420039945421,
+      "grad_norm": 1.1197005220763687,
+      "learning_rate": 8.370682836804642e-06,
+      "loss": 1.1738,
+      "step": 6350
+    },
+    {
+      "epoch": 0.25153750321343116,
+      "grad_norm": 1.119668682368083,
+      "learning_rate": 8.383865014500396e-06,
+      "loss": 1.1583,
+      "step": 6360
+    },
+    {
+      "epoch": 0.2519330024323202,
+      "grad_norm": 1.2075231131828583,
+      "learning_rate": 8.397047192196151e-06,
+      "loss": 1.1708,
+      "step": 6370
+    },
+    {
+      "epoch": 0.25232850165120924,
+      "grad_norm": 1.1377469506231173,
+      "learning_rate": 8.410229369891908e-06,
+      "loss": 1.1728,
+      "step": 6380
+    },
+    {
+      "epoch": 0.2527240008700983,
+      "grad_norm": 1.1178120669656715,
+      "learning_rate": 8.423411547587662e-06,
+      "loss": 1.1694,
+      "step": 6390
+    },
+    {
+      "epoch": 0.2531195000889873,
+      "grad_norm": 1.0779353890928898,
+      "learning_rate": 8.436593725283417e-06,
+      "loss": 1.1471,
+      "step": 6400
+    },
+    {
+      "epoch": 0.25351499930787635,
+      "grad_norm": 0.9953082219668707,
+      "learning_rate": 8.449775902979172e-06,
+      "loss": 1.197,
+      "step": 6410
+    },
+    {
+      "epoch": 0.2539104985267654,
+      "grad_norm": 1.0101990986198501,
+      "learning_rate": 8.462958080674928e-06,
+      "loss": 1.1553,
+      "step": 6420
+    },
+    {
+      "epoch": 0.25430599774565443,
+      "grad_norm": 1.157969204894414,
+      "learning_rate": 8.476140258370683e-06,
+      "loss": 1.1623,
+      "step": 6430
+    },
+    {
+      "epoch": 0.2547014969645435,
+      "grad_norm": 1.0488965315858676,
+      "learning_rate": 8.489322436066438e-06,
+      "loss": 1.1451,
+      "step": 6440
+    },
+    {
+      "epoch": 0.2550969961834325,
+      "grad_norm": 1.1041395386253063,
+      "learning_rate": 8.502504613762194e-06,
+      "loss": 1.1659,
+      "step": 6450
+    },
+    {
+      "epoch": 0.2554924954023216,
+      "grad_norm": 1.1581646432867871,
+      "learning_rate": 8.51568679145795e-06,
+      "loss": 1.1766,
+      "step": 6460
+    },
+    {
+      "epoch": 0.25588799462121065,
+      "grad_norm": 1.0980356702568528,
+      "learning_rate": 8.528868969153705e-06,
+      "loss": 1.1785,
+      "step": 6470
+    },
+    {
+      "epoch": 0.2562834938400997,
+      "grad_norm": 1.1861087679667697,
+      "learning_rate": 8.54205114684946e-06,
+      "loss": 1.1585,
+      "step": 6480
+    },
+    {
+      "epoch": 0.2566789930589887,
+      "grad_norm": 1.0219639518352461,
+      "learning_rate": 8.555233324545216e-06,
+      "loss": 1.1772,
+      "step": 6490
+    },
+    {
+      "epoch": 0.25707449227787776,
+      "grad_norm": 1.056878966123612,
+      "learning_rate": 8.568415502240971e-06,
+      "loss": 1.1671,
+      "step": 6500
+    },
+    {
+      "epoch": 0.2574699914967668,
+      "grad_norm": 1.0940660037017722,
+      "learning_rate": 8.581597679936726e-06,
+      "loss": 1.148,
+      "step": 6510
+    },
+    {
+      "epoch": 0.25786549071565584,
+      "grad_norm": 1.0674300690750151,
+      "learning_rate": 8.59477985763248e-06,
+      "loss": 1.174,
+      "step": 6520
+    },
+    {
+      "epoch": 0.2582609899345449,
+      "grad_norm": 1.168884414847323,
+      "learning_rate": 8.607962035328237e-06,
+      "loss": 1.1738,
+      "step": 6530
+    },
+    {
+      "epoch": 0.2586564891534339,
+      "grad_norm": 1.0901074571606302,
+      "learning_rate": 8.621144213023992e-06,
+      "loss": 1.1698,
+      "step": 6540
+    },
+    {
+      "epoch": 0.25905198837232296,
+      "grad_norm": 1.0081933008955009,
+      "learning_rate": 8.634326390719747e-06,
+      "loss": 1.1732,
+      "step": 6550
+    },
+    {
+      "epoch": 0.259447487591212,
+      "grad_norm": 1.0472523218672491,
+      "learning_rate": 8.647508568415503e-06,
+      "loss": 1.1944,
+      "step": 6560
+    },
+    {
+      "epoch": 0.25984298681010104,
+      "grad_norm": 1.083002614378503,
+      "learning_rate": 8.66069074611126e-06,
+      "loss": 1.1759,
+      "step": 6570
+    },
+    {
+      "epoch": 0.2602384860289901,
+      "grad_norm": 1.0337867350936691,
+      "learning_rate": 8.673872923807014e-06,
+      "loss": 1.1786,
+      "step": 6580
+    },
+    {
+      "epoch": 0.2606339852478791,
+      "grad_norm": 1.1750093553727017,
+      "learning_rate": 8.687055101502769e-06,
+      "loss": 1.152,
+      "step": 6590
+    },
+    {
+      "epoch": 0.26102948446676816,
+      "grad_norm": 1.0065370977739418,
+      "learning_rate": 8.700237279198524e-06,
+      "loss": 1.1975,
+      "step": 6600
+    },
+    {
+      "epoch": 0.2614249836856572,
+      "grad_norm": 1.1200115924116618,
+      "learning_rate": 8.71341945689428e-06,
+      "loss": 1.1589,
+      "step": 6610
+    },
+    {
+      "epoch": 0.26182048290454624,
+      "grad_norm": 1.103576799969859,
+      "learning_rate": 8.726601634590035e-06,
+      "loss": 1.1619,
+      "step": 6620
+    },
+    {
+      "epoch": 0.26221598212343533,
+      "grad_norm": 1.106156567910775,
+      "learning_rate": 8.73978381228579e-06,
+      "loss": 1.1391,
+      "step": 6630
+    },
+    {
+      "epoch": 0.26261148134232437,
+      "grad_norm": 1.1459364589766694,
+      "learning_rate": 8.752965989981546e-06,
+      "loss": 1.1438,
+      "step": 6640
+    },
+    {
+      "epoch": 0.2630069805612134,
+      "grad_norm": 1.0214619379847538,
+      "learning_rate": 8.7661481676773e-06,
+      "loss": 1.1629,
+      "step": 6650
+    },
+    {
+      "epoch": 0.26340247978010245,
+      "grad_norm": 1.0815641824393551,
+      "learning_rate": 8.779330345373056e-06,
+      "loss": 1.1714,
+      "step": 6660
+    },
+    {
+      "epoch": 0.2637979789989915,
+      "grad_norm": 1.0716313595933342,
+      "learning_rate": 8.792512523068812e-06,
+      "loss": 1.1749,
+      "step": 6670
+    },
+    {
+      "epoch": 0.26419347821788053,
+      "grad_norm": 0.9878242959846413,
+      "learning_rate": 8.805694700764567e-06,
+      "loss": 1.1618,
+      "step": 6680
+    },
+    {
+      "epoch": 0.26458897743676957,
+      "grad_norm": 1.1009137554289798,
+      "learning_rate": 8.818876878460323e-06,
+      "loss": 1.1669,
+      "step": 6690
+    },
+    {
+      "epoch": 0.2649844766556586,
+      "grad_norm": 1.1762070685344304,
+      "learning_rate": 8.832059056156078e-06,
+      "loss": 1.1741,
+      "step": 6700
+    },
+    {
+      "epoch": 0.26537997587454765,
+      "grad_norm": 1.1079271551948326,
+      "learning_rate": 8.845241233851833e-06,
+      "loss": 1.1627,
+      "step": 6710
+    },
+    {
+      "epoch": 0.2657754750934367,
+      "grad_norm": 1.1747891181397616,
+      "learning_rate": 8.858423411547589e-06,
+      "loss": 1.1501,
+      "step": 6720
+    },
+    {
+      "epoch": 0.2661709743123257,
+      "grad_norm": 1.037936510673359,
+      "learning_rate": 8.871605589243344e-06,
+      "loss": 1.1569,
+      "step": 6730
+    },
+    {
+      "epoch": 0.26656647353121476,
+      "grad_norm": 1.0870972961027363,
+      "learning_rate": 8.884787766939099e-06,
+      "loss": 1.1597,
+      "step": 6740
+    },
+    {
+      "epoch": 0.2669619727501038,
+      "grad_norm": 1.02864829135046,
+      "learning_rate": 8.897969944634855e-06,
+      "loss": 1.1566,
+      "step": 6750
+    },
+    {
+      "epoch": 0.26735747196899284,
+      "grad_norm": 1.0176116623294185,
+      "learning_rate": 8.91115212233061e-06,
+      "loss": 1.151,
+      "step": 6760
+    },
+    {
+      "epoch": 0.2677529711878819,
+      "grad_norm": 1.0600270797042877,
+      "learning_rate": 8.924334300026364e-06,
+      "loss": 1.1648,
+      "step": 6770
+    },
+    {
+      "epoch": 0.2681484704067709,
+      "grad_norm": 1.0647938834849626,
+      "learning_rate": 8.937516477722121e-06,
+      "loss": 1.1482,
+      "step": 6780
+    },
+    {
+      "epoch": 0.26854396962565996,
+      "grad_norm": 0.9961032924522476,
+      "learning_rate": 8.950698655417876e-06,
+      "loss": 1.1587,
+      "step": 6790
+    },
+    {
+      "epoch": 0.26893946884454906,
+      "grad_norm": 1.188830341978806,
+      "learning_rate": 8.96388083311363e-06,
+      "loss": 1.1593,
+      "step": 6800
+    },
+    {
+      "epoch": 0.2693349680634381,
+      "grad_norm": 1.1656441459820128,
+      "learning_rate": 8.977063010809385e-06,
+      "loss": 1.163,
+      "step": 6810
+    },
+    {
+      "epoch": 0.26973046728232714,
+      "grad_norm": 1.002892323701781,
+      "learning_rate": 8.990245188505142e-06,
+      "loss": 1.1418,
+      "step": 6820
+    },
+    {
+      "epoch": 0.2701259665012162,
+      "grad_norm": 1.0445255581844506,
+      "learning_rate": 9.003427366200898e-06,
+      "loss": 1.1692,
+      "step": 6830
+    },
+    {
+      "epoch": 0.2705214657201052,
+      "grad_norm": 1.1601855371753287,
+      "learning_rate": 9.016609543896653e-06,
+      "loss": 1.1587,
+      "step": 6840
+    },
+    {
+      "epoch": 0.27091696493899425,
+      "grad_norm": 1.0550325955738908,
+      "learning_rate": 9.029791721592408e-06,
+      "loss": 1.159,
+      "step": 6850
+    },
+    {
+      "epoch": 0.2713124641578833,
+      "grad_norm": 1.0481380516758225,
+      "learning_rate": 9.042973899288164e-06,
+      "loss": 1.1727,
+      "step": 6860
+    },
+    {
+      "epoch": 0.27170796337677233,
+      "grad_norm": 1.0508674424021534,
+      "learning_rate": 9.056156076983919e-06,
+      "loss": 1.1539,
+      "step": 6870
+    },
+    {
+      "epoch": 0.27210346259566137,
+      "grad_norm": 0.9900021740965856,
+      "learning_rate": 9.069338254679673e-06,
+      "loss": 1.1498,
+      "step": 6880
+    },
+    {
+      "epoch": 0.2724989618145504,
+      "grad_norm": 1.1802460494125215,
+      "learning_rate": 9.08252043237543e-06,
+      "loss": 1.153,
+      "step": 6890
+    },
+    {
+      "epoch": 0.27289446103343945,
+      "grad_norm": 1.0700598518295985,
+      "learning_rate": 9.095702610071185e-06,
+      "loss": 1.171,
+      "step": 6900
+    },
+    {
+      "epoch": 0.2732899602523285,
+      "grad_norm": 1.067586376504746,
+      "learning_rate": 9.10888478776694e-06,
+      "loss": 1.1599,
+      "step": 6910
+    },
+    {
+      "epoch": 0.27368545947121753,
+      "grad_norm": 1.0839229351108237,
+      "learning_rate": 9.122066965462694e-06,
+      "loss": 1.1475,
+      "step": 6920
+    },
+    {
+      "epoch": 0.27408095869010657,
+      "grad_norm": 1.1326930276551868,
+      "learning_rate": 9.13524914315845e-06,
+      "loss": 1.169,
+      "step": 6930
+    },
+    {
+      "epoch": 0.2744764579089956,
+      "grad_norm": 1.0496366375123172,
+      "learning_rate": 9.148431320854207e-06,
+      "loss": 1.1527,
+      "step": 6940
+    },
+    {
+      "epoch": 0.27487195712788465,
+      "grad_norm": 1.0757628236657761,
+      "learning_rate": 9.161613498549962e-06,
+      "loss": 1.1547,
+      "step": 6950
+    },
+    {
+      "epoch": 0.27526745634677374,
+      "grad_norm": 1.1642124923842847,
+      "learning_rate": 9.174795676245716e-06,
+      "loss": 1.1698,
+      "step": 6960
+    },
+    {
+      "epoch": 0.2756629555656628,
+      "grad_norm": 1.0200901316389057,
+      "learning_rate": 9.187977853941473e-06,
+      "loss": 1.1428,
+      "step": 6970
+    },
+    {
+      "epoch": 0.2760584547845518,
+      "grad_norm": 1.1191047918723218,
+      "learning_rate": 9.201160031637228e-06,
+      "loss": 1.1588,
+      "step": 6980
+    },
+    {
+      "epoch": 0.27645395400344086,
+      "grad_norm": 1.0471658185669384,
+      "learning_rate": 9.214342209332982e-06,
+      "loss": 1.1408,
+      "step": 6990
+    },
+    {
+      "epoch": 0.2768494532223299,
+      "grad_norm": 0.9956419350846413,
+      "learning_rate": 9.227524387028737e-06,
+      "loss": 1.1491,
+      "step": 7000
+    },
+    {
+      "epoch": 0.27724495244121894,
+      "grad_norm": 1.0399861975290758,
+      "learning_rate": 9.240706564724494e-06,
+      "loss": 1.1634,
+      "step": 7010
+    },
+    {
+      "epoch": 0.277640451660108,
+      "grad_norm": 1.0338356619905011,
+      "learning_rate": 9.253888742420248e-06,
+      "loss": 1.1497,
+      "step": 7020
+    },
+    {
+      "epoch": 0.278035950878997,
+      "grad_norm": 1.1210126903669186,
+      "learning_rate": 9.267070920116003e-06,
+      "loss": 1.1569,
+      "step": 7030
+    },
+    {
+      "epoch": 0.27843145009788606,
+      "grad_norm": 1.0370097724964364,
+      "learning_rate": 9.28025309781176e-06,
+      "loss": 1.1504,
+      "step": 7040
+    },
+    {
+      "epoch": 0.2788269493167751,
+      "grad_norm": 1.1003889473115938,
+      "learning_rate": 9.293435275507514e-06,
+      "loss": 1.1358,
+      "step": 7050
+    },
+    {
+      "epoch": 0.27922244853566414,
+      "grad_norm": 1.0337426460445318,
+      "learning_rate": 9.30661745320327e-06,
+      "loss": 1.1531,
+      "step": 7060
+    },
+    {
+      "epoch": 0.2796179477545532,
+      "grad_norm": 1.087073093069295,
+      "learning_rate": 9.319799630899025e-06,
+      "loss": 1.1559,
+      "step": 7070
+    },
+    {
+      "epoch": 0.2800134469734422,
+      "grad_norm": 1.074257522163238,
+      "learning_rate": 9.332981808594782e-06,
+      "loss": 1.1444,
+      "step": 7080
+    },
+    {
+      "epoch": 0.28040894619233125,
+      "grad_norm": 0.9866871639209648,
+      "learning_rate": 9.346163986290537e-06,
+      "loss": 1.1495,
+      "step": 7090
+    },
+    {
+      "epoch": 0.2808044454112203,
+      "grad_norm": 1.1689281642663236,
+      "learning_rate": 9.359346163986291e-06,
+      "loss": 1.1625,
+      "step": 7100
+    },
+    {
+      "epoch": 0.28119994463010933,
+      "grad_norm": 1.1050639124194632,
+      "learning_rate": 9.372528341682046e-06,
+      "loss": 1.1667,
+      "step": 7110
+    },
+    {
+      "epoch": 0.2815954438489984,
+      "grad_norm": 0.9995140101531379,
+      "learning_rate": 9.385710519377802e-06,
+      "loss": 1.1497,
+      "step": 7120
+    },
+    {
+      "epoch": 0.28199094306788747,
+      "grad_norm": 1.0431243905150764,
+      "learning_rate": 9.398892697073557e-06,
+      "loss": 1.1503,
+      "step": 7130
+    },
+    {
+      "epoch": 0.2823864422867765,
+      "grad_norm": 1.046178458001966,
+      "learning_rate": 9.412074874769312e-06,
+      "loss": 1.1525,
+      "step": 7140
+    },
+    {
+      "epoch": 0.28278194150566555,
+      "grad_norm": 1.0699089833697082,
+      "learning_rate": 9.425257052465068e-06,
+      "loss": 1.1579,
+      "step": 7150
+    },
+    {
+      "epoch": 0.2831774407245546,
+      "grad_norm": 1.0047040437453831,
+      "learning_rate": 9.438439230160823e-06,
+      "loss": 1.1515,
+      "step": 7160
+    },
+    {
+      "epoch": 0.2835729399434436,
+      "grad_norm": 1.0708900004693,
+      "learning_rate": 9.451621407856578e-06,
+      "loss": 1.1586,
+      "step": 7170
+    },
+    {
+      "epoch": 0.28396843916233266,
+      "grad_norm": 1.0544833393424204,
+      "learning_rate": 9.464803585552334e-06,
+      "loss": 1.1338,
+      "step": 7180
+    },
+    {
+      "epoch": 0.2843639383812217,
+      "grad_norm": 1.0159631829306173,
+      "learning_rate": 9.477985763248089e-06,
+      "loss": 1.1386,
+      "step": 7190
+    },
+    {
+      "epoch": 0.28475943760011074,
+      "grad_norm": 1.102776336382458,
+      "learning_rate": 9.491167940943845e-06,
+      "loss": 1.1336,
+      "step": 7200
+    },
+    {
+      "epoch": 0.2851549368189998,
+      "grad_norm": 1.1372067535003214,
+      "learning_rate": 9.5043501186396e-06,
+      "loss": 1.1579,
+      "step": 7210
+    },
+    {
+      "epoch": 0.2855504360378888,
+      "grad_norm": 0.9649271222588772,
+      "learning_rate": 9.517532296335355e-06,
+      "loss": 1.1524,
+      "step": 7220
+    },
+    {
+      "epoch": 0.28594593525677786,
+      "grad_norm": 1.0182131376804553,
+      "learning_rate": 9.530714474031111e-06,
+      "loss": 1.1529,
+      "step": 7230
+    },
+    {
+      "epoch": 0.2863414344756669,
+      "grad_norm": 1.1246317122292944,
+      "learning_rate": 9.543896651726866e-06,
+      "loss": 1.1656,
+      "step": 7240
+    },
+    {
+      "epoch": 0.28673693369455594,
+      "grad_norm": 1.0689858875408487,
+      "learning_rate": 9.557078829422621e-06,
+      "loss": 1.1454,
+      "step": 7250
+    },
+    {
+      "epoch": 0.287132432913445,
+      "grad_norm": 1.0257278038771898,
+      "learning_rate": 9.570261007118377e-06,
+      "loss": 1.1582,
+      "step": 7260
+    },
+    {
+      "epoch": 0.287527932132334,
+      "grad_norm": 1.1166064339430195,
+      "learning_rate": 9.583443184814132e-06,
+      "loss": 1.155,
+      "step": 7270
+    },
+    {
+      "epoch": 0.28792343135122306,
+      "grad_norm": 1.1006012516646528,
+      "learning_rate": 9.596625362509887e-06,
+      "loss": 1.1435,
+      "step": 7280
+    },
+    {
+      "epoch": 0.2883189305701121,
+      "grad_norm": 1.0630534117150428,
+      "learning_rate": 9.609807540205642e-06,
+      "loss": 1.1535,
+      "step": 7290
+    },
+    {
+      "epoch": 0.2887144297890012,
+      "grad_norm": 0.9792269528417479,
+      "learning_rate": 9.622989717901398e-06,
+      "loss": 1.1738,
+      "step": 7300
+    },
+    {
+      "epoch": 0.28910992900789023,
+      "grad_norm": 1.0055040104078166,
+      "learning_rate": 9.636171895597154e-06,
+      "loss": 1.1681,
+      "step": 7310
+    },
+    {
+      "epoch": 0.28950542822677927,
+      "grad_norm": 1.035042302985885,
+      "learning_rate": 9.649354073292909e-06,
+      "loss": 1.1428,
+      "step": 7320
+    },
+    {
+      "epoch": 0.2899009274456683,
+      "grad_norm": 1.0450520011045796,
+      "learning_rate": 9.662536250988664e-06,
+      "loss": 1.1443,
+      "step": 7330
+    },
+    {
+      "epoch": 0.29029642666455735,
+      "grad_norm": 1.096449465283461,
+      "learning_rate": 9.67571842868442e-06,
+      "loss": 1.1716,
+      "step": 7340
+    },
+    {
+      "epoch": 0.2906919258834464,
+      "grad_norm": 1.0544634958059502,
+      "learning_rate": 9.688900606380175e-06,
+      "loss": 1.1576,
+      "step": 7350
+    },
+    {
+      "epoch": 0.29108742510233543,
+      "grad_norm": 0.9878027761108865,
+      "learning_rate": 9.70208278407593e-06,
+      "loss": 1.1575,
+      "step": 7360
+    },
+    {
+      "epoch": 0.29148292432122447,
+      "grad_norm": 1.0749500885717254,
+      "learning_rate": 9.715264961771686e-06,
+      "loss": 1.1507,
+      "step": 7370
+    },
+    {
+      "epoch": 0.2918784235401135,
+      "grad_norm": 1.1787827624737508,
+      "learning_rate": 9.728447139467441e-06,
+      "loss": 1.1478,
+      "step": 7380
+    },
+    {
+      "epoch": 0.29227392275900255,
+      "grad_norm": 1.1486963767973906,
+      "learning_rate": 9.741629317163196e-06,
+      "loss": 1.1243,
+      "step": 7390
+    },
+    {
+      "epoch": 0.2926694219778916,
+      "grad_norm": 1.0523146289593874,
+      "learning_rate": 9.75481149485895e-06,
+      "loss": 1.1423,
+      "step": 7400
+    },
+    {
+      "epoch": 0.2930649211967806,
+      "grad_norm": 1.0554276018279876,
+      "learning_rate": 9.767993672554707e-06,
+      "loss": 1.1471,
+      "step": 7410
+    },
+    {
+      "epoch": 0.29346042041566966,
+      "grad_norm": 1.10403398801149,
+      "learning_rate": 9.781175850250462e-06,
+      "loss": 1.1618,
+      "step": 7420
+    },
+    {
+      "epoch": 0.2938559196345587,
+      "grad_norm": 1.0766020677378985,
+      "learning_rate": 9.794358027946218e-06,
+      "loss": 1.1586,
+      "step": 7430
+    },
+    {
+      "epoch": 0.29425141885344774,
+      "grad_norm": 1.0753900326857253,
+      "learning_rate": 9.807540205641973e-06,
+      "loss": 1.1567,
+      "step": 7440
+    },
+    {
+      "epoch": 0.2946469180723368,
+      "grad_norm": 1.0239699626505532,
+      "learning_rate": 9.82072238333773e-06,
+      "loss": 1.1715,
+      "step": 7450
+    },
+    {
+      "epoch": 0.2950424172912258,
+      "grad_norm": 1.0402348596386024,
+      "learning_rate": 9.833904561033484e-06,
+      "loss": 1.1471,
+      "step": 7460
+    },
+    {
+      "epoch": 0.2954379165101149,
+      "grad_norm": 1.0817951163292203,
+      "learning_rate": 9.847086738729239e-06,
+      "loss": 1.1608,
+      "step": 7470
+    },
+    {
+      "epoch": 0.29583341572900396,
+      "grad_norm": 1.0308457244603786,
+      "learning_rate": 9.860268916424993e-06,
+      "loss": 1.1497,
+      "step": 7480
+    },
+    {
+      "epoch": 0.296228914947893,
+      "grad_norm": 1.0679981517404136,
+      "learning_rate": 9.87345109412075e-06,
+      "loss": 1.1527,
+      "step": 7490
+    },
+    {
+      "epoch": 0.29662441416678204,
+      "grad_norm": 1.13970432943047,
+      "learning_rate": 9.886633271816505e-06,
+      "loss": 1.1589,
+      "step": 7500
+    },
+    {
+      "epoch": 0.2970199133856711,
+      "grad_norm": 1.039400451031927,
+      "learning_rate": 9.89981544951226e-06,
+      "loss": 1.1567,
+      "step": 7510
+    },
+    {
+      "epoch": 0.2974154126045601,
+      "grad_norm": 1.0206511911027096,
+      "learning_rate": 9.912997627208016e-06,
+      "loss": 1.1498,
+      "step": 7520
+    },
+    {
+      "epoch": 0.29781091182344915,
+      "grad_norm": 1.0835664350756524,
+      "learning_rate": 9.92617980490377e-06,
+      "loss": 1.1374,
+      "step": 7530
+    },
+    {
+      "epoch": 0.2982064110423382,
+      "grad_norm": 1.0142405882753354,
+      "learning_rate": 9.939361982599525e-06,
+      "loss": 1.1528,
+      "step": 7540
+    },
+    {
+      "epoch": 0.29860191026122723,
+      "grad_norm": 1.0302749761246293,
+      "learning_rate": 9.952544160295282e-06,
+      "loss": 1.1481,
+      "step": 7550
+    },
+    {
+      "epoch": 0.29899740948011627,
+      "grad_norm": 1.1212356445302314,
+      "learning_rate": 9.965726337991038e-06,
+      "loss": 1.1401,
+      "step": 7560
+    },
+    {
+      "epoch": 0.2993929086990053,
+      "grad_norm": 1.105685089454397,
+      "learning_rate": 9.978908515686793e-06,
+      "loss": 1.1362,
+      "step": 7570
+    },
+    {
+      "epoch": 0.29978840791789435,
+      "grad_norm": 1.0348685039411056,
+      "learning_rate": 9.992090693382548e-06,
+      "loss": 1.1518,
+      "step": 7580
+    },
+    {
+      "epoch": 0.3001839071367834,
+      "grad_norm": 1.0772774004761567,
+      "learning_rate": 9.999999915286853e-06,
+      "loss": 1.1454,
+      "step": 7590
+    },
+    {
+      "epoch": 0.30057940635567243,
+      "grad_norm": 1.0868277976509395,
+      "learning_rate": 9.999998962263965e-06,
+      "loss": 1.1459,
+      "step": 7600
+    },
+    {
+      "epoch": 0.30097490557456147,
+      "grad_norm": 1.0396137891404242,
+      "learning_rate": 9.99999695032696e-06,
+      "loss": 1.1415,
+      "step": 7610
+    },
+    {
+      "epoch": 0.3013704047934505,
+      "grad_norm": 1.0467776564054676,
+      "learning_rate": 9.999993879476262e-06,
+      "loss": 1.1264,
+      "step": 7620
+    },
+    {
+      "epoch": 0.3017659040123396,
+      "grad_norm": 1.0919389356037232,
+      "learning_rate": 9.99998974971252e-06,
+      "loss": 1.1527,
+      "step": 7630
+    },
+    {
+      "epoch": 0.30216140323122864,
+      "grad_norm": 1.0133468523450062,
+      "learning_rate": 9.999984561036611e-06,
+      "loss": 1.1346,
+      "step": 7640
+    },
+    {
+      "epoch": 0.3025569024501177,
+      "grad_norm": 1.0173614276530554,
+      "learning_rate": 9.999978313449632e-06,
+      "loss": 1.1513,
+      "step": 7650
+    },
+    {
+      "epoch": 0.3029524016690067,
+      "grad_norm": 1.0073447720518742,
+      "learning_rate": 9.999971006952907e-06,
+      "loss": 1.1495,
+      "step": 7660
+    },
+    {
+      "epoch": 0.30334790088789576,
+      "grad_norm": 1.1021491774529075,
+      "learning_rate": 9.999962641547982e-06,
+      "loss": 1.1464,
+      "step": 7670
+    },
+    {
+      "epoch": 0.3037434001067848,
+      "grad_norm": 1.1547605405148325,
+      "learning_rate": 9.999953217236631e-06,
+      "loss": 1.1328,
+      "step": 7680
+    },
+    {
+      "epoch": 0.30413889932567384,
+      "grad_norm": 1.0611314072445008,
+      "learning_rate": 9.999942734020848e-06,
+      "loss": 1.1576,
+      "step": 7690
+    },
+    {
+      "epoch": 0.3045343985445629,
+      "grad_norm": 1.1532099248265681,
+      "learning_rate": 9.999931191902855e-06,
+      "loss": 1.1518,
+      "step": 7700
+    },
+    {
+      "epoch": 0.3049298977634519,
+      "grad_norm": 1.0540416158600223,
+      "learning_rate": 9.999918590885093e-06,
+      "loss": 1.1315,
+      "step": 7710
+    },
+    {
+      "epoch": 0.30532539698234096,
+      "grad_norm": 1.0225290209213587,
+      "learning_rate": 9.999904930970234e-06,
+      "loss": 1.1513,
+      "step": 7720
+    },
+    {
+      "epoch": 0.30572089620123,
+      "grad_norm": 1.0187755272406196,
+      "learning_rate": 9.999890212161172e-06,
+      "loss": 1.1581,
+      "step": 7730
+    },
+    {
+      "epoch": 0.30611639542011904,
+      "grad_norm": 1.0854726225349205,
+      "learning_rate": 9.999874434461021e-06,
+      "loss": 1.1554,
+      "step": 7740
+    },
+    {
+      "epoch": 0.3065118946390081,
+      "grad_norm": 1.0921745850081388,
+      "learning_rate": 9.999857597873123e-06,
+      "loss": 1.1368,
+      "step": 7750
+    },
+    {
+      "epoch": 0.3069073938578971,
+      "grad_norm": 1.078034111228919,
+      "learning_rate": 9.999839702401044e-06,
+      "loss": 1.1668,
+      "step": 7760
+    },
+    {
+      "epoch": 0.30730289307678615,
+      "grad_norm": 1.169657677509384,
+      "learning_rate": 9.999820748048574e-06,
+      "loss": 1.1439,
+      "step": 7770
+    },
+    {
+      "epoch": 0.3076983922956752,
+      "grad_norm": 1.0794079857575394,
+      "learning_rate": 9.999800734819729e-06,
+      "loss": 1.1483,
+      "step": 7780
+    },
+    {
+      "epoch": 0.30809389151456423,
+      "grad_norm": 1.0151254316682448,
+      "learning_rate": 9.999779662718745e-06,
+      "loss": 1.1407,
+      "step": 7790
+    },
+    {
+      "epoch": 0.30848939073345333,
+      "grad_norm": 1.0435703566894419,
+      "learning_rate": 9.999757531750086e-06,
+      "loss": 1.1224,
+      "step": 7800
+    },
+    {
+      "epoch": 0.30888488995234237,
+      "grad_norm": 1.1294248679309034,
+      "learning_rate": 9.999734341918437e-06,
+      "loss": 1.1351,
+      "step": 7810
+    },
+    {
+      "epoch": 0.3092803891712314,
+      "grad_norm": 1.0285488300699832,
+      "learning_rate": 9.999710093228713e-06,
+      "loss": 1.1524,
+      "step": 7820
+    },
+    {
+      "epoch": 0.30967588839012045,
+      "grad_norm": 1.1499788419716344,
+      "learning_rate": 9.999684785686045e-06,
+      "loss": 1.1392,
+      "step": 7830
+    },
+    {
+      "epoch": 0.3100713876090095,
+      "grad_norm": 1.0681045851482305,
+      "learning_rate": 9.999658419295797e-06,
+      "loss": 1.1432,
+      "step": 7840
+    },
+    {
+      "epoch": 0.3104668868278985,
+      "grad_norm": 1.0833857535609424,
+      "learning_rate": 9.99963099406355e-06,
+      "loss": 1.1348,
+      "step": 7850
+    },
+    {
+      "epoch": 0.31086238604678756,
+      "grad_norm": 1.0396442755395066,
+      "learning_rate": 9.999602509995114e-06,
+      "loss": 1.1368,
+      "step": 7860
+    },
+    {
+      "epoch": 0.3112578852656766,
+      "grad_norm": 1.1101757808227828,
+      "learning_rate": 9.99957296709652e-06,
+      "loss": 1.1544,
+      "step": 7870
+    },
+    {
+      "epoch": 0.31165338448456564,
+      "grad_norm": 1.058708898641461,
+      "learning_rate": 9.999542365374024e-06,
+      "loss": 1.1513,
+      "step": 7880
+    },
+    {
+      "epoch": 0.3120488837034547,
+      "grad_norm": 1.1142481473073882,
+      "learning_rate": 9.99951070483411e-06,
+      "loss": 1.1184,
+      "step": 7890
+    },
+    {
+      "epoch": 0.3124443829223437,
+      "grad_norm": 1.0971805791686748,
+      "learning_rate": 9.99947798548348e-06,
+      "loss": 1.1523,
+      "step": 7900
+    },
+    {
+      "epoch": 0.31283988214123276,
+      "grad_norm": 1.0301905359960508,
+      "learning_rate": 9.999444207329066e-06,
+      "loss": 1.1438,
+      "step": 7910
+    },
+    {
+      "epoch": 0.3132353813601218,
+      "grad_norm": 1.0542089135117807,
+      "learning_rate": 9.999409370378018e-06,
+      "loss": 1.1531,
+      "step": 7920
+    },
+    {
+      "epoch": 0.31363088057901084,
+      "grad_norm": 1.011441178475839,
+      "learning_rate": 9.999373474637716e-06,
+      "loss": 1.1269,
+      "step": 7930
+    },
+    {
+      "epoch": 0.3140263797978999,
+      "grad_norm": 0.9702712175004335,
+      "learning_rate": 9.999336520115766e-06,
+      "loss": 1.119,
+      "step": 7940
+    },
+    {
+      "epoch": 0.3144218790167889,
+      "grad_norm": 1.0501379630933108,
+      "learning_rate": 9.999298506819988e-06,
+      "loss": 1.1334,
+      "step": 7950
+    },
+    {
+      "epoch": 0.31481737823567796,
+      "grad_norm": 1.0196367582239143,
+      "learning_rate": 9.999259434758434e-06,
+      "loss": 1.1471,
+      "step": 7960
+    },
+    {
+      "epoch": 0.31521287745456705,
+      "grad_norm": 0.9874760325436281,
+      "learning_rate": 9.999219303939382e-06,
+      "loss": 1.1425,
+      "step": 7970
+    },
+    {
+      "epoch": 0.3156083766734561,
+      "grad_norm": 1.1043265488578693,
+      "learning_rate": 9.999178114371329e-06,
+      "loss": 1.1427,
+      "step": 7980
+    },
+    {
+      "epoch": 0.31600387589234513,
+      "grad_norm": 1.0763472120369255,
+      "learning_rate": 9.999135866062997e-06,
+      "loss": 1.1484,
+      "step": 7990
+    },
+    {
+      "epoch": 0.31639937511123417,
+      "grad_norm": 1.171183073904711,
+      "learning_rate": 9.999092559023336e-06,
+      "loss": 1.1341,
+      "step": 8000
+    },
+    {
+      "epoch": 0.3167948743301232,
+      "grad_norm": 0.9672680634862828,
+      "learning_rate": 9.999048193261516e-06,
+      "loss": 1.1447,
+      "step": 8010
+    },
+    {
+      "epoch": 0.31719037354901225,
+      "grad_norm": 0.9610529414344143,
+      "learning_rate": 9.999002768786934e-06,
+      "loss": 1.1487,
+      "step": 8020
+    },
+    {
+      "epoch": 0.3175858727679013,
+      "grad_norm": 1.097602664923663,
+      "learning_rate": 9.998956285609208e-06,
+      "loss": 1.1369,
+      "step": 8030
+    },
+    {
+      "epoch": 0.31798137198679033,
+      "grad_norm": 1.0709544024146305,
+      "learning_rate": 9.998908743738184e-06,
+      "loss": 1.147,
+      "step": 8040
+    },
+    {
+      "epoch": 0.31837687120567937,
+      "grad_norm": 1.0310070238319275,
+      "learning_rate": 9.998860143183932e-06,
+      "loss": 1.1244,
+      "step": 8050
+    },
+    {
+      "epoch": 0.3187723704245684,
+      "grad_norm": 1.0136016687133358,
+      "learning_rate": 9.99881048395674e-06,
+      "loss": 1.1614,
+      "step": 8060
+    },
+    {
+      "epoch": 0.31916786964345745,
+      "grad_norm": 1.0670458566600354,
+      "learning_rate": 9.99875976606713e-06,
+      "loss": 1.1466,
+      "step": 8070
+    },
+    {
+      "epoch": 0.3195633688623465,
+      "grad_norm": 1.0068615908917462,
+      "learning_rate": 9.998707989525843e-06,
+      "loss": 1.1265,
+      "step": 8080
+    },
+    {
+      "epoch": 0.3199588680812355,
+      "grad_norm": 1.0486530760868977,
+      "learning_rate": 9.99865515434384e-06,
+      "loss": 1.1256,
+      "step": 8090
+    },
+    {
+      "epoch": 0.32035436730012457,
+      "grad_norm": 1.0650042476163608,
+      "learning_rate": 9.998601260532314e-06,
+      "loss": 1.135,
+      "step": 8100
+    },
+    {
+      "epoch": 0.3207498665190136,
+      "grad_norm": 1.0213542768808401,
+      "learning_rate": 9.998546308102678e-06,
+      "loss": 1.1319,
+      "step": 8110
+    },
+    {
+      "epoch": 0.32114536573790264,
+      "grad_norm": 1.0696396928211043,
+      "learning_rate": 9.998490297066569e-06,
+      "loss": 1.1336,
+      "step": 8120
+    },
+    {
+      "epoch": 0.3215408649567917,
+      "grad_norm": 1.0680316900358602,
+      "learning_rate": 9.998433227435852e-06,
+      "loss": 1.1304,
+      "step": 8130
+    },
+    {
+      "epoch": 0.3219363641756808,
+      "grad_norm": 0.9966427438402035,
+      "learning_rate": 9.99837509922261e-06,
+      "loss": 1.1381,
+      "step": 8140
+    },
+    {
+      "epoch": 0.3223318633945698,
+      "grad_norm": 1.0016410299457212,
+      "learning_rate": 9.998315912439156e-06,
+      "loss": 1.1589,
+      "step": 8150
+    },
+    {
+      "epoch": 0.32272736261345886,
+      "grad_norm": 1.122299645061776,
+      "learning_rate": 9.998255667098025e-06,
+      "loss": 1.1318,
+      "step": 8160
+    },
+    {
+      "epoch": 0.3231228618323479,
+      "grad_norm": 1.049021357182291,
+      "learning_rate": 9.998194363211972e-06,
+      "loss": 1.1326,
+      "step": 8170
+    },
+    {
+      "epoch": 0.32351836105123694,
+      "grad_norm": 1.0976292616812775,
+      "learning_rate": 9.998132000793986e-06,
+      "loss": 1.1203,
+      "step": 8180
+    },
+    {
+      "epoch": 0.323913860270126,
+      "grad_norm": 1.0319956044056868,
+      "learning_rate": 9.998068579857269e-06,
+      "loss": 1.159,
+      "step": 8190
+    },
+    {
+      "epoch": 0.324309359489015,
+      "grad_norm": 1.0894378415867148,
+      "learning_rate": 9.998004100415255e-06,
+      "loss": 1.1515,
+      "step": 8200
+    },
+    {
+      "epoch": 0.32470485870790405,
+      "grad_norm": 1.049393195654963,
+      "learning_rate": 9.997938562481599e-06,
+      "loss": 1.1386,
+      "step": 8210
+    },
+    {
+      "epoch": 0.3251003579267931,
+      "grad_norm": 1.0516067365416315,
+      "learning_rate": 9.99787196607018e-06,
+      "loss": 1.1388,
+      "step": 8220
+    },
+    {
+      "epoch": 0.32549585714568213,
+      "grad_norm": 1.003688864228743,
+      "learning_rate": 9.997804311195106e-06,
+      "loss": 1.1303,
+      "step": 8230
+    },
+    {
+      "epoch": 0.32589135636457117,
+      "grad_norm": 1.0103497056664168,
+      "learning_rate": 9.997735597870701e-06,
+      "loss": 1.1371,
+      "step": 8240
+    },
+    {
+      "epoch": 0.3262868555834602,
+      "grad_norm": 1.05287598804644,
+      "learning_rate": 9.997665826111518e-06,
+      "loss": 1.1368,
+      "step": 8250
+    },
+    {
+      "epoch": 0.32668235480234925,
+      "grad_norm": 1.175642449912088,
+      "learning_rate": 9.997594995932333e-06,
+      "loss": 1.1427,
+      "step": 8260
+    },
+    {
+      "epoch": 0.3270778540212383,
+      "grad_norm": 1.0735047383599023,
+      "learning_rate": 9.99752310734815e-06,
+      "loss": 1.1267,
+      "step": 8270
+    },
+    {
+      "epoch": 0.32747335324012733,
+      "grad_norm": 0.9774478877035402,
+      "learning_rate": 9.99745016037419e-06,
+      "loss": 1.1474,
+      "step": 8280
+    },
+    {
+      "epoch": 0.32786885245901637,
+      "grad_norm": 1.0373222243376572,
+      "learning_rate": 9.997376155025904e-06,
+      "loss": 1.1299,
+      "step": 8290
+    },
+    {
+      "epoch": 0.32826435167790546,
+      "grad_norm": 1.0347723390073413,
+      "learning_rate": 9.997301091318964e-06,
+      "loss": 1.13,
+      "step": 8300
+    },
+    {
+      "epoch": 0.3286598508967945,
+      "grad_norm": 1.0572562809127977,
+      "learning_rate": 9.997224969269268e-06,
+      "loss": 1.1376,
+      "step": 8310
+    },
+    {
+      "epoch": 0.32905535011568354,
+      "grad_norm": 1.0487817572388178,
+      "learning_rate": 9.997147788892936e-06,
+      "loss": 1.1183,
+      "step": 8320
+    },
+    {
+      "epoch": 0.3294508493345726,
+      "grad_norm": 0.9624167447177824,
+      "learning_rate": 9.997069550206315e-06,
+      "loss": 1.1303,
+      "step": 8330
+    },
+    {
+      "epoch": 0.3298463485534616,
+      "grad_norm": 1.117192972497449,
+      "learning_rate": 9.996990253225973e-06,
+      "loss": 1.141,
+      "step": 8340
+    },
+    {
+      "epoch": 0.33024184777235066,
+      "grad_norm": 1.0512934689987978,
+      "learning_rate": 9.996909897968705e-06,
+      "loss": 1.1164,
+      "step": 8350
+    },
+    {
+      "epoch": 0.3306373469912397,
+      "grad_norm": 1.0639151487875418,
+      "learning_rate": 9.996828484451531e-06,
+      "loss": 1.1281,
+      "step": 8360
+    },
+    {
+      "epoch": 0.33103284621012874,
+      "grad_norm": 1.0482950706258876,
+      "learning_rate": 9.996746012691687e-06,
+      "loss": 1.1245,
+      "step": 8370
+    },
+    {
+      "epoch": 0.3314283454290178,
+      "grad_norm": 0.993302669544618,
+      "learning_rate": 9.996662482706644e-06,
+      "loss": 1.1254,
+      "step": 8380
+    },
+    {
+      "epoch": 0.3318238446479068,
+      "grad_norm": 1.0470210374550084,
+      "learning_rate": 9.996577894514093e-06,
+      "loss": 1.1174,
+      "step": 8390
+    },
+    {
+      "epoch": 0.33221934386679586,
+      "grad_norm": 1.0306278610152682,
+      "learning_rate": 9.996492248131944e-06,
+      "loss": 1.1455,
+      "step": 8400
+    },
+    {
+      "epoch": 0.3326148430856849,
+      "grad_norm": 1.062753211878572,
+      "learning_rate": 9.996405543578339e-06,
+      "loss": 1.1423,
+      "step": 8410
+    },
+    {
+      "epoch": 0.33301034230457394,
+      "grad_norm": 1.0500947550400117,
+      "learning_rate": 9.996317780871638e-06,
+      "loss": 1.1468,
+      "step": 8420
+    },
+    {
+      "epoch": 0.333405841523463,
+      "grad_norm": 1.0349615376872283,
+      "learning_rate": 9.99622896003043e-06,
+      "loss": 1.124,
+      "step": 8430
+    },
+    {
+      "epoch": 0.333801340742352,
+      "grad_norm": 1.0565068248985459,
+      "learning_rate": 9.996139081073524e-06,
+      "loss": 1.1457,
+      "step": 8440
+    },
+    {
+      "epoch": 0.33419683996124105,
+      "grad_norm": 1.0076749331975892,
+      "learning_rate": 9.996048144019957e-06,
+      "loss": 1.1264,
+      "step": 8450
+    },
+    {
+      "epoch": 0.3345923391801301,
+      "grad_norm": 1.016972936264471,
+      "learning_rate": 9.995956148888983e-06,
+      "loss": 1.1515,
+      "step": 8460
+    },
+    {
+      "epoch": 0.3349878383990192,
+      "grad_norm": 0.9848720500399132,
+      "learning_rate": 9.99586309570009e-06,
+      "loss": 1.1346,
+      "step": 8470
+    },
+    {
+      "epoch": 0.33538333761790823,
+      "grad_norm": 1.0678641620210505,
+      "learning_rate": 9.995768984472985e-06,
+      "loss": 1.121,
+      "step": 8480
+    },
+    {
+      "epoch": 0.33577883683679727,
+      "grad_norm": 1.0260256688449667,
+      "learning_rate": 9.995673815227596e-06,
+      "loss": 1.1432,
+      "step": 8490
+    },
+    {
+      "epoch": 0.3361743360556863,
+      "grad_norm": 1.0527333136612878,
+      "learning_rate": 9.99557758798408e-06,
+      "loss": 1.1533,
+      "step": 8500
+    },
+    {
+      "epoch": 0.33656983527457535,
+      "grad_norm": 1.0512450400570732,
+      "learning_rate": 9.995480302762816e-06,
+      "loss": 1.1211,
+      "step": 8510
+    },
+    {
+      "epoch": 0.3369653344934644,
+      "grad_norm": 0.9824509401570435,
+      "learning_rate": 9.995381959584407e-06,
+      "loss": 1.1213,
+      "step": 8520
+    },
+    {
+      "epoch": 0.3373608337123534,
+      "grad_norm": 1.0059149965826761,
+      "learning_rate": 9.995282558469681e-06,
+      "loss": 1.1261,
+      "step": 8530
+    },
+    {
+      "epoch": 0.33775633293124246,
+      "grad_norm": 1.024361140946889,
+      "learning_rate": 9.995182099439689e-06,
+      "loss": 1.1297,
+      "step": 8540
+    },
+    {
+      "epoch": 0.3381518321501315,
+      "grad_norm": 1.0198188759211742,
+      "learning_rate": 9.995080582515707e-06,
+      "loss": 1.1395,
+      "step": 8550
+    },
+    {
+      "epoch": 0.33854733136902054,
+      "grad_norm": 1.0720153277284645,
+      "learning_rate": 9.994978007719235e-06,
+      "loss": 1.1357,
+      "step": 8560
+    },
+    {
+      "epoch": 0.3389428305879096,
+      "grad_norm": 1.095374327537891,
+      "learning_rate": 9.994874375071995e-06,
+      "loss": 1.1204,
+      "step": 8570
+    },
+    {
+      "epoch": 0.3393383298067986,
+      "grad_norm": 1.0500355357486757,
+      "learning_rate": 9.994769684595934e-06,
+      "loss": 1.1193,
+      "step": 8580
+    },
+    {
+      "epoch": 0.33973382902568766,
+      "grad_norm": 1.0550273474361438,
+      "learning_rate": 9.994663936313228e-06,
+      "loss": 1.1327,
+      "step": 8590
+    },
+    {
+      "epoch": 0.3401293282445767,
+      "grad_norm": 1.0372600544669366,
+      "learning_rate": 9.994557130246269e-06,
+      "loss": 1.1109,
+      "step": 8600
+    },
+    {
+      "epoch": 0.34052482746346574,
+      "grad_norm": 1.0063154344165048,
+      "learning_rate": 9.994449266417679e-06,
+      "loss": 1.1425,
+      "step": 8610
+    },
+    {
+      "epoch": 0.3409203266823548,
+      "grad_norm": 1.003439581413564,
+      "learning_rate": 9.994340344850297e-06,
+      "loss": 1.1263,
+      "step": 8620
+    },
+    {
+      "epoch": 0.3413158259012438,
+      "grad_norm": 1.020846517256961,
+      "learning_rate": 9.994230365567196e-06,
+      "loss": 1.1342,
+      "step": 8630
+    },
+    {
+      "epoch": 0.3417113251201329,
+      "grad_norm": 0.9552756004207231,
+      "learning_rate": 9.994119328591665e-06,
+      "loss": 1.1352,
+      "step": 8640
+    },
+    {
+      "epoch": 0.34210682433902195,
+      "grad_norm": 1.029396470406454,
+      "learning_rate": 9.99400723394722e-06,
+      "loss": 1.1294,
+      "step": 8650
+    },
+    {
+      "epoch": 0.342502323557911,
+      "grad_norm": 0.9694119140311189,
+      "learning_rate": 9.993894081657603e-06,
+      "loss": 1.134,
+      "step": 8660
+    },
+    {
+      "epoch": 0.34289782277680003,
+      "grad_norm": 0.953088343990665,
+      "learning_rate": 9.993779871746773e-06,
+      "loss": 1.1296,
+      "step": 8670
+    },
+    {
+      "epoch": 0.34329332199568907,
+      "grad_norm": 1.0865767299958913,
+      "learning_rate": 9.993664604238922e-06,
+      "loss": 1.1428,
+      "step": 8680
+    },
+    {
+      "epoch": 0.3436888212145781,
+      "grad_norm": 1.0647804472961102,
+      "learning_rate": 9.99354827915846e-06,
+      "loss": 1.129,
+      "step": 8690
+    },
+    {
+      "epoch": 0.34408432043346715,
+      "grad_norm": 1.0618258705063175,
+      "learning_rate": 9.993430896530023e-06,
+      "loss": 1.1537,
+      "step": 8700
+    },
+    {
+      "epoch": 0.3444798196523562,
+      "grad_norm": 1.1019951904190435,
+      "learning_rate": 9.993312456378473e-06,
+      "loss": 1.1248,
+      "step": 8710
+    },
+    {
+      "epoch": 0.34487531887124523,
+      "grad_norm": 0.982679017120889,
+      "learning_rate": 9.993192958728887e-06,
+      "loss": 1.1227,
+      "step": 8720
+    },
+    {
+      "epoch": 0.34527081809013427,
+      "grad_norm": 1.0440148335700177,
+      "learning_rate": 9.993072403606579e-06,
+      "loss": 1.1243,
+      "step": 8730
+    },
+    {
+      "epoch": 0.3456663173090233,
+      "grad_norm": 1.0077523853357433,
+      "learning_rate": 9.992950791037077e-06,
+      "loss": 1.1447,
+      "step": 8740
+    },
+    {
+      "epoch": 0.34606181652791235,
+      "grad_norm": 1.0414658166393587,
+      "learning_rate": 9.992828121046138e-06,
+      "loss": 1.1117,
+      "step": 8750
+    },
+    {
+      "epoch": 0.3464573157468014,
+      "grad_norm": 1.1905362292596,
+      "learning_rate": 9.992704393659742e-06,
+      "loss": 1.1233,
+      "step": 8760
+    },
+    {
+      "epoch": 0.3468528149656904,
+      "grad_norm": 1.0416872679906661,
+      "learning_rate": 9.99257960890409e-06,
+      "loss": 1.1383,
+      "step": 8770
+    },
+    {
+      "epoch": 0.34724831418457947,
+      "grad_norm": 1.1049680371902348,
+      "learning_rate": 9.992453766805613e-06,
+      "loss": 1.1306,
+      "step": 8780
+    },
+    {
+      "epoch": 0.3476438134034685,
+      "grad_norm": 1.0051653624864054,
+      "learning_rate": 9.992326867390958e-06,
+      "loss": 1.133,
+      "step": 8790
+    },
+    {
+      "epoch": 0.34803931262235754,
+      "grad_norm": 0.9983549946079809,
+      "learning_rate": 9.992198910687e-06,
+      "loss": 1.1205,
+      "step": 8800
+    },
+    {
+      "epoch": 0.34843481184124664,
+      "grad_norm": 1.010146381281008,
+      "learning_rate": 9.992069896720844e-06,
+      "loss": 1.1259,
+      "step": 8810
+    },
+    {
+      "epoch": 0.3488303110601357,
+      "grad_norm": 0.9587943420000765,
+      "learning_rate": 9.991939825519808e-06,
+      "loss": 1.1407,
+      "step": 8820
+    },
+    {
+      "epoch": 0.3492258102790247,
+      "grad_norm": 1.02595893601764,
+      "learning_rate": 9.991808697111438e-06,
+      "loss": 1.1134,
+      "step": 8830
+    },
+    {
+      "epoch": 0.34962130949791376,
+      "grad_norm": 1.134712823025914,
+      "learning_rate": 9.991676511523507e-06,
+      "loss": 1.1299,
+      "step": 8840
+    },
+    {
+      "epoch": 0.3500168087168028,
+      "grad_norm": 0.9259903359373105,
+      "learning_rate": 9.99154326878401e-06,
+      "loss": 1.1278,
+      "step": 8850
+    },
+    {
+      "epoch": 0.35041230793569184,
+      "grad_norm": 0.999047646858415,
+      "learning_rate": 9.991408968921164e-06,
+      "loss": 1.1308,
+      "step": 8860
+    },
+    {
+      "epoch": 0.3508078071545809,
+      "grad_norm": 1.0416490559257132,
+      "learning_rate": 9.991273611963413e-06,
+      "loss": 1.1095,
+      "step": 8870
+    },
+    {
+      "epoch": 0.3512033063734699,
+      "grad_norm": 1.0280117276909009,
+      "learning_rate": 9.991137197939422e-06,
+      "loss": 1.1319,
+      "step": 8880
+    },
+    {
+      "epoch": 0.35159880559235895,
+      "grad_norm": 0.9683079811229817,
+      "learning_rate": 9.990999726878082e-06,
+      "loss": 1.1366,
+      "step": 8890
+    },
+    {
+      "epoch": 0.351994304811248,
+      "grad_norm": 1.084251411653226,
+      "learning_rate": 9.990861198808505e-06,
+      "loss": 1.1248,
+      "step": 8900
+    },
+    {
+      "epoch": 0.35238980403013703,
+      "grad_norm": 1.1429364337151708,
+      "learning_rate": 9.990721613760033e-06,
+      "loss": 1.1426,
+      "step": 8910
+    },
+    {
+      "epoch": 0.35278530324902607,
+      "grad_norm": 1.0592192729220058,
+      "learning_rate": 9.990580971762222e-06,
+      "loss": 1.1104,
+      "step": 8920
+    },
+    {
+      "epoch": 0.3531808024679151,
+      "grad_norm": 1.0719208180664814,
+      "learning_rate": 9.990439272844864e-06,
+      "loss": 1.1251,
+      "step": 8930
+    },
+    {
+      "epoch": 0.35357630168680415,
+      "grad_norm": 0.9872484475798681,
+      "learning_rate": 9.990296517037965e-06,
+      "loss": 1.1137,
+      "step": 8940
+    },
+    {
+      "epoch": 0.3539718009056932,
+      "grad_norm": 1.059548749398577,
+      "learning_rate": 9.990152704371757e-06,
+      "loss": 1.1403,
+      "step": 8950
+    },
+    {
+      "epoch": 0.35436730012458223,
+      "grad_norm": 1.0636950555553508,
+      "learning_rate": 9.990007834876698e-06,
+      "loss": 1.1236,
+      "step": 8960
+    },
+    {
+      "epoch": 0.35476279934347127,
+      "grad_norm": 0.9765624645329233,
+      "learning_rate": 9.98986190858347e-06,
+      "loss": 1.1381,
+      "step": 8970
+    },
+    {
+      "epoch": 0.35515829856236036,
+      "grad_norm": 1.0347913337210208,
+      "learning_rate": 9.989714925522978e-06,
+      "loss": 1.1273,
+      "step": 8980
+    },
+    {
+      "epoch": 0.3555537977812494,
+      "grad_norm": 0.9929768089068478,
+      "learning_rate": 9.989566885726348e-06,
+      "loss": 1.1327,
+      "step": 8990
+    },
+    {
+      "epoch": 0.35594929700013844,
+      "grad_norm": 1.0786578398767044,
+      "learning_rate": 9.989417789224933e-06,
+      "loss": 1.1317,
+      "step": 9000
+    },
+    {
+      "epoch": 0.3563447962190275,
+      "grad_norm": 1.0329841930485364,
+      "learning_rate": 9.989267636050312e-06,
+      "loss": 1.1167,
+      "step": 9010
+    },
+    {
+      "epoch": 0.3567402954379165,
+      "grad_norm": 1.089220080518237,
+      "learning_rate": 9.989116426234282e-06,
+      "loss": 1.1352,
+      "step": 9020
+    },
+    {
+      "epoch": 0.35713579465680556,
+      "grad_norm": 0.9763428588628974,
+      "learning_rate": 9.988964159808868e-06,
+      "loss": 1.1391,
+      "step": 9030
+    },
+    {
+      "epoch": 0.3575312938756946,
+      "grad_norm": 1.0717155765345276,
+      "learning_rate": 9.988810836806316e-06,
+      "loss": 1.1282,
+      "step": 9040
+    },
+    {
+      "epoch": 0.35792679309458364,
+      "grad_norm": 0.9923632888709168,
+      "learning_rate": 9.988656457259098e-06,
+      "loss": 1.1256,
+      "step": 9050
+    },
+    {
+      "epoch": 0.3583222923134727,
+      "grad_norm": 1.1279892149995216,
+      "learning_rate": 9.988501021199909e-06,
+      "loss": 1.1184,
+      "step": 9060
+    },
+    {
+      "epoch": 0.3587177915323617,
+      "grad_norm": 1.1052382595200194,
+      "learning_rate": 9.98834452866167e-06,
+      "loss": 1.0957,
+      "step": 9070
+    },
+    {
+      "epoch": 0.35911329075125076,
+      "grad_norm": 1.051273066699656,
+      "learning_rate": 9.988186979677516e-06,
+      "loss": 1.1223,
+      "step": 9080
+    },
+    {
+      "epoch": 0.3595087899701398,
+      "grad_norm": 1.1603777999114528,
+      "learning_rate": 9.988028374280823e-06,
+      "loss": 1.1247,
+      "step": 9090
+    },
+    {
+      "epoch": 0.35990428918902884,
+      "grad_norm": 1.0289379139218968,
+      "learning_rate": 9.987868712505173e-06,
+      "loss": 1.1153,
+      "step": 9100
+    },
+    {
+      "epoch": 0.3602997884079179,
+      "grad_norm": 1.01444739938161,
+      "learning_rate": 9.987707994384384e-06,
+      "loss": 1.1209,
+      "step": 9110
+    },
+    {
+      "epoch": 0.3606952876268069,
+      "grad_norm": 1.0022852699404514,
+      "learning_rate": 9.987546219952493e-06,
+      "loss": 1.1272,
+      "step": 9120
+    },
+    {
+      "epoch": 0.36109078684569595,
+      "grad_norm": 1.0095822304412834,
+      "learning_rate": 9.98738338924376e-06,
+      "loss": 1.1376,
+      "step": 9130
+    },
+    {
+      "epoch": 0.36148628606458505,
+      "grad_norm": 1.0494025047808524,
+      "learning_rate": 9.987219502292669e-06,
+      "loss": 1.1121,
+      "step": 9140
+    },
+    {
+      "epoch": 0.3618817852834741,
+      "grad_norm": 1.1322077048475407,
+      "learning_rate": 9.98705455913393e-06,
+      "loss": 1.126,
+      "step": 9150
+    },
+    {
+      "epoch": 0.36227728450236313,
+      "grad_norm": 1.1236124912176868,
+      "learning_rate": 9.986888559802475e-06,
+      "loss": 1.1224,
+      "step": 9160
+    },
+    {
+      "epoch": 0.36267278372125217,
+      "grad_norm": 0.9997482962234848,
+      "learning_rate": 9.986721504333459e-06,
+      "loss": 1.1196,
+      "step": 9170
+    },
+    {
+      "epoch": 0.3630682829401412,
+      "grad_norm": 0.9925651425548624,
+      "learning_rate": 9.98655339276226e-06,
+      "loss": 1.127,
+      "step": 9180
+    },
+    {
+      "epoch": 0.36346378215903025,
+      "grad_norm": 1.077678112281237,
+      "learning_rate": 9.986384225124486e-06,
+      "loss": 1.1072,
+      "step": 9190
+    },
+    {
+      "epoch": 0.3638592813779193,
+      "grad_norm": 1.073854043618709,
+      "learning_rate": 9.98621400145596e-06,
+      "loss": 1.1167,
+      "step": 9200
+    },
+    {
+      "epoch": 0.3642547805968083,
+      "grad_norm": 1.0539842850855157,
+      "learning_rate": 9.986042721792733e-06,
+      "loss": 1.1262,
+      "step": 9210
+    },
+    {
+      "epoch": 0.36465027981569736,
+      "grad_norm": 1.1017431606474788,
+      "learning_rate": 9.985870386171079e-06,
+      "loss": 1.134,
+      "step": 9220
+    },
+    {
+      "epoch": 0.3650457790345864,
+      "grad_norm": 1.0682422190223901,
+      "learning_rate": 9.985696994627495e-06,
+      "loss": 1.1041,
+      "step": 9230
+    },
+    {
+      "epoch": 0.36544127825347544,
+      "grad_norm": 1.0477678757749243,
+      "learning_rate": 9.985522547198705e-06,
+      "loss": 1.1007,
+      "step": 9240
+    },
+    {
+      "epoch": 0.3658367774723645,
+      "grad_norm": 1.1042800116769727,
+      "learning_rate": 9.985347043921651e-06,
+      "loss": 1.1043,
+      "step": 9250
+    },
+    {
+      "epoch": 0.3662322766912535,
+      "grad_norm": 0.997968398430382,
+      "learning_rate": 9.985170484833504e-06,
+      "loss": 1.1076,
+      "step": 9260
+    },
+    {
+      "epoch": 0.36662777591014256,
+      "grad_norm": 0.977491959015956,
+      "learning_rate": 9.984992869971656e-06,
+      "loss": 1.1237,
+      "step": 9270
+    },
+    {
+      "epoch": 0.3670232751290316,
+      "grad_norm": 1.1095048264133744,
+      "learning_rate": 9.98481419937372e-06,
+      "loss": 1.1371,
+      "step": 9280
+    },
+    {
+      "epoch": 0.36741877434792064,
+      "grad_norm": 1.037416916737659,
+      "learning_rate": 9.98463447307754e-06,
+      "loss": 1.1153,
+      "step": 9290
+    },
+    {
+      "epoch": 0.3678142735668097,
+      "grad_norm": 1.0086105496331816,
+      "learning_rate": 9.984453691121174e-06,
+      "loss": 1.1204,
+      "step": 9300
+    },
+    {
+      "epoch": 0.3682097727856988,
+      "grad_norm": 1.0816176705148972,
+      "learning_rate": 9.984271853542913e-06,
+      "loss": 1.1,
+      "step": 9310
+    },
+    {
+      "epoch": 0.3686052720045878,
+      "grad_norm": 0.9572557457076023,
+      "learning_rate": 9.984088960381262e-06,
+      "loss": 1.1277,
+      "step": 9320
+    },
+    {
+      "epoch": 0.36900077122347685,
+      "grad_norm": 1.0295004448675464,
+      "learning_rate": 9.98390501167496e-06,
+      "loss": 1.1171,
+      "step": 9330
+    },
+    {
+      "epoch": 0.3693962704423659,
+      "grad_norm": 1.0427358996392067,
+      "learning_rate": 9.98372000746296e-06,
+      "loss": 1.1357,
+      "step": 9340
+    },
+    {
+      "epoch": 0.36979176966125493,
+      "grad_norm": 1.116715702244539,
+      "learning_rate": 9.983533947784445e-06,
+      "loss": 1.1302,
+      "step": 9350
+    },
+    {
+      "epoch": 0.37018726888014397,
+      "grad_norm": 1.0522704178594473,
+      "learning_rate": 9.98334683267882e-06,
+      "loss": 1.1122,
+      "step": 9360
+    },
+    {
+      "epoch": 0.370582768099033,
+      "grad_norm": 1.0634456329625646,
+      "learning_rate": 9.983158662185711e-06,
+      "loss": 1.1228,
+      "step": 9370
+    },
+    {
+      "epoch": 0.37097826731792205,
+      "grad_norm": 1.0710293850768031,
+      "learning_rate": 9.98296943634497e-06,
+      "loss": 1.136,
+      "step": 9380
+    },
+    {
+      "epoch": 0.3713737665368111,
+      "grad_norm": 1.0353189911976883,
+      "learning_rate": 9.98277915519667e-06,
+      "loss": 1.1223,
+      "step": 9390
+    },
+    {
+      "epoch": 0.37176926575570013,
+      "grad_norm": 1.0050316874217902,
+      "learning_rate": 9.982587818781111e-06,
+      "loss": 1.127,
+      "step": 9400
+    },
+    {
+      "epoch": 0.37216476497458917,
+      "grad_norm": 1.0339994239160502,
+      "learning_rate": 9.982395427138816e-06,
+      "loss": 1.1095,
+      "step": 9410
+    },
+    {
+      "epoch": 0.3725602641934782,
+      "grad_norm": 1.104042746902873,
+      "learning_rate": 9.982201980310529e-06,
+      "loss": 1.1227,
+      "step": 9420
+    },
+    {
+      "epoch": 0.37295576341236725,
+      "grad_norm": 0.9584769582046361,
+      "learning_rate": 9.982007478337216e-06,
+      "loss": 1.1271,
+      "step": 9430
+    },
+    {
+      "epoch": 0.3733512626312563,
+      "grad_norm": 0.9976877638030883,
+      "learning_rate": 9.981811921260074e-06,
+      "loss": 1.1175,
+      "step": 9440
+    },
+    {
+      "epoch": 0.3737467618501453,
+      "grad_norm": 0.9805635456300361,
+      "learning_rate": 9.981615309120516e-06,
+      "loss": 1.1071,
+      "step": 9450
+    },
+    {
+      "epoch": 0.37414226106903437,
+      "grad_norm": 1.0048154974103545,
+      "learning_rate": 9.981417641960181e-06,
+      "loss": 1.1029,
+      "step": 9460
+    },
+    {
+      "epoch": 0.3745377602879234,
+      "grad_norm": 1.0439486224751233,
+      "learning_rate": 9.981218919820932e-06,
+      "loss": 1.1237,
+      "step": 9470
+    },
+    {
+      "epoch": 0.3749332595068125,
+      "grad_norm": 1.0164029900498224,
+      "learning_rate": 9.981019142744857e-06,
+      "loss": 1.1158,
+      "step": 9480
+    },
+    {
+      "epoch": 0.37532875872570154,
+      "grad_norm": 1.0254852052252992,
+      "learning_rate": 9.980818310774261e-06,
+      "loss": 1.1131,
+      "step": 9490
+    },
+    {
+      "epoch": 0.3757242579445906,
+      "grad_norm": 1.1757298652288735,
+      "learning_rate": 9.98061642395168e-06,
+      "loss": 1.1077,
+      "step": 9500
+    },
+    {
+      "epoch": 0.3761197571634796,
+      "grad_norm": 1.106176068094394,
+      "learning_rate": 9.98041348231987e-06,
+      "loss": 1.0995,
+      "step": 9510
+    },
+    {
+      "epoch": 0.37651525638236866,
+      "grad_norm": 1.0229181142716006,
+      "learning_rate": 9.980209485921808e-06,
+      "loss": 1.0947,
+      "step": 9520
+    },
+    {
+      "epoch": 0.3769107556012577,
+      "grad_norm": 1.05081095195992,
+      "learning_rate": 9.980004434800701e-06,
+      "loss": 1.1137,
+      "step": 9530
+    },
+    {
+      "epoch": 0.37730625482014674,
+      "grad_norm": 0.9923798410443914,
+      "learning_rate": 9.979798328999972e-06,
+      "loss": 1.1011,
+      "step": 9540
+    },
+    {
+      "epoch": 0.3777017540390358,
+      "grad_norm": 0.9660535259951161,
+      "learning_rate": 9.97959116856327e-06,
+      "loss": 1.1131,
+      "step": 9550
+    },
+    {
+      "epoch": 0.3780972532579248,
+      "grad_norm": 1.0503255304124983,
+      "learning_rate": 9.979382953534473e-06,
+      "loss": 1.1121,
+      "step": 9560
+    },
+    {
+      "epoch": 0.37849275247681385,
+      "grad_norm": 1.0367437186998356,
+      "learning_rate": 9.979173683957672e-06,
+      "loss": 1.1252,
+      "step": 9570
+    },
+    {
+      "epoch": 0.3788882516957029,
+      "grad_norm": 1.0852408275647987,
+      "learning_rate": 9.97896335987719e-06,
+      "loss": 1.0986,
+      "step": 9580
+    },
+    {
+      "epoch": 0.37928375091459193,
+      "grad_norm": 1.102797553310787,
+      "learning_rate": 9.978751981337567e-06,
+      "loss": 1.1327,
+      "step": 9590
+    },
+    {
+      "epoch": 0.37967925013348097,
+      "grad_norm": 1.0368463735572258,
+      "learning_rate": 9.978539548383573e-06,
+      "loss": 1.1194,
+      "step": 9600
+    },
+    {
+      "epoch": 0.38007474935237,
+      "grad_norm": 1.014238982571862,
+      "learning_rate": 9.978326061060195e-06,
+      "loss": 1.1146,
+      "step": 9610
+    },
+    {
+      "epoch": 0.38047024857125905,
+      "grad_norm": 1.08945631058018,
+      "learning_rate": 9.978111519412648e-06,
+      "loss": 1.1038,
+      "step": 9620
+    },
+    {
+      "epoch": 0.3808657477901481,
+      "grad_norm": 1.0525206968371366,
+      "learning_rate": 9.977895923486368e-06,
+      "loss": 1.1058,
+      "step": 9630
+    },
+    {
+      "epoch": 0.38126124700903713,
+      "grad_norm": 1.0878230503104729,
+      "learning_rate": 9.97767927332701e-06,
+      "loss": 1.1123,
+      "step": 9640
+    },
+    {
+      "epoch": 0.3816567462279262,
+      "grad_norm": 1.0400449299260532,
+      "learning_rate": 9.977461568980464e-06,
+      "loss": 1.1126,
+      "step": 9650
+    },
+    {
+      "epoch": 0.38205224544681526,
+      "grad_norm": 1.0395133989568093,
+      "learning_rate": 9.977242810492832e-06,
+      "loss": 1.1055,
+      "step": 9660
+    },
+    {
+      "epoch": 0.3824477446657043,
+      "grad_norm": 0.9849476878072775,
+      "learning_rate": 9.977022997910443e-06,
+      "loss": 1.1194,
+      "step": 9670
+    },
+    {
+      "epoch": 0.38284324388459334,
+      "grad_norm": 1.0628990205483286,
+      "learning_rate": 9.97680213127985e-06,
+      "loss": 1.1225,
+      "step": 9680
+    },
+    {
+      "epoch": 0.3832387431034824,
+      "grad_norm": 1.07299013404265,
+      "learning_rate": 9.97658021064783e-06,
+      "loss": 1.1099,
+      "step": 9690
+    },
+    {
+      "epoch": 0.3836342423223714,
+      "grad_norm": 1.0547605927236203,
+      "learning_rate": 9.97635723606138e-06,
+      "loss": 1.1022,
+      "step": 9700
+    },
+    {
+      "epoch": 0.38402974154126046,
+      "grad_norm": 1.0043065922738772,
+      "learning_rate": 9.976133207567724e-06,
+      "loss": 1.1013,
+      "step": 9710
+    },
+    {
+      "epoch": 0.3844252407601495,
+      "grad_norm": 1.0295427867283096,
+      "learning_rate": 9.975908125214306e-06,
+      "loss": 1.1064,
+      "step": 9720
+    },
+    {
+      "epoch": 0.38482073997903854,
+      "grad_norm": 1.0029088222979348,
+      "learning_rate": 9.975681989048797e-06,
+      "loss": 1.1157,
+      "step": 9730
+    },
+    {
+      "epoch": 0.3852162391979276,
+      "grad_norm": 1.0216732831230682,
+      "learning_rate": 9.975454799119086e-06,
+      "loss": 1.1314,
+      "step": 9740
+    },
+    {
+      "epoch": 0.3856117384168166,
+      "grad_norm": 0.9637745317609469,
+      "learning_rate": 9.975226555473289e-06,
+      "loss": 1.1358,
+      "step": 9750
+    },
+    {
+      "epoch": 0.38600723763570566,
+      "grad_norm": 1.0179867247304883,
+      "learning_rate": 9.974997258159744e-06,
+      "loss": 1.1123,
+      "step": 9760
+    },
+    {
+      "epoch": 0.3864027368545947,
+      "grad_norm": 1.0121486615203783,
+      "learning_rate": 9.974766907227012e-06,
+      "loss": 1.1238,
+      "step": 9770
+    },
+    {
+      "epoch": 0.38679823607348374,
+      "grad_norm": 0.9918105729938603,
+      "learning_rate": 9.974535502723878e-06,
+      "loss": 1.1036,
+      "step": 9780
+    },
+    {
+      "epoch": 0.3871937352923728,
+      "grad_norm": 1.0458694498923289,
+      "learning_rate": 9.97430304469935e-06,
+      "loss": 1.1226,
+      "step": 9790
+    },
+    {
+      "epoch": 0.3875892345112618,
+      "grad_norm": 0.9793050895150185,
+      "learning_rate": 9.974069533202656e-06,
+      "loss": 1.1171,
+      "step": 9800
+    },
+    {
+      "epoch": 0.3879847337301509,
+      "grad_norm": 0.9793473334784043,
+      "learning_rate": 9.973834968283253e-06,
+      "loss": 1.1346,
+      "step": 9810
+    },
+    {
+      "epoch": 0.38838023294903995,
+      "grad_norm": 1.001509588770705,
+      "learning_rate": 9.973599349990815e-06,
+      "loss": 1.1242,
+      "step": 9820
+    },
+    {
+      "epoch": 0.388775732167929,
+      "grad_norm": 0.9710508476739187,
+      "learning_rate": 9.973362678375245e-06,
+      "loss": 1.1104,
+      "step": 9830
+    },
+    {
+      "epoch": 0.38917123138681803,
+      "grad_norm": 1.0417830548414544,
+      "learning_rate": 9.973124953486664e-06,
+      "loss": 1.1068,
+      "step": 9840
+    },
+    {
+      "epoch": 0.38956673060570707,
+      "grad_norm": 0.9162198755636622,
+      "learning_rate": 9.972886175375418e-06,
+      "loss": 1.0954,
+      "step": 9850
+    },
+    {
+      "epoch": 0.3899622298245961,
+      "grad_norm": 1.0406694536486392,
+      "learning_rate": 9.972646344092075e-06,
+      "loss": 1.1072,
+      "step": 9860
+    },
+    {
+      "epoch": 0.39035772904348515,
+      "grad_norm": 1.0711449808285272,
+      "learning_rate": 9.972405459687432e-06,
+      "loss": 1.1126,
+      "step": 9870
+    },
+    {
+      "epoch": 0.3907532282623742,
+      "grad_norm": 0.9852329077584552,
+      "learning_rate": 9.9721635222125e-06,
+      "loss": 1.0983,
+      "step": 9880
+    },
+    {
+      "epoch": 0.3911487274812632,
+      "grad_norm": 1.0693895343040385,
+      "learning_rate": 9.971920531718515e-06,
+      "loss": 1.0997,
+      "step": 9890
+    },
+    {
+      "epoch": 0.39154422670015226,
+      "grad_norm": 0.9214177095460434,
+      "learning_rate": 9.971676488256944e-06,
+      "loss": 1.1064,
+      "step": 9900
+    },
+    {
+      "epoch": 0.3919397259190413,
+      "grad_norm": 1.0337024299120587,
+      "learning_rate": 9.971431391879467e-06,
+      "loss": 1.1204,
+      "step": 9910
+    },
+    {
+      "epoch": 0.39233522513793034,
+      "grad_norm": 0.9560437759606772,
+      "learning_rate": 9.971185242637994e-06,
+      "loss": 1.1122,
+      "step": 9920
+    },
+    {
+      "epoch": 0.3927307243568194,
+      "grad_norm": 1.0551011377239838,
+      "learning_rate": 9.970938040584654e-06,
+      "loss": 1.1075,
+      "step": 9930
+    },
+    {
+      "epoch": 0.3931262235757084,
+      "grad_norm": 1.151113115049536,
+      "learning_rate": 9.970689785771798e-06,
+      "loss": 1.0967,
+      "step": 9940
+    },
+    {
+      "epoch": 0.39352172279459746,
+      "grad_norm": 1.0201224138961769,
+      "learning_rate": 9.970440478252007e-06,
+      "loss": 1.1,
+      "step": 9950
+    },
+    {
+      "epoch": 0.3939172220134865,
+      "grad_norm": 1.0795605295628778,
+      "learning_rate": 9.970190118078076e-06,
+      "loss": 1.1205,
+      "step": 9960
+    },
+    {
+      "epoch": 0.39431272123237554,
+      "grad_norm": 1.0083725236350867,
+      "learning_rate": 9.969938705303027e-06,
+      "loss": 1.1073,
+      "step": 9970
+    },
+    {
+      "epoch": 0.39470822045126464,
+      "grad_norm": 1.121994911796126,
+      "learning_rate": 9.969686239980108e-06,
+      "loss": 1.1101,
+      "step": 9980
+    },
+    {
+      "epoch": 0.3951037196701537,
+      "grad_norm": 1.063411115963048,
+      "learning_rate": 9.969432722162783e-06,
+      "loss": 1.1193,
+      "step": 9990
+    },
+    {
+      "epoch": 0.3954992188890427,
+      "grad_norm": 0.9878103434786119,
+      "learning_rate": 9.969178151904747e-06,
+      "loss": 1.1162,
+      "step": 10000
+    },
+    {
+      "epoch": 0.39589471810793175,
+      "grad_norm": 1.0938036325499716,
+      "learning_rate": 9.96892252925991e-06,
+      "loss": 1.1101,
+      "step": 10010
+    },
+    {
+      "epoch": 0.3962902173268208,
+      "grad_norm": 1.0270892809102057,
+      "learning_rate": 9.96866585428241e-06,
+      "loss": 1.1021,
+      "step": 10020
+    },
+    {
+      "epoch": 0.39668571654570983,
+      "grad_norm": 1.0218555121076212,
+      "learning_rate": 9.968408127026607e-06,
+      "loss": 1.1063,
+      "step": 10030
+    },
+    {
+      "epoch": 0.39708121576459887,
+      "grad_norm": 1.0649632847639012,
+      "learning_rate": 9.96814934754708e-06,
+      "loss": 1.0916,
+      "step": 10040
+    },
+    {
+      "epoch": 0.3974767149834879,
+      "grad_norm": 0.9610939878087368,
+      "learning_rate": 9.967889515898639e-06,
+      "loss": 1.1209,
+      "step": 10050
+    },
+    {
+      "epoch": 0.39787221420237695,
+      "grad_norm": 0.9618619533903922,
+      "learning_rate": 9.967628632136309e-06,
+      "loss": 1.0864,
+      "step": 10060
+    },
+    {
+      "epoch": 0.398267713421266,
+      "grad_norm": 1.0807077777484355,
+      "learning_rate": 9.967366696315341e-06,
+      "loss": 1.1199,
+      "step": 10070
+    },
+    {
+      "epoch": 0.39866321264015503,
+      "grad_norm": 1.0834602594062668,
+      "learning_rate": 9.967103708491208e-06,
+      "loss": 1.1092,
+      "step": 10080
+    },
+    {
+      "epoch": 0.39905871185904407,
+      "grad_norm": 1.1191855887555566,
+      "learning_rate": 9.966839668719606e-06,
+      "loss": 1.1088,
+      "step": 10090
+    },
+    {
+      "epoch": 0.3994542110779331,
+      "grad_norm": 0.9072934458458796,
+      "learning_rate": 9.966574577056456e-06,
+      "loss": 1.0996,
+      "step": 10100
+    },
+    {
+      "epoch": 0.39984971029682215,
+      "grad_norm": 1.0565222435402193,
+      "learning_rate": 9.966308433557898e-06,
+      "loss": 1.087,
+      "step": 10110
+    },
+    {
+      "epoch": 0.4002452095157112,
+      "grad_norm": 0.9801589633119278,
+      "learning_rate": 9.9660412382803e-06,
+      "loss": 1.0973,
+      "step": 10120
+    },
+    {
+      "epoch": 0.4006407087346002,
+      "grad_norm": 1.0435532438776338,
+      "learning_rate": 9.965772991280245e-06,
+      "loss": 1.1139,
+      "step": 10130
+    },
+    {
+      "epoch": 0.40103620795348927,
+      "grad_norm": 0.9504987792409991,
+      "learning_rate": 9.965503692614546e-06,
+      "loss": 1.105,
+      "step": 10140
+    },
+    {
+      "epoch": 0.40143170717237836,
+      "grad_norm": 0.9888649298002512,
+      "learning_rate": 9.965233342340234e-06,
+      "loss": 1.1218,
+      "step": 10150
+    },
+    {
+      "epoch": 0.4018272063912674,
+      "grad_norm": 1.0699139847993098,
+      "learning_rate": 9.964961940514566e-06,
+      "loss": 1.1051,
+      "step": 10160
+    },
+    {
+      "epoch": 0.40222270561015644,
+      "grad_norm": 1.0575019427475523,
+      "learning_rate": 9.964689487195018e-06,
+      "loss": 1.0961,
+      "step": 10170
+    },
+    {
+      "epoch": 0.4026182048290455,
+      "grad_norm": 1.0329371927847701,
+      "learning_rate": 9.964415982439295e-06,
+      "loss": 1.1151,
+      "step": 10180
+    },
+    {
+      "epoch": 0.4030137040479345,
+      "grad_norm": 1.071340976149401,
+      "learning_rate": 9.964141426305317e-06,
+      "loss": 1.0811,
+      "step": 10190
+    },
+    {
+      "epoch": 0.40340920326682356,
+      "grad_norm": 1.004672370789076,
+      "learning_rate": 9.96386581885123e-06,
+      "loss": 1.1283,
+      "step": 10200
+    },
+    {
+      "epoch": 0.4038047024857126,
+      "grad_norm": 1.0359033718828674,
+      "learning_rate": 9.963589160135408e-06,
+      "loss": 1.1197,
+      "step": 10210
+    },
+    {
+      "epoch": 0.40420020170460164,
+      "grad_norm": 1.0139309760656154,
+      "learning_rate": 9.963311450216436e-06,
+      "loss": 1.1142,
+      "step": 10220
+    },
+    {
+      "epoch": 0.4045957009234907,
+      "grad_norm": 0.980819296100321,
+      "learning_rate": 9.963032689153133e-06,
+      "loss": 1.1149,
+      "step": 10230
+    },
+    {
+      "epoch": 0.4049912001423797,
+      "grad_norm": 1.0669016594371714,
+      "learning_rate": 9.962752877004533e-06,
+      "loss": 1.1158,
+      "step": 10240
+    },
+    {
+      "epoch": 0.40538669936126875,
+      "grad_norm": 1.009605538851146,
+      "learning_rate": 9.962472013829897e-06,
+      "loss": 1.1037,
+      "step": 10250
+    },
+    {
+      "epoch": 0.4057821985801578,
+      "grad_norm": 1.0624790508946451,
+      "learning_rate": 9.962190099688707e-06,
+      "loss": 1.0977,
+      "step": 10260
+    },
+    {
+      "epoch": 0.40617769779904683,
+      "grad_norm": 1.027668060928655,
+      "learning_rate": 9.961907134640665e-06,
+      "loss": 1.1112,
+      "step": 10270
+    },
+    {
+      "epoch": 0.4065731970179359,
+      "grad_norm": 0.9587393153295167,
+      "learning_rate": 9.961623118745702e-06,
+      "loss": 1.0933,
+      "step": 10280
+    },
+    {
+      "epoch": 0.4069686962368249,
+      "grad_norm": 0.9798009593140417,
+      "learning_rate": 9.961338052063966e-06,
+      "loss": 1.0934,
+      "step": 10290
+    },
+    {
+      "epoch": 0.40736419545571395,
+      "grad_norm": 1.0802394118511471,
+      "learning_rate": 9.961051934655829e-06,
+      "loss": 1.0963,
+      "step": 10300
+    },
+    {
+      "epoch": 0.407759694674603,
+      "grad_norm": 1.0214608892621604,
+      "learning_rate": 9.960764766581884e-06,
+      "loss": 1.112,
+      "step": 10310
+    },
+    {
+      "epoch": 0.4081551938934921,
+      "grad_norm": 1.0436194949255686,
+      "learning_rate": 9.960476547902954e-06,
+      "loss": 1.0966,
+      "step": 10320
+    },
+    {
+      "epoch": 0.4085506931123811,
+      "grad_norm": 0.9598798853908683,
+      "learning_rate": 9.960187278680071e-06,
+      "loss": 1.1026,
+      "step": 10330
+    },
+    {
+      "epoch": 0.40894619233127016,
+      "grad_norm": 0.9534817754362086,
+      "learning_rate": 9.959896958974504e-06,
+      "loss": 1.1222,
+      "step": 10340
+    },
+    {
+      "epoch": 0.4093416915501592,
+      "grad_norm": 1.0059454448103347,
+      "learning_rate": 9.959605588847734e-06,
+      "loss": 1.11,
+      "step": 10350
+    },
+    {
+      "epoch": 0.40973719076904824,
+      "grad_norm": 1.0241136722645863,
+      "learning_rate": 9.95931316836147e-06,
+      "loss": 1.1033,
+      "step": 10360
+    },
+    {
+      "epoch": 0.4101326899879373,
+      "grad_norm": 1.0233542649657095,
+      "learning_rate": 9.959019697577639e-06,
+      "loss": 1.0957,
+      "step": 10370
+    },
+    {
+      "epoch": 0.4105281892068263,
+      "grad_norm": 1.004489393605286,
+      "learning_rate": 9.958725176558397e-06,
+      "loss": 1.1182,
+      "step": 10380
+    },
+    {
+      "epoch": 0.41092368842571536,
+      "grad_norm": 1.066089084963295,
+      "learning_rate": 9.958429605366116e-06,
+      "loss": 1.1046,
+      "step": 10390
+    },
+    {
+      "epoch": 0.4113191876446044,
+      "grad_norm": 1.0556130220433249,
+      "learning_rate": 9.958132984063391e-06,
+      "loss": 1.1104,
+      "step": 10400
+    },
+    {
+      "epoch": 0.41171468686349344,
+      "grad_norm": 1.0577388426627141,
+      "learning_rate": 9.957835312713047e-06,
+      "loss": 1.1078,
+      "step": 10410
+    },
+    {
+      "epoch": 0.4121101860823825,
+      "grad_norm": 1.0253595957999095,
+      "learning_rate": 9.95753659137812e-06,
+      "loss": 1.0947,
+      "step": 10420
+    },
+    {
+      "epoch": 0.4125056853012715,
+      "grad_norm": 1.0002491533261828,
+      "learning_rate": 9.957236820121877e-06,
+      "loss": 1.092,
+      "step": 10430
+    },
+    {
+      "epoch": 0.41290118452016056,
+      "grad_norm": 0.9888996364654783,
+      "learning_rate": 9.956935999007804e-06,
+      "loss": 1.1111,
+      "step": 10440
+    },
+    {
+      "epoch": 0.4132966837390496,
+      "grad_norm": 0.9740027516031293,
+      "learning_rate": 9.95663412809961e-06,
+      "loss": 1.1146,
+      "step": 10450
+    },
+    {
+      "epoch": 0.41369218295793864,
+      "grad_norm": 1.009827198550138,
+      "learning_rate": 9.956331207461225e-06,
+      "loss": 1.0992,
+      "step": 10460
+    },
+    {
+      "epoch": 0.4140876821768277,
+      "grad_norm": 1.0188489363577975,
+      "learning_rate": 9.956027237156802e-06,
+      "loss": 1.1024,
+      "step": 10470
+    },
+    {
+      "epoch": 0.41448318139571677,
+      "grad_norm": 1.0173021251829502,
+      "learning_rate": 9.95572221725072e-06,
+      "loss": 1.1033,
+      "step": 10480
+    },
+    {
+      "epoch": 0.4148786806146058,
+      "grad_norm": 1.0002097050284078,
+      "learning_rate": 9.955416147807575e-06,
+      "loss": 1.0835,
+      "step": 10490
+    },
+    {
+      "epoch": 0.41527417983349485,
+      "grad_norm": 1.0642686991454433,
+      "learning_rate": 9.955109028892184e-06,
+      "loss": 1.1141,
+      "step": 10500
+    },
+    {
+      "epoch": 0.4156696790523839,
+      "grad_norm": 1.1188278694377543,
+      "learning_rate": 9.954800860569596e-06,
+      "loss": 1.1086,
+      "step": 10510
+    },
+    {
+      "epoch": 0.41606517827127293,
+      "grad_norm": 1.0944385863207664,
+      "learning_rate": 9.95449164290507e-06,
+      "loss": 1.0843,
+      "step": 10520
+    },
+    {
+      "epoch": 0.41646067749016197,
+      "grad_norm": 1.0768852933614141,
+      "learning_rate": 9.954181375964097e-06,
+      "loss": 1.098,
+      "step": 10530
+    },
+    {
+      "epoch": 0.416856176709051,
+      "grad_norm": 0.9951046848585466,
+      "learning_rate": 9.953870059812382e-06,
+      "loss": 1.1012,
+      "step": 10540
+    },
+    {
+      "epoch": 0.41725167592794005,
+      "grad_norm": 0.9933143998683207,
+      "learning_rate": 9.95355769451586e-06,
+      "loss": 1.0885,
+      "step": 10550
+    },
+    {
+      "epoch": 0.4176471751468291,
+      "grad_norm": 1.0131764592022374,
+      "learning_rate": 9.953244280140684e-06,
+      "loss": 1.1033,
+      "step": 10560
+    },
+    {
+      "epoch": 0.4180426743657181,
+      "grad_norm": 1.055615563609746,
+      "learning_rate": 9.952929816753229e-06,
+      "loss": 1.098,
+      "step": 10570
+    },
+    {
+      "epoch": 0.41843817358460716,
+      "grad_norm": 1.07471858065964,
+      "learning_rate": 9.952614304420096e-06,
+      "loss": 1.1043,
+      "step": 10580
+    },
+    {
+      "epoch": 0.4188336728034962,
+      "grad_norm": 1.1261206466260603,
+      "learning_rate": 9.952297743208099e-06,
+      "loss": 1.0996,
+      "step": 10590
+    },
+    {
+      "epoch": 0.41922917202238524,
+      "grad_norm": 1.0580983550302636,
+      "learning_rate": 9.951980133184285e-06,
+      "loss": 1.0838,
+      "step": 10600
+    },
+    {
+      "epoch": 0.4196246712412743,
+      "grad_norm": 1.0264943411677727,
+      "learning_rate": 9.951661474415917e-06,
+      "loss": 1.0974,
+      "step": 10610
+    },
+    {
+      "epoch": 0.4200201704601633,
+      "grad_norm": 0.9534541948641568,
+      "learning_rate": 9.951341766970481e-06,
+      "loss": 1.1084,
+      "step": 10620
+    },
+    {
+      "epoch": 0.42041566967905236,
+      "grad_norm": 0.982430076540803,
+      "learning_rate": 9.951021010915687e-06,
+      "loss": 1.0972,
+      "step": 10630
+    },
+    {
+      "epoch": 0.4208111688979414,
+      "grad_norm": 0.9654257368809844,
+      "learning_rate": 9.950699206319465e-06,
+      "loss": 1.0962,
+      "step": 10640
+    },
+    {
+      "epoch": 0.4212066681168305,
+      "grad_norm": 1.0502245195456388,
+      "learning_rate": 9.950376353249966e-06,
+      "loss": 1.088,
+      "step": 10650
+    },
+    {
+      "epoch": 0.42160216733571954,
+      "grad_norm": 0.9216094220145762,
+      "learning_rate": 9.950052451775566e-06,
+      "loss": 1.0977,
+      "step": 10660
+    },
+    {
+      "epoch": 0.4219976665546086,
+      "grad_norm": 0.9815229444734691,
+      "learning_rate": 9.949727501964865e-06,
+      "loss": 1.1054,
+      "step": 10670
+    },
+    {
+      "epoch": 0.4223931657734976,
+      "grad_norm": 1.082794253318771,
+      "learning_rate": 9.949401503886676e-06,
+      "loss": 1.1091,
+      "step": 10680
+    },
+    {
+      "epoch": 0.42278866499238665,
+      "grad_norm": 1.0199739190386292,
+      "learning_rate": 9.949074457610044e-06,
+      "loss": 1.0938,
+      "step": 10690
+    },
+    {
+      "epoch": 0.4231841642112757,
+      "grad_norm": 1.0513838285010413,
+      "learning_rate": 9.948746363204229e-06,
+      "loss": 1.0902,
+      "step": 10700
+    },
+    {
+      "epoch": 0.42357966343016473,
+      "grad_norm": 1.0671073155623783,
+      "learning_rate": 9.948417220738718e-06,
+      "loss": 1.0938,
+      "step": 10710
+    },
+    {
+      "epoch": 0.42397516264905377,
+      "grad_norm": 1.078713729070709,
+      "learning_rate": 9.948087030283215e-06,
+      "loss": 1.0994,
+      "step": 10720
+    },
+    {
+      "epoch": 0.4243706618679428,
+      "grad_norm": 1.0682584409664808,
+      "learning_rate": 9.947755791907654e-06,
+      "loss": 1.087,
+      "step": 10730
+    },
+    {
+      "epoch": 0.42476616108683185,
+      "grad_norm": 1.0971027622201872,
+      "learning_rate": 9.947423505682178e-06,
+      "loss": 1.0782,
+      "step": 10740
+    },
+    {
+      "epoch": 0.4251616603057209,
+      "grad_norm": 1.0131832229168385,
+      "learning_rate": 9.947090171677167e-06,
+      "loss": 1.1163,
+      "step": 10750
+    },
+    {
+      "epoch": 0.42555715952460993,
+      "grad_norm": 1.008127849914707,
+      "learning_rate": 9.946755789963211e-06,
+      "loss": 1.094,
+      "step": 10760
+    },
+    {
+      "epoch": 0.42595265874349897,
+      "grad_norm": 0.9861033669460224,
+      "learning_rate": 9.94642036061113e-06,
+      "loss": 1.0984,
+      "step": 10770
+    },
+    {
+      "epoch": 0.426348157962388,
+      "grad_norm": 0.9440359467572984,
+      "learning_rate": 9.946083883691955e-06,
+      "loss": 1.1009,
+      "step": 10780
+    },
+    {
+      "epoch": 0.42674365718127705,
+      "grad_norm": 0.9137583200995897,
+      "learning_rate": 9.945746359276954e-06,
+      "loss": 1.1187,
+      "step": 10790
+    },
+    {
+      "epoch": 0.4271391564001661,
+      "grad_norm": 1.0012169737165786,
+      "learning_rate": 9.945407787437604e-06,
+      "loss": 1.1036,
+      "step": 10800
+    },
+    {
+      "epoch": 0.4275346556190551,
+      "grad_norm": 1.1283936633576026,
+      "learning_rate": 9.94506816824561e-06,
+      "loss": 1.1035,
+      "step": 10810
+    },
+    {
+      "epoch": 0.4279301548379442,
+      "grad_norm": 1.0048217417799656,
+      "learning_rate": 9.9447275017729e-06,
+      "loss": 1.0896,
+      "step": 10820
+    },
+    {
+      "epoch": 0.42832565405683326,
+      "grad_norm": 0.94677519928578,
+      "learning_rate": 9.944385788091617e-06,
+      "loss": 1.0873,
+      "step": 10830
+    },
+    {
+      "epoch": 0.4287211532757223,
+      "grad_norm": 1.0430445396126522,
+      "learning_rate": 9.944043027274133e-06,
+      "loss": 1.0828,
+      "step": 10840
+    },
+    {
+      "epoch": 0.42911665249461134,
+      "grad_norm": 0.9860688108752107,
+      "learning_rate": 9.943699219393038e-06,
+      "loss": 1.1124,
+      "step": 10850
+    },
+    {
+      "epoch": 0.4295121517135004,
+      "grad_norm": 1.050354458261774,
+      "learning_rate": 9.943354364521145e-06,
+      "loss": 1.0949,
+      "step": 10860
+    },
+    {
+      "epoch": 0.4299076509323894,
+      "grad_norm": 0.9830839591794984,
+      "learning_rate": 9.943008462731487e-06,
+      "loss": 1.1098,
+      "step": 10870
+    },
+    {
+      "epoch": 0.43030315015127846,
+      "grad_norm": 1.0779875148884694,
+      "learning_rate": 9.942661514097322e-06,
+      "loss": 1.0864,
+      "step": 10880
+    },
+    {
+      "epoch": 0.4306986493701675,
+      "grad_norm": 0.9933059459800833,
+      "learning_rate": 9.942313518692126e-06,
+      "loss": 1.1168,
+      "step": 10890
+    },
+    {
+      "epoch": 0.43109414858905654,
+      "grad_norm": 1.0597642753884047,
+      "learning_rate": 9.9419644765896e-06,
+      "loss": 1.1016,
+      "step": 10900
+    },
+    {
+      "epoch": 0.4314896478079456,
+      "grad_norm": 1.037187833468306,
+      "learning_rate": 9.941614387863666e-06,
+      "loss": 1.1081,
+      "step": 10910
+    },
+    {
+      "epoch": 0.4318851470268346,
+      "grad_norm": 1.01108747221716,
+      "learning_rate": 9.941263252588465e-06,
+      "loss": 1.1043,
+      "step": 10920
+    },
+    {
+      "epoch": 0.43228064624572365,
+      "grad_norm": 1.0019618828263877,
+      "learning_rate": 9.94091107083836e-06,
+      "loss": 1.1053,
+      "step": 10930
+    },
+    {
+      "epoch": 0.4326761454646127,
+      "grad_norm": 0.964488851707323,
+      "learning_rate": 9.94055784268794e-06,
+      "loss": 1.1211,
+      "step": 10940
+    },
+    {
+      "epoch": 0.43307164468350173,
+      "grad_norm": 1.0847389521137731,
+      "learning_rate": 9.94020356821201e-06,
+      "loss": 1.0846,
+      "step": 10950
+    },
+    {
+      "epoch": 0.4334671439023908,
+      "grad_norm": 1.0803169854020769,
+      "learning_rate": 9.939848247485603e-06,
+      "loss": 1.0989,
+      "step": 10960
+    },
+    {
+      "epoch": 0.4338626431212798,
+      "grad_norm": 1.1564151971278853,
+      "learning_rate": 9.939491880583967e-06,
+      "loss": 1.0813,
+      "step": 10970
+    },
+    {
+      "epoch": 0.43425814234016885,
+      "grad_norm": 0.9969080807018494,
+      "learning_rate": 9.939134467582574e-06,
+      "loss": 1.1055,
+      "step": 10980
+    },
+    {
+      "epoch": 0.43465364155905795,
+      "grad_norm": 0.995488236802104,
+      "learning_rate": 9.93877600855712e-06,
+      "loss": 1.1032,
+      "step": 10990
+    },
+    {
+      "epoch": 0.435049140777947,
+      "grad_norm": 1.1401382616049591,
+      "learning_rate": 9.938416503583518e-06,
+      "loss": 1.1048,
+      "step": 11000
+    },
+    {
+      "epoch": 0.435444639996836,
+      "grad_norm": 0.9830834040407321,
+      "learning_rate": 9.938055952737908e-06,
+      "loss": 1.0994,
+      "step": 11010
+    },
+    {
+      "epoch": 0.43584013921572506,
+      "grad_norm": 0.966136786648485,
+      "learning_rate": 9.937694356096646e-06,
+      "loss": 1.0939,
+      "step": 11020
+    },
+    {
+      "epoch": 0.4362356384346141,
+      "grad_norm": 1.0545500702021946,
+      "learning_rate": 9.937331713736313e-06,
+      "loss": 1.094,
+      "step": 11030
+    },
+    {
+      "epoch": 0.43663113765350314,
+      "grad_norm": 1.0354111758715492,
+      "learning_rate": 9.93696802573371e-06,
+      "loss": 1.1124,
+      "step": 11040
+    },
+    {
+      "epoch": 0.4370266368723922,
+      "grad_norm": 1.0264134414827726,
+      "learning_rate": 9.93660329216586e-06,
+      "loss": 1.1046,
+      "step": 11050
+    },
+    {
+      "epoch": 0.4374221360912812,
+      "grad_norm": 1.0169260152512325,
+      "learning_rate": 9.936237513110009e-06,
+      "loss": 1.1036,
+      "step": 11060
+    },
+    {
+      "epoch": 0.43781763531017026,
+      "grad_norm": 0.9644859700148494,
+      "learning_rate": 9.935870688643621e-06,
+      "loss": 1.1112,
+      "step": 11070
+    },
+    {
+      "epoch": 0.4382131345290593,
+      "grad_norm": 1.0797495750563293,
+      "learning_rate": 9.935502818844382e-06,
+      "loss": 1.095,
+      "step": 11080
+    },
+    {
+      "epoch": 0.43860863374794834,
+      "grad_norm": 0.9794299641147116,
+      "learning_rate": 9.935133903790204e-06,
+      "loss": 1.092,
+      "step": 11090
+    },
+    {
+      "epoch": 0.4390041329668374,
+      "grad_norm": 1.103512613125175,
+      "learning_rate": 9.934763943559213e-06,
+      "loss": 1.099,
+      "step": 11100
+    },
+    {
+      "epoch": 0.4393996321857264,
+      "grad_norm": 0.9263846713150826,
+      "learning_rate": 9.934392938229765e-06,
+      "loss": 1.1171,
+      "step": 11110
+    },
+    {
+      "epoch": 0.43979513140461546,
+      "grad_norm": 1.0244173963950725,
+      "learning_rate": 9.934020887880427e-06,
+      "loss": 1.0991,
+      "step": 11120
+    },
+    {
+      "epoch": 0.4401906306235045,
+      "grad_norm": 1.0219006301989262,
+      "learning_rate": 9.933647792589996e-06,
+      "loss": 1.1033,
+      "step": 11130
+    },
+    {
+      "epoch": 0.44058612984239354,
+      "grad_norm": 0.9673422144227102,
+      "learning_rate": 9.933273652437485e-06,
+      "loss": 1.0825,
+      "step": 11140
+    },
+    {
+      "epoch": 0.44098162906128263,
+      "grad_norm": 1.051906770079977,
+      "learning_rate": 9.932898467502135e-06,
+      "loss": 1.0713,
+      "step": 11150
+    },
+    {
+      "epoch": 0.44137712828017167,
+      "grad_norm": 1.0131891534524688,
+      "learning_rate": 9.9325222378634e-06,
+      "loss": 1.1088,
+      "step": 11160
+    },
+    {
+      "epoch": 0.4417726274990607,
+      "grad_norm": 1.0437909035026047,
+      "learning_rate": 9.932144963600959e-06,
+      "loss": 1.0825,
+      "step": 11170
+    },
+    {
+      "epoch": 0.44216812671794975,
+      "grad_norm": 1.0552136599026627,
+      "learning_rate": 9.931766644794714e-06,
+      "loss": 1.1126,
+      "step": 11180
+    },
+    {
+      "epoch": 0.4425636259368388,
+      "grad_norm": 1.0173212695449023,
+      "learning_rate": 9.931387281524785e-06,
+      "loss": 1.0967,
+      "step": 11190
+    },
+    {
+      "epoch": 0.44295912515572783,
+      "grad_norm": 1.0344426914268472,
+      "learning_rate": 9.931006873871517e-06,
+      "loss": 1.0958,
+      "step": 11200
+    },
+    {
+      "epoch": 0.44335462437461687,
+      "grad_norm": 0.9938248423989837,
+      "learning_rate": 9.930625421915469e-06,
+      "loss": 1.0883,
+      "step": 11210
+    },
+    {
+      "epoch": 0.4437501235935059,
+      "grad_norm": 1.0781933853368326,
+      "learning_rate": 9.930242925737433e-06,
+      "loss": 1.1122,
+      "step": 11220
+    },
+    {
+      "epoch": 0.44414562281239495,
+      "grad_norm": 1.009762301149643,
+      "learning_rate": 9.929859385418408e-06,
+      "loss": 1.0655,
+      "step": 11230
+    },
+    {
+      "epoch": 0.444541122031284,
+      "grad_norm": 1.0237014475868262,
+      "learning_rate": 9.929474801039625e-06,
+      "loss": 1.1111,
+      "step": 11240
+    },
+    {
+      "epoch": 0.444936621250173,
+      "grad_norm": 0.9696411371836282,
+      "learning_rate": 9.929089172682533e-06,
+      "loss": 1.0891,
+      "step": 11250
+    },
+    {
+      "epoch": 0.44533212046906206,
+      "grad_norm": 1.0896299597864119,
+      "learning_rate": 9.928702500428799e-06,
+      "loss": 1.0847,
+      "step": 11260
+    },
+    {
+      "epoch": 0.4457276196879511,
+      "grad_norm": 1.0144938906422898,
+      "learning_rate": 9.928314784360315e-06,
+      "loss": 1.0873,
+      "step": 11270
+    },
+    {
+      "epoch": 0.44612311890684014,
+      "grad_norm": 1.0216108407321423,
+      "learning_rate": 9.927926024559193e-06,
+      "loss": 1.089,
+      "step": 11280
+    },
+    {
+      "epoch": 0.4465186181257292,
+      "grad_norm": 1.0269947541047353,
+      "learning_rate": 9.927536221107766e-06,
+      "loss": 1.1025,
+      "step": 11290
+    },
+    {
+      "epoch": 0.4469141173446182,
+      "grad_norm": 0.9971150678741842,
+      "learning_rate": 9.927145374088586e-06,
+      "loss": 1.0868,
+      "step": 11300
+    },
+    {
+      "epoch": 0.44730961656350726,
+      "grad_norm": 1.011487243390694,
+      "learning_rate": 9.926753483584428e-06,
+      "loss": 1.0785,
+      "step": 11310
+    },
+    {
+      "epoch": 0.44770511578239636,
+      "grad_norm": 1.0382933157283536,
+      "learning_rate": 9.926360549678288e-06,
+      "loss": 1.0807,
+      "step": 11320
+    },
+    {
+      "epoch": 0.4481006150012854,
+      "grad_norm": 0.9880253668282665,
+      "learning_rate": 9.925966572453385e-06,
+      "loss": 1.1003,
+      "step": 11330
+    },
+    {
+      "epoch": 0.44849611422017444,
+      "grad_norm": 1.135149114105303,
+      "learning_rate": 9.925571551993155e-06,
+      "loss": 1.0837,
+      "step": 11340
+    },
+    {
+      "epoch": 0.4488916134390635,
+      "grad_norm": 0.9486772039120146,
+      "learning_rate": 9.925175488381252e-06,
+      "loss": 1.0951,
+      "step": 11350
+    },
+    {
+      "epoch": 0.4492871126579525,
+      "grad_norm": 1.028428710304506,
+      "learning_rate": 9.924778381701562e-06,
+      "loss": 1.124,
+      "step": 11360
+    },
+    {
+      "epoch": 0.44968261187684155,
+      "grad_norm": 0.8649859516919115,
+      "learning_rate": 9.924380232038184e-06,
+      "loss": 1.0794,
+      "step": 11370
+    },
+    {
+      "epoch": 0.4500781110957306,
+      "grad_norm": 1.1226433354182574,
+      "learning_rate": 9.923981039475437e-06,
+      "loss": 1.0906,
+      "step": 11380
+    },
+    {
+      "epoch": 0.45047361031461963,
+      "grad_norm": 1.0296294561193022,
+      "learning_rate": 9.923580804097865e-06,
+      "loss": 1.0967,
+      "step": 11390
+    },
+    {
+      "epoch": 0.45086910953350867,
+      "grad_norm": 0.9556360939507376,
+      "learning_rate": 9.92317952599023e-06,
+      "loss": 1.0862,
+      "step": 11400
+    },
+    {
+      "epoch": 0.4512646087523977,
+      "grad_norm": 1.0777201300745027,
+      "learning_rate": 9.922777205237516e-06,
+      "loss": 1.0724,
+      "step": 11410
+    },
+    {
+      "epoch": 0.45166010797128675,
+      "grad_norm": 1.0349762935995492,
+      "learning_rate": 9.922373841924928e-06,
+      "loss": 1.0862,
+      "step": 11420
+    },
+    {
+      "epoch": 0.4520556071901758,
+      "grad_norm": 1.0079290348637329,
+      "learning_rate": 9.92196943613789e-06,
+      "loss": 1.0961,
+      "step": 11430
+    },
+    {
+      "epoch": 0.45245110640906483,
+      "grad_norm": 1.0456911072799369,
+      "learning_rate": 9.921563987962052e-06,
+      "loss": 1.1076,
+      "step": 11440
+    },
+    {
+      "epoch": 0.45284660562795387,
+      "grad_norm": 1.0847324487057688,
+      "learning_rate": 9.921157497483278e-06,
+      "loss": 1.0978,
+      "step": 11450
+    },
+    {
+      "epoch": 0.4532421048468429,
+      "grad_norm": 1.0073979749023843,
+      "learning_rate": 9.920749964787656e-06,
+      "loss": 1.1106,
+      "step": 11460
+    },
+    {
+      "epoch": 0.45363760406573195,
+      "grad_norm": 1.0591959526202663,
+      "learning_rate": 9.920341389961495e-06,
+      "loss": 1.0938,
+      "step": 11470
+    },
+    {
+      "epoch": 0.454033103284621,
+      "grad_norm": 0.9979334924011445,
+      "learning_rate": 9.919931773091322e-06,
+      "loss": 1.0857,
+      "step": 11480
+    },
+    {
+      "epoch": 0.4544286025035101,
+      "grad_norm": 1.008374374303569,
+      "learning_rate": 9.91952111426389e-06,
+      "loss": 1.0903,
+      "step": 11490
+    },
+    {
+      "epoch": 0.4548241017223991,
+      "grad_norm": 1.0096373802309044,
+      "learning_rate": 9.919109413566168e-06,
+      "loss": 1.0946,
+      "step": 11500
+    },
+    {
+      "epoch": 0.45521960094128816,
+      "grad_norm": 1.031545265902342,
+      "learning_rate": 9.918696671085349e-06,
+      "loss": 1.0902,
+      "step": 11510
+    },
+    {
+      "epoch": 0.4556151001601772,
+      "grad_norm": 1.033713201533962,
+      "learning_rate": 9.918282886908841e-06,
+      "loss": 1.0919,
+      "step": 11520
+    },
+    {
+      "epoch": 0.45601059937906624,
+      "grad_norm": 1.0285322950368352,
+      "learning_rate": 9.917868061124279e-06,
+      "loss": 1.1101,
+      "step": 11530
+    },
+    {
+      "epoch": 0.4564060985979553,
+      "grad_norm": 1.040993749385655,
+      "learning_rate": 9.917452193819515e-06,
+      "loss": 1.085,
+      "step": 11540
+    },
+    {
+      "epoch": 0.4568015978168443,
+      "grad_norm": 1.0617214951659109,
+      "learning_rate": 9.917035285082624e-06,
+      "loss": 1.0769,
+      "step": 11550
+    },
+    {
+      "epoch": 0.45719709703573336,
+      "grad_norm": 1.0777029523976296,
+      "learning_rate": 9.916617335001899e-06,
+      "loss": 1.1079,
+      "step": 11560
+    },
+    {
+      "epoch": 0.4575925962546224,
+      "grad_norm": 1.0088378485165441,
+      "learning_rate": 9.916198343665856e-06,
+      "loss": 1.097,
+      "step": 11570
+    },
+    {
+      "epoch": 0.45798809547351144,
+      "grad_norm": 1.0665297566664302,
+      "learning_rate": 9.915778311163227e-06,
+      "loss": 1.0801,
+      "step": 11580
+    },
+    {
+      "epoch": 0.4583835946924005,
+      "grad_norm": 1.0557086385187475,
+      "learning_rate": 9.91535723758297e-06,
+      "loss": 1.0861,
+      "step": 11590
+    },
+    {
+      "epoch": 0.4587790939112895,
+      "grad_norm": 0.9819357971990172,
+      "learning_rate": 9.914935123014263e-06,
+      "loss": 1.0906,
+      "step": 11600
+    },
+    {
+      "epoch": 0.45917459313017855,
+      "grad_norm": 1.0145791284930408,
+      "learning_rate": 9.914511967546498e-06,
+      "loss": 1.092,
+      "step": 11610
+    },
+    {
+      "epoch": 0.4595700923490676,
+      "grad_norm": 1.1044019745433686,
+      "learning_rate": 9.914087771269296e-06,
+      "loss": 1.072,
+      "step": 11620
+    },
+    {
+      "epoch": 0.45996559156795663,
+      "grad_norm": 1.0902321507184303,
+      "learning_rate": 9.913662534272492e-06,
+      "loss": 1.0934,
+      "step": 11630
+    },
+    {
+      "epoch": 0.4603610907868457,
+      "grad_norm": 1.100295762026507,
+      "learning_rate": 9.913236256646145e-06,
+      "loss": 1.0978,
+      "step": 11640
+    },
+    {
+      "epoch": 0.4607565900057347,
+      "grad_norm": 1.0044819453818326,
+      "learning_rate": 9.912808938480533e-06,
+      "loss": 1.0867,
+      "step": 11650
+    },
+    {
+      "epoch": 0.4611520892246238,
+      "grad_norm": 1.0662349452488766,
+      "learning_rate": 9.912380579866157e-06,
+      "loss": 1.1033,
+      "step": 11660
+    },
+    {
+      "epoch": 0.46154758844351285,
+      "grad_norm": 1.1200683203600492,
+      "learning_rate": 9.911951180893734e-06,
+      "loss": 1.0931,
+      "step": 11670
+    },
+    {
+      "epoch": 0.4619430876624019,
+      "grad_norm": 1.050212509308587,
+      "learning_rate": 9.911520741654201e-06,
+      "loss": 1.0767,
+      "step": 11680
+    },
+    {
+      "epoch": 0.4623385868812909,
+      "grad_norm": 1.0443608142168066,
+      "learning_rate": 9.911089262238723e-06,
+      "loss": 1.087,
+      "step": 11690
+    },
+    {
+      "epoch": 0.46273408610017996,
+      "grad_norm": 0.999757773447789,
+      "learning_rate": 9.910656742738676e-06,
+      "loss": 1.0918,
+      "step": 11700
+    },
+    {
+      "epoch": 0.463129585319069,
+      "grad_norm": 0.9564949020952047,
+      "learning_rate": 9.91022318324566e-06,
+      "loss": 1.1037,
+      "step": 11710
+    },
+    {
+      "epoch": 0.46352508453795804,
+      "grad_norm": 0.9691957741842717,
+      "learning_rate": 9.909788583851498e-06,
+      "loss": 1.0968,
+      "step": 11720
+    },
+    {
+      "epoch": 0.4639205837568471,
+      "grad_norm": 0.9503580891800856,
+      "learning_rate": 9.909352944648227e-06,
+      "loss": 1.0908,
+      "step": 11730
+    },
+    {
+      "epoch": 0.4643160829757361,
+      "grad_norm": 1.0460585072246669,
+      "learning_rate": 9.908916265728113e-06,
+      "loss": 1.1174,
+      "step": 11740
+    },
+    {
+      "epoch": 0.46471158219462516,
+      "grad_norm": 1.0394186563830252,
+      "learning_rate": 9.908478547183633e-06,
+      "loss": 1.0849,
+      "step": 11750
+    },
+    {
+      "epoch": 0.4651070814135142,
+      "grad_norm": 1.036767392066307,
+      "learning_rate": 9.90803978910749e-06,
+      "loss": 1.0686,
+      "step": 11760
+    },
+    {
+      "epoch": 0.46550258063240324,
+      "grad_norm": 1.155171777528196,
+      "learning_rate": 9.907599991592605e-06,
+      "loss": 1.0764,
+      "step": 11770
+    },
+    {
+      "epoch": 0.4658980798512923,
+      "grad_norm": 1.0892668738559457,
+      "learning_rate": 9.90715915473212e-06,
+      "loss": 1.0793,
+      "step": 11780
+    },
+    {
+      "epoch": 0.4662935790701813,
+      "grad_norm": 1.0133820746448876,
+      "learning_rate": 9.906717278619397e-06,
+      "loss": 1.0721,
+      "step": 11790
+    },
+    {
+      "epoch": 0.46668907828907036,
+      "grad_norm": 1.0344580017250842,
+      "learning_rate": 9.906274363348016e-06,
+      "loss": 1.0841,
+      "step": 11800
+    },
+    {
+      "epoch": 0.4670845775079594,
+      "grad_norm": 0.9279184665449852,
+      "learning_rate": 9.905830409011781e-06,
+      "loss": 1.0851,
+      "step": 11810
+    },
+    {
+      "epoch": 0.46748007672684844,
+      "grad_norm": 1.0650989169005847,
+      "learning_rate": 9.905385415704713e-06,
+      "loss": 1.0795,
+      "step": 11820
+    },
+    {
+      "epoch": 0.46787557594573753,
+      "grad_norm": 0.932389326389645,
+      "learning_rate": 9.904939383521052e-06,
+      "loss": 1.0898,
+      "step": 11830
+    },
+    {
+      "epoch": 0.46827107516462657,
+      "grad_norm": 0.952762616303843,
+      "learning_rate": 9.904492312555266e-06,
+      "loss": 1.0927,
+      "step": 11840
+    },
+    {
+      "epoch": 0.4686665743835156,
+      "grad_norm": 1.0762257012559517,
+      "learning_rate": 9.904044202902029e-06,
+      "loss": 1.0787,
+      "step": 11850
+    },
+    {
+      "epoch": 0.46906207360240465,
+      "grad_norm": 1.0494600090264288,
+      "learning_rate": 9.903595054656247e-06,
+      "loss": 1.0916,
+      "step": 11860
+    },
+    {
+      "epoch": 0.4694575728212937,
+      "grad_norm": 0.9725846913871297,
+      "learning_rate": 9.903144867913043e-06,
+      "loss": 1.0816,
+      "step": 11870
+    },
+    {
+      "epoch": 0.46985307204018273,
+      "grad_norm": 0.9722201634090373,
+      "learning_rate": 9.902693642767757e-06,
+      "loss": 1.0777,
+      "step": 11880
+    },
+    {
+      "epoch": 0.47024857125907177,
+      "grad_norm": 1.1207495758168449,
+      "learning_rate": 9.902241379315954e-06,
+      "loss": 1.0897,
+      "step": 11890
+    },
+    {
+      "epoch": 0.4706440704779608,
+      "grad_norm": 1.0329046376010542,
+      "learning_rate": 9.901788077653408e-06,
+      "loss": 1.0881,
+      "step": 11900
+    },
+    {
+      "epoch": 0.47103956969684985,
+      "grad_norm": 0.9455616199819888,
+      "learning_rate": 9.901333737876131e-06,
+      "loss": 1.0815,
+      "step": 11910
+    },
+    {
+      "epoch": 0.4714350689157389,
+      "grad_norm": 0.9277554527421996,
+      "learning_rate": 9.900878360080335e-06,
+      "loss": 1.0928,
+      "step": 11920
+    },
+    {
+      "epoch": 0.4718305681346279,
+      "grad_norm": 1.0253345272405776,
+      "learning_rate": 9.900421944362466e-06,
+      "loss": 1.0988,
+      "step": 11930
+    },
+    {
+      "epoch": 0.47222606735351697,
+      "grad_norm": 1.1110439566703343,
+      "learning_rate": 9.899964490819186e-06,
+      "loss": 1.0859,
+      "step": 11940
+    },
+    {
+      "epoch": 0.472621566572406,
+      "grad_norm": 1.078231731471087,
+      "learning_rate": 9.899505999547371e-06,
+      "loss": 1.0912,
+      "step": 11950
+    },
+    {
+      "epoch": 0.47301706579129504,
+      "grad_norm": 0.9087399551188918,
+      "learning_rate": 9.899046470644127e-06,
+      "loss": 1.052,
+      "step": 11960
+    },
+    {
+      "epoch": 0.4734125650101841,
+      "grad_norm": 1.0049353481340764,
+      "learning_rate": 9.89858590420677e-06,
+      "loss": 1.0857,
+      "step": 11970
+    },
+    {
+      "epoch": 0.4738080642290731,
+      "grad_norm": 0.9876838875099272,
+      "learning_rate": 9.898124300332843e-06,
+      "loss": 1.0824,
+      "step": 11980
+    },
+    {
+      "epoch": 0.4742035634479622,
+      "grad_norm": 1.0678057975124664,
+      "learning_rate": 9.897661659120106e-06,
+      "loss": 1.1015,
+      "step": 11990
+    },
+    {
+      "epoch": 0.47459906266685126,
+      "grad_norm": 1.0303887789275032,
+      "learning_rate": 9.897197980666536e-06,
+      "loss": 1.0855,
+      "step": 12000
+    },
+    {
+      "epoch": 0.4749945618857403,
+      "grad_norm": 1.0113675885216502,
+      "learning_rate": 9.896733265070333e-06,
+      "loss": 1.1062,
+      "step": 12010
+    },
+    {
+      "epoch": 0.47539006110462934,
+      "grad_norm": 0.98642841545544,
+      "learning_rate": 9.896267512429915e-06,
+      "loss": 1.0927,
+      "step": 12020
+    },
+    {
+      "epoch": 0.4757855603235184,
+      "grad_norm": 0.9956984024423865,
+      "learning_rate": 9.895800722843925e-06,
+      "loss": 1.0945,
+      "step": 12030
+    },
+    {
+      "epoch": 0.4761810595424074,
+      "grad_norm": 1.0842417022917363,
+      "learning_rate": 9.895332896411217e-06,
+      "loss": 1.0978,
+      "step": 12040
+    },
+    {
+      "epoch": 0.47657655876129645,
+      "grad_norm": 0.997743812847416,
+      "learning_rate": 9.894864033230867e-06,
+      "loss": 1.0745,
+      "step": 12050
+    },
+    {
+      "epoch": 0.4769720579801855,
+      "grad_norm": 1.0134286989652361,
+      "learning_rate": 9.894394133402175e-06,
+      "loss": 1.0684,
+      "step": 12060
+    },
+    {
+      "epoch": 0.47736755719907453,
+      "grad_norm": 0.9426141371159262,
+      "learning_rate": 9.89392319702466e-06,
+      "loss": 1.0811,
+      "step": 12070
+    },
+    {
+      "epoch": 0.47776305641796357,
+      "grad_norm": 1.0219322715914516,
+      "learning_rate": 9.893451224198051e-06,
+      "loss": 1.1027,
+      "step": 12080
+    },
+    {
+      "epoch": 0.4781585556368526,
+      "grad_norm": 1.0027324177164836,
+      "learning_rate": 9.892978215022312e-06,
+      "loss": 1.0756,
+      "step": 12090
+    },
+    {
+      "epoch": 0.47855405485574165,
+      "grad_norm": 1.0740016007133908,
+      "learning_rate": 9.892504169597614e-06,
+      "loss": 1.0891,
+      "step": 12100
+    },
+    {
+      "epoch": 0.4789495540746307,
+      "grad_norm": 0.9756711772273633,
+      "learning_rate": 9.89202908802435e-06,
+      "loss": 1.0742,
+      "step": 12110
+    },
+    {
+      "epoch": 0.47934505329351973,
+      "grad_norm": 0.9688987944855754,
+      "learning_rate": 9.891552970403137e-06,
+      "loss": 1.0797,
+      "step": 12120
+    },
+    {
+      "epoch": 0.47974055251240877,
+      "grad_norm": 0.9792967068846127,
+      "learning_rate": 9.891075816834809e-06,
+      "loss": 1.0819,
+      "step": 12130
+    },
+    {
+      "epoch": 0.4801360517312978,
+      "grad_norm": 1.028872658022042,
+      "learning_rate": 9.890597627420418e-06,
+      "loss": 1.085,
+      "step": 12140
+    },
+    {
+      "epoch": 0.48053155095018685,
+      "grad_norm": 1.057487460504427,
+      "learning_rate": 9.890118402261235e-06,
+      "loss": 1.1044,
+      "step": 12150
+    },
+    {
+      "epoch": 0.48092705016907594,
+      "grad_norm": 1.007728077149704,
+      "learning_rate": 9.889638141458754e-06,
+      "loss": 1.1037,
+      "step": 12160
+    },
+    {
+      "epoch": 0.481322549387965,
+      "grad_norm": 1.0173070331403782,
+      "learning_rate": 9.889156845114685e-06,
+      "loss": 1.0912,
+      "step": 12170
+    },
+    {
+      "epoch": 0.481718048606854,
+      "grad_norm": 0.9747683657362735,
+      "learning_rate": 9.888674513330956e-06,
+      "loss": 1.079,
+      "step": 12180
+    },
+    {
+      "epoch": 0.48211354782574306,
+      "grad_norm": 0.9919873981355115,
+      "learning_rate": 9.888191146209721e-06,
+      "loss": 1.0803,
+      "step": 12190
+    },
+    {
+      "epoch": 0.4825090470446321,
+      "grad_norm": 1.0100836974102454,
+      "learning_rate": 9.887706743853347e-06,
+      "loss": 1.0923,
+      "step": 12200
+    },
+    {
+      "epoch": 0.48290454626352114,
+      "grad_norm": 1.0758189062336616,
+      "learning_rate": 9.887221306364419e-06,
+      "loss": 1.0701,
+      "step": 12210
+    },
+    {
+      "epoch": 0.4833000454824102,
+      "grad_norm": 0.9652997832170335,
+      "learning_rate": 9.88673483384575e-06,
+      "loss": 1.0888,
+      "step": 12220
+    },
+    {
+      "epoch": 0.4836955447012992,
+      "grad_norm": 1.0114980971074492,
+      "learning_rate": 9.886247326400362e-06,
+      "loss": 1.0993,
+      "step": 12230
+    },
+    {
+      "epoch": 0.48409104392018826,
+      "grad_norm": 1.0405278323372296,
+      "learning_rate": 9.885758784131503e-06,
+      "loss": 1.0752,
+      "step": 12240
+    },
+    {
+      "epoch": 0.4844865431390773,
+      "grad_norm": 1.0254542050683517,
+      "learning_rate": 9.885269207142636e-06,
+      "loss": 1.0819,
+      "step": 12250
+    },
+    {
+      "epoch": 0.48488204235796634,
+      "grad_norm": 0.9964566407468562,
+      "learning_rate": 9.884778595537448e-06,
+      "loss": 1.1045,
+      "step": 12260
+    },
+    {
+      "epoch": 0.4852775415768554,
+      "grad_norm": 1.0637435439518692,
+      "learning_rate": 9.884286949419838e-06,
+      "loss": 1.0716,
+      "step": 12270
+    },
+    {
+      "epoch": 0.4856730407957444,
+      "grad_norm": 1.031654890592826,
+      "learning_rate": 9.883794268893933e-06,
+      "loss": 1.0898,
+      "step": 12280
+    },
+    {
+      "epoch": 0.48606854001463345,
+      "grad_norm": 1.135973706220895,
+      "learning_rate": 9.883300554064072e-06,
+      "loss": 1.0955,
+      "step": 12290
+    },
+    {
+      "epoch": 0.4864640392335225,
+      "grad_norm": 1.002511164046068,
+      "learning_rate": 9.882805805034816e-06,
+      "loss": 1.0698,
+      "step": 12300
+    },
+    {
+      "epoch": 0.48685953845241153,
+      "grad_norm": 0.9653185875255408,
+      "learning_rate": 9.88231002191094e-06,
+      "loss": 1.0899,
+      "step": 12310
+    },
+    {
+      "epoch": 0.4872550376713006,
+      "grad_norm": 0.9927404665577052,
+      "learning_rate": 9.88181320479745e-06,
+      "loss": 1.1031,
+      "step": 12320
+    },
+    {
+      "epoch": 0.48765053689018967,
+      "grad_norm": 1.022571424844845,
+      "learning_rate": 9.881315353799556e-06,
+      "loss": 1.0829,
+      "step": 12330
+    },
+    {
+      "epoch": 0.4880460361090787,
+      "grad_norm": 1.0251656545020469,
+      "learning_rate": 9.880816469022701e-06,
+      "loss": 1.0628,
+      "step": 12340
+    },
+    {
+      "epoch": 0.48844153532796775,
+      "grad_norm": 0.9778051734671916,
+      "learning_rate": 9.880316550572535e-06,
+      "loss": 1.0785,
+      "step": 12350
+    },
+    {
+      "epoch": 0.4888370345468568,
+      "grad_norm": 0.9479517726765861,
+      "learning_rate": 9.879815598554934e-06,
+      "loss": 1.0724,
+      "step": 12360
+    },
+    {
+      "epoch": 0.4892325337657458,
+      "grad_norm": 1.023416506979459,
+      "learning_rate": 9.879313613075992e-06,
+      "loss": 1.0821,
+      "step": 12370
+    },
+    {
+      "epoch": 0.48962803298463486,
+      "grad_norm": 1.0252808980561536,
+      "learning_rate": 9.878810594242019e-06,
+      "loss": 1.075,
+      "step": 12380
+    },
+    {
+      "epoch": 0.4900235322035239,
+      "grad_norm": 1.0328235630158107,
+      "learning_rate": 9.878306542159548e-06,
+      "loss": 1.0987,
+      "step": 12390
+    },
+    {
+      "epoch": 0.49041903142241294,
+      "grad_norm": 0.9459660658661184,
+      "learning_rate": 9.877801456935328e-06,
+      "loss": 1.097,
+      "step": 12400
+    },
+    {
+      "epoch": 0.490814530641302,
+      "grad_norm": 1.108492146052984,
+      "learning_rate": 9.877295338676325e-06,
+      "loss": 1.0783,
+      "step": 12410
+    },
+    {
+      "epoch": 0.491210029860191,
+      "grad_norm": 1.0365459150804477,
+      "learning_rate": 9.876788187489727e-06,
+      "loss": 1.0945,
+      "step": 12420
+    },
+    {
+      "epoch": 0.49160552907908006,
+      "grad_norm": 0.960860713701472,
+      "learning_rate": 9.876280003482943e-06,
+      "loss": 1.0867,
+      "step": 12430
+    },
+    {
+      "epoch": 0.4920010282979691,
+      "grad_norm": 1.0123915589609034,
+      "learning_rate": 9.875770786763596e-06,
+      "loss": 1.0888,
+      "step": 12440
+    },
+    {
+      "epoch": 0.49239652751685814,
+      "grad_norm": 0.9646965263241251,
+      "learning_rate": 9.875260537439528e-06,
+      "loss": 1.086,
+      "step": 12450
+    },
+    {
+      "epoch": 0.4927920267357472,
+      "grad_norm": 1.0441635488790992,
+      "learning_rate": 9.874749255618803e-06,
+      "loss": 1.0804,
+      "step": 12460
+    },
+    {
+      "epoch": 0.4931875259546362,
+      "grad_norm": 1.090821433254823,
+      "learning_rate": 9.8742369414097e-06,
+      "loss": 1.0867,
+      "step": 12470
+    },
+    {
+      "epoch": 0.49358302517352526,
+      "grad_norm": 1.0191128312170485,
+      "learning_rate": 9.873723594920719e-06,
+      "loss": 1.0885,
+      "step": 12480
+    },
+    {
+      "epoch": 0.4939785243924143,
+      "grad_norm": 1.0297257642552196,
+      "learning_rate": 9.873209216260578e-06,
+      "loss": 1.0658,
+      "step": 12490
+    },
+    {
+      "epoch": 0.4943740236113034,
+      "grad_norm": 1.1144393262572139,
+      "learning_rate": 9.872693805538215e-06,
+      "loss": 1.0895,
+      "step": 12500
+    },
+    {
+      "epoch": 0.49476952283019243,
+      "grad_norm": 0.9453693123598528,
+      "learning_rate": 9.872177362862783e-06,
+      "loss": 1.0734,
+      "step": 12510
+    },
+    {
+      "epoch": 0.49516502204908147,
+      "grad_norm": 1.0353432870673445,
+      "learning_rate": 9.871659888343656e-06,
+      "loss": 1.0973,
+      "step": 12520
+    },
+    {
+      "epoch": 0.4955605212679705,
+      "grad_norm": 1.0193367526957051,
+      "learning_rate": 9.871141382090428e-06,
+      "loss": 1.0773,
+      "step": 12530
+    },
+    {
+      "epoch": 0.49595602048685955,
+      "grad_norm": 1.0252116759825207,
+      "learning_rate": 9.87062184421291e-06,
+      "loss": 1.0914,
+      "step": 12540
+    },
+    {
+      "epoch": 0.4963515197057486,
+      "grad_norm": 1.0273843758175163,
+      "learning_rate": 9.870101274821127e-06,
+      "loss": 1.0737,
+      "step": 12550
+    },
+    {
+      "epoch": 0.49674701892463763,
+      "grad_norm": 0.9522271245472749,
+      "learning_rate": 9.86957967402533e-06,
+      "loss": 1.08,
+      "step": 12560
+    },
+    {
+      "epoch": 0.49714251814352667,
+      "grad_norm": 1.0063969810516513,
+      "learning_rate": 9.869057041935985e-06,
+      "loss": 1.0712,
+      "step": 12570
+    },
+    {
+      "epoch": 0.4975380173624157,
+      "grad_norm": 1.1048900741589638,
+      "learning_rate": 9.868533378663776e-06,
+      "loss": 1.0516,
+      "step": 12580
+    },
+    {
+      "epoch": 0.49793351658130475,
+      "grad_norm": 0.9709815003695493,
+      "learning_rate": 9.868008684319607e-06,
+      "loss": 1.0819,
+      "step": 12590
+    },
+    {
+      "epoch": 0.4983290158001938,
+      "grad_norm": 1.0520641839876166,
+      "learning_rate": 9.867482959014597e-06,
+      "loss": 1.0806,
+      "step": 12600
+    },
+    {
+      "epoch": 0.4987245150190828,
+      "grad_norm": 0.9938078714241106,
+      "learning_rate": 9.866956202860088e-06,
+      "loss": 1.0595,
+      "step": 12610
+    },
+    {
+      "epoch": 0.49912001423797187,
+      "grad_norm": 1.0592966209857555,
+      "learning_rate": 9.866428415967636e-06,
+      "loss": 1.0779,
+      "step": 12620
+    },
+    {
+      "epoch": 0.4995155134568609,
+      "grad_norm": 1.0003343099466233,
+      "learning_rate": 9.865899598449018e-06,
+      "loss": 1.0775,
+      "step": 12630
+    },
+    {
+      "epoch": 0.49991101267574994,
+      "grad_norm": 1.0334223822699393,
+      "learning_rate": 9.86536975041623e-06,
+      "loss": 1.0761,
+      "step": 12640
+    },
+    {
+      "epoch": 0.500306511894639,
+      "grad_norm": 1.0234692832340981,
+      "learning_rate": 9.864838871981481e-06,
+      "loss": 1.0833,
+      "step": 12650
+    },
+    {
+      "epoch": 0.5007020111135281,
+      "grad_norm": 0.9378572649147121,
+      "learning_rate": 9.864306963257207e-06,
+      "loss": 1.0758,
+      "step": 12660
+    },
+    {
+      "epoch": 0.5010975103324171,
+      "grad_norm": 0.9725097781603558,
+      "learning_rate": 9.863774024356052e-06,
+      "loss": 1.0577,
+      "step": 12670
+    },
+    {
+      "epoch": 0.5014930095513062,
+      "grad_norm": 1.112499062561878,
+      "learning_rate": 9.863240055390886e-06,
+      "loss": 1.0846,
+      "step": 12680
+    },
+    {
+      "epoch": 0.5018885087701952,
+      "grad_norm": 1.099532695654552,
+      "learning_rate": 9.862705056474795e-06,
+      "loss": 1.0781,
+      "step": 12690
+    },
+    {
+      "epoch": 0.5022840079890842,
+      "grad_norm": 1.0372090126460214,
+      "learning_rate": 9.862169027721083e-06,
+      "loss": 1.0711,
+      "step": 12700
+    },
+    {
+      "epoch": 0.5026795072079733,
+      "grad_norm": 1.0099421435902356,
+      "learning_rate": 9.861631969243268e-06,
+      "loss": 1.0762,
+      "step": 12710
+    },
+    {
+      "epoch": 0.5030750064268623,
+      "grad_norm": 0.9886273815878905,
+      "learning_rate": 9.861093881155092e-06,
+      "loss": 1.0633,
+      "step": 12720
+    },
+    {
+      "epoch": 0.5034705056457514,
+      "grad_norm": 1.005784117163544,
+      "learning_rate": 9.860554763570516e-06,
+      "loss": 1.0711,
+      "step": 12730
+    },
+    {
+      "epoch": 0.5038660048646404,
+      "grad_norm": 1.0873888018070612,
+      "learning_rate": 9.860014616603713e-06,
+      "loss": 1.092,
+      "step": 12740
+    },
+    {
+      "epoch": 0.5042615040835294,
+      "grad_norm": 0.9515312149483481,
+      "learning_rate": 9.859473440369074e-06,
+      "loss": 1.0731,
+      "step": 12750
+    },
+    {
+      "epoch": 0.5046570033024185,
+      "grad_norm": 1.0059155401757616,
+      "learning_rate": 9.858931234981215e-06,
+      "loss": 1.0684,
+      "step": 12760
+    },
+    {
+      "epoch": 0.5050525025213075,
+      "grad_norm": 0.9716542106768279,
+      "learning_rate": 9.858388000554967e-06,
+      "loss": 1.1067,
+      "step": 12770
+    },
+    {
+      "epoch": 0.5054480017401966,
+      "grad_norm": 1.0200551515159013,
+      "learning_rate": 9.857843737205371e-06,
+      "loss": 1.0891,
+      "step": 12780
+    },
+    {
+      "epoch": 0.5058435009590856,
+      "grad_norm": 0.9934024401412571,
+      "learning_rate": 9.857298445047701e-06,
+      "loss": 1.0563,
+      "step": 12790
+    },
+    {
+      "epoch": 0.5062390001779746,
+      "grad_norm": 1.0152758677203917,
+      "learning_rate": 9.856752124197433e-06,
+      "loss": 1.0762,
+      "step": 12800
+    },
+    {
+      "epoch": 0.5066344993968637,
+      "grad_norm": 0.9018296429061226,
+      "learning_rate": 9.856204774770274e-06,
+      "loss": 1.0842,
+      "step": 12810
+    },
+    {
+      "epoch": 0.5070299986157527,
+      "grad_norm": 1.0514237704455383,
+      "learning_rate": 9.85565639688214e-06,
+      "loss": 1.0886,
+      "step": 12820
+    },
+    {
+      "epoch": 0.5074254978346417,
+      "grad_norm": 0.9925557847659466,
+      "learning_rate": 9.85510699064917e-06,
+      "loss": 1.0674,
+      "step": 12830
+    },
+    {
+      "epoch": 0.5078209970535308,
+      "grad_norm": 0.9886757906812879,
+      "learning_rate": 9.854556556187717e-06,
+      "loss": 1.1064,
+      "step": 12840
+    },
+    {
+      "epoch": 0.5082164962724198,
+      "grad_norm": 1.0201916255835866,
+      "learning_rate": 9.854005093614355e-06,
+      "loss": 1.079,
+      "step": 12850
+    },
+    {
+      "epoch": 0.5086119954913089,
+      "grad_norm": 0.9685907478835968,
+      "learning_rate": 9.853452603045876e-06,
+      "loss": 1.0853,
+      "step": 12860
+    },
+    {
+      "epoch": 0.5090074947101979,
+      "grad_norm": 0.9868830824251713,
+      "learning_rate": 9.852899084599285e-06,
+      "loss": 1.0847,
+      "step": 12870
+    },
+    {
+      "epoch": 0.509402993929087,
+      "grad_norm": 0.9849429435244901,
+      "learning_rate": 9.852344538391808e-06,
+      "loss": 1.0796,
+      "step": 12880
+    },
+    {
+      "epoch": 0.509798493147976,
+      "grad_norm": 0.9661313036071937,
+      "learning_rate": 9.851788964540888e-06,
+      "loss": 1.0683,
+      "step": 12890
+    },
+    {
+      "epoch": 0.510193992366865,
+      "grad_norm": 0.9423262812388347,
+      "learning_rate": 9.851232363164188e-06,
+      "loss": 1.0856,
+      "step": 12900
+    },
+    {
+      "epoch": 0.5105894915857541,
+      "grad_norm": 0.9254842741163509,
+      "learning_rate": 9.850674734379586e-06,
+      "loss": 1.0662,
+      "step": 12910
+    },
+    {
+      "epoch": 0.5109849908046432,
+      "grad_norm": 0.9547498054163909,
+      "learning_rate": 9.850116078305178e-06,
+      "loss": 1.0741,
+      "step": 12920
+    },
+    {
+      "epoch": 0.5113804900235323,
+      "grad_norm": 0.915368379638883,
+      "learning_rate": 9.849556395059278e-06,
+      "loss": 1.0737,
+      "step": 12930
+    },
+    {
+      "epoch": 0.5117759892424213,
+      "grad_norm": 0.9595236029594361,
+      "learning_rate": 9.848995684760416e-06,
+      "loss": 1.0722,
+      "step": 12940
+    },
+    {
+      "epoch": 0.5121714884613103,
+      "grad_norm": 0.9548736074393787,
+      "learning_rate": 9.848433947527342e-06,
+      "loss": 1.0899,
+      "step": 12950
+    },
+    {
+      "epoch": 0.5125669876801994,
+      "grad_norm": 1.0109914361919559,
+      "learning_rate": 9.847871183479024e-06,
+      "loss": 1.0788,
+      "step": 12960
+    },
+    {
+      "epoch": 0.5129624868990884,
+      "grad_norm": 1.112177288890901,
+      "learning_rate": 9.847307392734641e-06,
+      "loss": 1.0593,
+      "step": 12970
+    },
+    {
+      "epoch": 0.5133579861179774,
+      "grad_norm": 0.9738999245385244,
+      "learning_rate": 9.8467425754136e-06,
+      "loss": 1.0778,
+      "step": 12980
+    },
+    {
+      "epoch": 0.5137534853368665,
+      "grad_norm": 0.9555220462997933,
+      "learning_rate": 9.846176731635515e-06,
+      "loss": 1.0782,
+      "step": 12990
+    },
+    {
+      "epoch": 0.5141489845557555,
+      "grad_norm": 1.0841923023542257,
+      "learning_rate": 9.845609861520225e-06,
+      "loss": 1.0888,
+      "step": 13000
+    },
+    {
+      "epoch": 0.5145444837746446,
+      "grad_norm": 1.036629669285172,
+      "learning_rate": 9.84504196518778e-06,
+      "loss": 1.0719,
+      "step": 13010
+    },
+    {
+      "epoch": 0.5149399829935336,
+      "grad_norm": 1.108205866109674,
+      "learning_rate": 9.844473042758455e-06,
+      "loss": 1.0777,
+      "step": 13020
+    },
+    {
+      "epoch": 0.5153354822124226,
+      "grad_norm": 0.9357968593070068,
+      "learning_rate": 9.843903094352735e-06,
+      "loss": 1.0808,
+      "step": 13030
+    },
+    {
+      "epoch": 0.5157309814313117,
+      "grad_norm": 0.9932924897904928,
+      "learning_rate": 9.843332120091329e-06,
+      "loss": 1.0673,
+      "step": 13040
+    },
+    {
+      "epoch": 0.5161264806502007,
+      "grad_norm": 1.0597709572544896,
+      "learning_rate": 9.842760120095154e-06,
+      "loss": 1.0927,
+      "step": 13050
+    },
+    {
+      "epoch": 0.5165219798690898,
+      "grad_norm": 0.9442667849128606,
+      "learning_rate": 9.842187094485354e-06,
+      "loss": 1.0646,
+      "step": 13060
+    },
+    {
+      "epoch": 0.5169174790879788,
+      "grad_norm": 1.0092561087087946,
+      "learning_rate": 9.841613043383282e-06,
+      "loss": 1.0831,
+      "step": 13070
+    },
+    {
+      "epoch": 0.5173129783068678,
+      "grad_norm": 0.9993460583781132,
+      "learning_rate": 9.841037966910519e-06,
+      "loss": 1.0765,
+      "step": 13080
+    },
+    {
+      "epoch": 0.5177084775257569,
+      "grad_norm": 1.0109837742862735,
+      "learning_rate": 9.840461865188848e-06,
+      "loss": 1.0869,
+      "step": 13090
+    },
+    {
+      "epoch": 0.5181039767446459,
+      "grad_norm": 1.0476423284084753,
+      "learning_rate": 9.839884738340285e-06,
+      "loss": 1.0728,
+      "step": 13100
+    },
+    {
+      "epoch": 0.518499475963535,
+      "grad_norm": 1.0160294431898018,
+      "learning_rate": 9.839306586487051e-06,
+      "loss": 1.0694,
+      "step": 13110
+    },
+    {
+      "epoch": 0.518894975182424,
+      "grad_norm": 0.9614342828711414,
+      "learning_rate": 9.83872740975159e-06,
+      "loss": 1.0872,
+      "step": 13120
+    },
+    {
+      "epoch": 0.519290474401313,
+      "grad_norm": 0.9661915296610115,
+      "learning_rate": 9.83814720825656e-06,
+      "loss": 1.0563,
+      "step": 13130
+    },
+    {
+      "epoch": 0.5196859736202021,
+      "grad_norm": 0.9646793509423565,
+      "learning_rate": 9.837565982124841e-06,
+      "loss": 1.0714,
+      "step": 13140
+    },
+    {
+      "epoch": 0.5200814728390911,
+      "grad_norm": 0.9951242152490484,
+      "learning_rate": 9.836983731479526e-06,
+      "loss": 1.0566,
+      "step": 13150
+    },
+    {
+      "epoch": 0.5204769720579802,
+      "grad_norm": 0.9958068144095807,
+      "learning_rate": 9.836400456443924e-06,
+      "loss": 1.0729,
+      "step": 13160
+    },
+    {
+      "epoch": 0.5208724712768692,
+      "grad_norm": 0.9246449785521856,
+      "learning_rate": 9.835816157141563e-06,
+      "loss": 1.0686,
+      "step": 13170
+    },
+    {
+      "epoch": 0.5212679704957582,
+      "grad_norm": 1.0332848229000948,
+      "learning_rate": 9.835230833696187e-06,
+      "loss": 1.0781,
+      "step": 13180
+    },
+    {
+      "epoch": 0.5216634697146473,
+      "grad_norm": 1.0657823734331777,
+      "learning_rate": 9.834644486231761e-06,
+      "loss": 1.0671,
+      "step": 13190
+    },
+    {
+      "epoch": 0.5220589689335363,
+      "grad_norm": 0.9561827585668331,
+      "learning_rate": 9.834057114872459e-06,
+      "loss": 1.0656,
+      "step": 13200
+    },
+    {
+      "epoch": 0.5224544681524254,
+      "grad_norm": 1.0524846967020414,
+      "learning_rate": 9.83346871974268e-06,
+      "loss": 1.0744,
+      "step": 13210
+    },
+    {
+      "epoch": 0.5228499673713144,
+      "grad_norm": 1.1182652016129095,
+      "learning_rate": 9.832879300967031e-06,
+      "loss": 1.0566,
+      "step": 13220
+    },
+    {
+      "epoch": 0.5232454665902034,
+      "grad_norm": 1.0429235656233988,
+      "learning_rate": 9.832288858670348e-06,
+      "loss": 1.0677,
+      "step": 13230
+    },
+    {
+      "epoch": 0.5236409658090925,
+      "grad_norm": 1.0119110608263238,
+      "learning_rate": 9.831697392977668e-06,
+      "loss": 1.0588,
+      "step": 13240
+    },
+    {
+      "epoch": 0.5240364650279816,
+      "grad_norm": 1.077962292429758,
+      "learning_rate": 9.831104904014259e-06,
+      "loss": 1.087,
+      "step": 13250
+    },
+    {
+      "epoch": 0.5244319642468707,
+      "grad_norm": 1.0243997136978984,
+      "learning_rate": 9.8305113919056e-06,
+      "loss": 1.0952,
+      "step": 13260
+    },
+    {
+      "epoch": 0.5248274634657597,
+      "grad_norm": 1.0678484907203238,
+      "learning_rate": 9.829916856777386e-06,
+      "loss": 1.1014,
+      "step": 13270
+    },
+    {
+      "epoch": 0.5252229626846487,
+      "grad_norm": 0.9836947307676847,
+      "learning_rate": 9.829321298755527e-06,
+      "loss": 1.0888,
+      "step": 13280
+    },
+    {
+      "epoch": 0.5256184619035378,
+      "grad_norm": 0.9950540732279035,
+      "learning_rate": 9.828724717966153e-06,
+      "loss": 1.0759,
+      "step": 13290
+    },
+    {
+      "epoch": 0.5260139611224268,
+      "grad_norm": 0.9644969878304526,
+      "learning_rate": 9.828127114535613e-06,
+      "loss": 1.0767,
+      "step": 13300
+    },
+    {
+      "epoch": 0.5264094603413159,
+      "grad_norm": 1.0450309983137005,
+      "learning_rate": 9.827528488590466e-06,
+      "loss": 1.0779,
+      "step": 13310
+    },
+    {
+      "epoch": 0.5268049595602049,
+      "grad_norm": 0.9885703761695057,
+      "learning_rate": 9.82692884025749e-06,
+      "loss": 1.0786,
+      "step": 13320
+    },
+    {
+      "epoch": 0.5272004587790939,
+      "grad_norm": 1.0016227168515768,
+      "learning_rate": 9.826328169663682e-06,
+      "loss": 1.0771,
+      "step": 13330
+    },
+    {
+      "epoch": 0.527595957997983,
+      "grad_norm": 0.9706655976674615,
+      "learning_rate": 9.825726476936254e-06,
+      "loss": 1.0811,
+      "step": 13340
+    },
+    {
+      "epoch": 0.527991457216872,
+      "grad_norm": 1.0890777461097025,
+      "learning_rate": 9.825123762202633e-06,
+      "loss": 1.085,
+      "step": 13350
+    },
+    {
+      "epoch": 0.5283869564357611,
+      "grad_norm": 1.0412103121994987,
+      "learning_rate": 9.824520025590463e-06,
+      "loss": 1.068,
+      "step": 13360
+    },
+    {
+      "epoch": 0.5287824556546501,
+      "grad_norm": 1.0695103028933133,
+      "learning_rate": 9.823915267227606e-06,
+      "loss": 1.0691,
+      "step": 13370
+    },
+    {
+      "epoch": 0.5291779548735391,
+      "grad_norm": 1.0425166666925885,
+      "learning_rate": 9.823309487242141e-06,
+      "loss": 1.0719,
+      "step": 13380
+    },
+    {
+      "epoch": 0.5295734540924282,
+      "grad_norm": 1.0970921716275426,
+      "learning_rate": 9.82270268576236e-06,
+      "loss": 1.0692,
+      "step": 13390
+    },
+    {
+      "epoch": 0.5299689533113172,
+      "grad_norm": 0.9969672706890884,
+      "learning_rate": 9.822094862916774e-06,
+      "loss": 1.0768,
+      "step": 13400
+    },
+    {
+      "epoch": 0.5303644525302063,
+      "grad_norm": 1.044696734619016,
+      "learning_rate": 9.82148601883411e-06,
+      "loss": 1.083,
+      "step": 13410
+    },
+    {
+      "epoch": 0.5307599517490953,
+      "grad_norm": 1.0632580665858382,
+      "learning_rate": 9.820876153643308e-06,
+      "loss": 1.0879,
+      "step": 13420
+    },
+    {
+      "epoch": 0.5311554509679843,
+      "grad_norm": 1.0862186220045404,
+      "learning_rate": 9.82026526747353e-06,
+      "loss": 1.081,
+      "step": 13430
+    },
+    {
+      "epoch": 0.5315509501868734,
+      "grad_norm": 0.9631305119660133,
+      "learning_rate": 9.819653360454149e-06,
+      "loss": 1.0743,
+      "step": 13440
+    },
+    {
+      "epoch": 0.5319464494057624,
+      "grad_norm": 1.191609859345613,
+      "learning_rate": 9.819040432714757e-06,
+      "loss": 1.0536,
+      "step": 13450
+    },
+    {
+      "epoch": 0.5323419486246515,
+      "grad_norm": 0.9710466738269231,
+      "learning_rate": 9.818426484385164e-06,
+      "loss": 1.0804,
+      "step": 13460
+    },
+    {
+      "epoch": 0.5327374478435405,
+      "grad_norm": 1.0098095888638832,
+      "learning_rate": 9.81781151559539e-06,
+      "loss": 1.0761,
+      "step": 13470
+    },
+    {
+      "epoch": 0.5331329470624295,
+      "grad_norm": 0.9982685961266377,
+      "learning_rate": 9.817195526475677e-06,
+      "loss": 1.0819,
+      "step": 13480
+    },
+    {
+      "epoch": 0.5335284462813186,
+      "grad_norm": 0.969676179766563,
+      "learning_rate": 9.816578517156483e-06,
+      "loss": 1.08,
+      "step": 13490
+    },
+    {
+      "epoch": 0.5339239455002076,
+      "grad_norm": 0.9625389617591572,
+      "learning_rate": 9.815960487768474e-06,
+      "loss": 1.08,
+      "step": 13500
+    },
+    {
+      "epoch": 0.5343194447190966,
+      "grad_norm": 1.0139124926163963,
+      "learning_rate": 9.815341438442544e-06,
+      "loss": 1.0703,
+      "step": 13510
+    },
+    {
+      "epoch": 0.5347149439379857,
+      "grad_norm": 1.035596062944271,
+      "learning_rate": 9.814721369309794e-06,
+      "loss": 1.0792,
+      "step": 13520
+    },
+    {
+      "epoch": 0.5351104431568747,
+      "grad_norm": 1.0629783791460925,
+      "learning_rate": 9.814100280501543e-06,
+      "loss": 1.0575,
+      "step": 13530
+    },
+    {
+      "epoch": 0.5355059423757638,
+      "grad_norm": 1.0046572807335932,
+      "learning_rate": 9.813478172149331e-06,
+      "loss": 1.0795,
+      "step": 13540
+    },
+    {
+      "epoch": 0.5359014415946528,
+      "grad_norm": 1.1034935233440906,
+      "learning_rate": 9.812855044384908e-06,
+      "loss": 1.0667,
+      "step": 13550
+    },
+    {
+      "epoch": 0.5362969408135418,
+      "grad_norm": 1.1075149324824336,
+      "learning_rate": 9.812230897340241e-06,
+      "loss": 1.0759,
+      "step": 13560
+    },
+    {
+      "epoch": 0.5366924400324309,
+      "grad_norm": 0.9761437375440437,
+      "learning_rate": 9.811605731147512e-06,
+      "loss": 1.0489,
+      "step": 13570
+    },
+    {
+      "epoch": 0.5370879392513199,
+      "grad_norm": 1.0111784611012464,
+      "learning_rate": 9.810979545939124e-06,
+      "loss": 1.0718,
+      "step": 13580
+    },
+    {
+      "epoch": 0.5374834384702091,
+      "grad_norm": 1.0244542089780235,
+      "learning_rate": 9.81035234184769e-06,
+      "loss": 1.0796,
+      "step": 13590
+    },
+    {
+      "epoch": 0.5378789376890981,
+      "grad_norm": 1.0702299166427474,
+      "learning_rate": 9.809724119006043e-06,
+      "loss": 1.0683,
+      "step": 13600
+    },
+    {
+      "epoch": 0.5382744369079872,
+      "grad_norm": 1.0636495729496882,
+      "learning_rate": 9.809094877547227e-06,
+      "loss": 1.063,
+      "step": 13610
+    },
+    {
+      "epoch": 0.5386699361268762,
+      "grad_norm": 0.9735941525994887,
+      "learning_rate": 9.808464617604508e-06,
+      "loss": 1.0706,
+      "step": 13620
+    },
+    {
+      "epoch": 0.5390654353457652,
+      "grad_norm": 0.9882813540711624,
+      "learning_rate": 9.807833339311363e-06,
+      "loss": 1.0798,
+      "step": 13630
+    },
+    {
+      "epoch": 0.5394609345646543,
+      "grad_norm": 1.0743232892680614,
+      "learning_rate": 9.807201042801484e-06,
+      "loss": 1.0734,
+      "step": 13640
+    },
+    {
+      "epoch": 0.5398564337835433,
+      "grad_norm": 1.013358151680488,
+      "learning_rate": 9.806567728208782e-06,
+      "loss": 1.0613,
+      "step": 13650
+    },
+    {
+      "epoch": 0.5402519330024323,
+      "grad_norm": 1.1232389903512228,
+      "learning_rate": 9.805933395667381e-06,
+      "loss": 1.0573,
+      "step": 13660
+    },
+    {
+      "epoch": 0.5406474322213214,
+      "grad_norm": 1.0170648287379531,
+      "learning_rate": 9.805298045311626e-06,
+      "loss": 1.0754,
+      "step": 13670
+    },
+    {
+      "epoch": 0.5410429314402104,
+      "grad_norm": 0.9140112227865539,
+      "learning_rate": 9.804661677276068e-06,
+      "loss": 1.0635,
+      "step": 13680
+    },
+    {
+      "epoch": 0.5414384306590995,
+      "grad_norm": 1.0621555243875256,
+      "learning_rate": 9.804024291695482e-06,
+      "loss": 1.0771,
+      "step": 13690
+    },
+    {
+      "epoch": 0.5418339298779885,
+      "grad_norm": 1.0017512691148713,
+      "learning_rate": 9.803385888704855e-06,
+      "loss": 1.0694,
+      "step": 13700
+    },
+    {
+      "epoch": 0.5422294290968775,
+      "grad_norm": 0.9926049921328485,
+      "learning_rate": 9.802746468439389e-06,
+      "loss": 1.0632,
+      "step": 13710
+    },
+    {
+      "epoch": 0.5426249283157666,
+      "grad_norm": 1.1010638965079955,
+      "learning_rate": 9.802106031034501e-06,
+      "loss": 1.071,
+      "step": 13720
+    },
+    {
+      "epoch": 0.5430204275346556,
+      "grad_norm": 0.968280822571627,
+      "learning_rate": 9.801464576625828e-06,
+      "loss": 1.0489,
+      "step": 13730
+    },
+    {
+      "epoch": 0.5434159267535447,
+      "grad_norm": 1.0154394712132147,
+      "learning_rate": 9.800822105349218e-06,
+      "loss": 1.0821,
+      "step": 13740
+    },
+    {
+      "epoch": 0.5438114259724337,
+      "grad_norm": 0.9905098788062335,
+      "learning_rate": 9.800178617340732e-06,
+      "loss": 1.0693,
+      "step": 13750
+    },
+    {
+      "epoch": 0.5442069251913227,
+      "grad_norm": 0.9738302284129928,
+      "learning_rate": 9.799534112736654e-06,
+      "loss": 1.0412,
+      "step": 13760
+    },
+    {
+      "epoch": 0.5446024244102118,
+      "grad_norm": 1.0335390563495404,
+      "learning_rate": 9.798888591673477e-06,
+      "loss": 1.064,
+      "step": 13770
+    },
+    {
+      "epoch": 0.5449979236291008,
+      "grad_norm": 1.1729412428291819,
+      "learning_rate": 9.798242054287912e-06,
+      "loss": 1.0632,
+      "step": 13780
+    },
+    {
+      "epoch": 0.5453934228479899,
+      "grad_norm": 1.014657384320453,
+      "learning_rate": 9.797594500716885e-06,
+      "loss": 1.0575,
+      "step": 13790
+    },
+    {
+      "epoch": 0.5457889220668789,
+      "grad_norm": 0.9999494695274581,
+      "learning_rate": 9.796945931097534e-06,
+      "loss": 1.0726,
+      "step": 13800
+    },
+    {
+      "epoch": 0.5461844212857679,
+      "grad_norm": 1.0289273225918913,
+      "learning_rate": 9.796296345567218e-06,
+      "loss": 1.0642,
+      "step": 13810
+    },
+    {
+      "epoch": 0.546579920504657,
+      "grad_norm": 1.106102173833945,
+      "learning_rate": 9.795645744263508e-06,
+      "loss": 1.0418,
+      "step": 13820
+    },
+    {
+      "epoch": 0.546975419723546,
+      "grad_norm": 1.0469389940927747,
+      "learning_rate": 9.794994127324189e-06,
+      "loss": 1.0749,
+      "step": 13830
+    },
+    {
+      "epoch": 0.5473709189424351,
+      "grad_norm": 0.9827986030176245,
+      "learning_rate": 9.794341494887262e-06,
+      "loss": 1.0552,
+      "step": 13840
+    },
+    {
+      "epoch": 0.5477664181613241,
+      "grad_norm": 1.00101825826828,
+      "learning_rate": 9.793687847090946e-06,
+      "loss": 1.0657,
+      "step": 13850
+    },
+    {
+      "epoch": 0.5481619173802131,
+      "grad_norm": 1.0129658463299986,
+      "learning_rate": 9.793033184073667e-06,
+      "loss": 1.0807,
+      "step": 13860
+    },
+    {
+      "epoch": 0.5485574165991022,
+      "grad_norm": 1.082875174178682,
+      "learning_rate": 9.792377505974076e-06,
+      "loss": 1.0688,
+      "step": 13870
+    },
+    {
+      "epoch": 0.5489529158179912,
+      "grad_norm": 1.0662794223857854,
+      "learning_rate": 9.791720812931034e-06,
+      "loss": 1.0775,
+      "step": 13880
+    },
+    {
+      "epoch": 0.5493484150368803,
+      "grad_norm": 0.9168213954046683,
+      "learning_rate": 9.791063105083616e-06,
+      "loss": 1.0532,
+      "step": 13890
+    },
+    {
+      "epoch": 0.5497439142557693,
+      "grad_norm": 1.031366588327019,
+      "learning_rate": 9.790404382571112e-06,
+      "loss": 1.07,
+      "step": 13900
+    },
+    {
+      "epoch": 0.5501394134746583,
+      "grad_norm": 0.9518098858135989,
+      "learning_rate": 9.789744645533032e-06,
+      "loss": 1.0562,
+      "step": 13910
+    },
+    {
+      "epoch": 0.5505349126935475,
+      "grad_norm": 0.993824500131525,
+      "learning_rate": 9.789083894109095e-06,
+      "loss": 1.068,
+      "step": 13920
+    },
+    {
+      "epoch": 0.5509304119124365,
+      "grad_norm": 0.9409445662655898,
+      "learning_rate": 9.788422128439237e-06,
+      "loss": 1.0706,
+      "step": 13930
+    },
+    {
+      "epoch": 0.5513259111313256,
+      "grad_norm": 1.1203258334991153,
+      "learning_rate": 9.787759348663607e-06,
+      "loss": 1.0535,
+      "step": 13940
+    },
+    {
+      "epoch": 0.5517214103502146,
+      "grad_norm": 0.9611409075486226,
+      "learning_rate": 9.787095554922573e-06,
+      "loss": 1.053,
+      "step": 13950
+    },
+    {
+      "epoch": 0.5521169095691036,
+      "grad_norm": 1.1071554899345504,
+      "learning_rate": 9.786430747356713e-06,
+      "loss": 1.0686,
+      "step": 13960
+    },
+    {
+      "epoch": 0.5525124087879927,
+      "grad_norm": 1.0078986838759314,
+      "learning_rate": 9.785764926106822e-06,
+      "loss": 1.0673,
+      "step": 13970
+    },
+    {
+      "epoch": 0.5529079080068817,
+      "grad_norm": 0.9674594540100612,
+      "learning_rate": 9.785098091313911e-06,
+      "loss": 1.0575,
+      "step": 13980
+    },
+    {
+      "epoch": 0.5533034072257708,
+      "grad_norm": 1.0299207339728886,
+      "learning_rate": 9.784430243119204e-06,
+      "loss": 1.0674,
+      "step": 13990
+    },
+    {
+      "epoch": 0.5536989064446598,
+      "grad_norm": 0.978476623511374,
+      "learning_rate": 9.783761381664138e-06,
+      "loss": 1.0602,
+      "step": 14000
+    },
+    {
+      "epoch": 0.5540944056635488,
+      "grad_norm": 0.9691692440883812,
+      "learning_rate": 9.78309150709037e-06,
+      "loss": 1.08,
+      "step": 14010
+    },
+    {
+      "epoch": 0.5544899048824379,
+      "grad_norm": 1.0221520315677104,
+      "learning_rate": 9.782420619539763e-06,
+      "loss": 1.0598,
+      "step": 14020
+    },
+    {
+      "epoch": 0.5548854041013269,
+      "grad_norm": 0.9986080154606959,
+      "learning_rate": 9.781748719154404e-06,
+      "loss": 1.0658,
+      "step": 14030
+    },
+    {
+      "epoch": 0.555280903320216,
+      "grad_norm": 1.1064300422894056,
+      "learning_rate": 9.781075806076587e-06,
+      "loss": 1.0517,
+      "step": 14040
+    },
+    {
+      "epoch": 0.555676402539105,
+      "grad_norm": 0.9499336876699358,
+      "learning_rate": 9.780401880448825e-06,
+      "loss": 1.0456,
+      "step": 14050
+    },
+    {
+      "epoch": 0.556071901757994,
+      "grad_norm": 0.9624555898317938,
+      "learning_rate": 9.779726942413844e-06,
+      "loss": 1.0625,
+      "step": 14060
+    },
+    {
+      "epoch": 0.5564674009768831,
+      "grad_norm": 0.9436090058305142,
+      "learning_rate": 9.779050992114583e-06,
+      "loss": 1.0696,
+      "step": 14070
+    },
+    {
+      "epoch": 0.5568629001957721,
+      "grad_norm": 0.9584763365172448,
+      "learning_rate": 9.778374029694197e-06,
+      "loss": 1.0516,
+      "step": 14080
+    },
+    {
+      "epoch": 0.5572583994146612,
+      "grad_norm": 0.964007302187305,
+      "learning_rate": 9.777696055296058e-06,
+      "loss": 1.0744,
+      "step": 14090
+    },
+    {
+      "epoch": 0.5576538986335502,
+      "grad_norm": 0.9322258697955476,
+      "learning_rate": 9.777017069063744e-06,
+      "loss": 1.0612,
+      "step": 14100
+    },
+    {
+      "epoch": 0.5580493978524392,
+      "grad_norm": 1.0084551723697883,
+      "learning_rate": 9.776337071141058e-06,
+      "loss": 1.0582,
+      "step": 14110
+    },
+    {
+      "epoch": 0.5584448970713283,
+      "grad_norm": 1.065298363827714,
+      "learning_rate": 9.775656061672008e-06,
+      "loss": 1.0799,
+      "step": 14120
+    },
+    {
+      "epoch": 0.5588403962902173,
+      "grad_norm": 1.0439123906506067,
+      "learning_rate": 9.774974040800822e-06,
+      "loss": 1.0602,
+      "step": 14130
+    },
+    {
+      "epoch": 0.5592358955091064,
+      "grad_norm": 1.0885594656958613,
+      "learning_rate": 9.77429100867194e-06,
+      "loss": 1.0682,
+      "step": 14140
+    },
+    {
+      "epoch": 0.5596313947279954,
+      "grad_norm": 1.0707397259869602,
+      "learning_rate": 9.773606965430015e-06,
+      "loss": 1.0678,
+      "step": 14150
+    },
+    {
+      "epoch": 0.5600268939468844,
+      "grad_norm": 1.044888408792444,
+      "learning_rate": 9.772921911219918e-06,
+      "loss": 1.069,
+      "step": 14160
+    },
+    {
+      "epoch": 0.5604223931657735,
+      "grad_norm": 0.9941824802702044,
+      "learning_rate": 9.772235846186731e-06,
+      "loss": 1.065,
+      "step": 14170
+    },
+    {
+      "epoch": 0.5608178923846625,
+      "grad_norm": 1.0578392031701096,
+      "learning_rate": 9.77154877047575e-06,
+      "loss": 1.0436,
+      "step": 14180
+    },
+    {
+      "epoch": 0.5612133916035515,
+      "grad_norm": 1.0463928978967134,
+      "learning_rate": 9.770860684232489e-06,
+      "loss": 1.0538,
+      "step": 14190
+    },
+    {
+      "epoch": 0.5616088908224406,
+      "grad_norm": 1.0104383891446775,
+      "learning_rate": 9.770171587602667e-06,
+      "loss": 1.0683,
+      "step": 14200
+    },
+    {
+      "epoch": 0.5620043900413296,
+      "grad_norm": 0.9270930435001443,
+      "learning_rate": 9.769481480732228e-06,
+      "loss": 1.059,
+      "step": 14210
+    },
+    {
+      "epoch": 0.5623998892602187,
+      "grad_norm": 1.0064178890569941,
+      "learning_rate": 9.768790363767321e-06,
+      "loss": 1.0541,
+      "step": 14220
+    },
+    {
+      "epoch": 0.5627953884791077,
+      "grad_norm": 0.950426170647676,
+      "learning_rate": 9.768098236854317e-06,
+      "loss": 1.0895,
+      "step": 14230
+    },
+    {
+      "epoch": 0.5631908876979967,
+      "grad_norm": 1.1040013044632542,
+      "learning_rate": 9.767405100139795e-06,
+      "loss": 1.0723,
+      "step": 14240
+    },
+    {
+      "epoch": 0.5635863869168858,
+      "grad_norm": 1.1705841153079175,
+      "learning_rate": 9.766710953770547e-06,
+      "loss": 1.0472,
+      "step": 14250
+    },
+    {
+      "epoch": 0.5639818861357749,
+      "grad_norm": 0.9712752067057575,
+      "learning_rate": 9.766015797893585e-06,
+      "loss": 1.0745,
+      "step": 14260
+    },
+    {
+      "epoch": 0.564377385354664,
+      "grad_norm": 0.9569803366536023,
+      "learning_rate": 9.765319632656127e-06,
+      "loss": 1.0667,
+      "step": 14270
+    },
+    {
+      "epoch": 0.564772884573553,
+      "grad_norm": 1.0538959133057306,
+      "learning_rate": 9.764622458205613e-06,
+      "loss": 1.0803,
+      "step": 14280
+    },
+    {
+      "epoch": 0.565168383792442,
+      "grad_norm": 1.0279622216414128,
+      "learning_rate": 9.76392427468969e-06,
+      "loss": 1.0537,
+      "step": 14290
+    },
+    {
+      "epoch": 0.5655638830113311,
+      "grad_norm": 1.1047931819810026,
+      "learning_rate": 9.763225082256222e-06,
+      "loss": 1.0642,
+      "step": 14300
+    },
+    {
+      "epoch": 0.5659593822302201,
+      "grad_norm": 1.094562900905085,
+      "learning_rate": 9.762524881053286e-06,
+      "loss": 1.0645,
+      "step": 14310
+    },
+    {
+      "epoch": 0.5663548814491092,
+      "grad_norm": 1.2120506375073645,
+      "learning_rate": 9.761823671229174e-06,
+      "loss": 1.0855,
+      "step": 14320
+    },
+    {
+      "epoch": 0.5667503806679982,
+      "grad_norm": 1.0465353063688134,
+      "learning_rate": 9.761121452932388e-06,
+      "loss": 1.0594,
+      "step": 14330
+    },
+    {
+      "epoch": 0.5671458798868872,
+      "grad_norm": 1.0059219324897901,
+      "learning_rate": 9.760418226311645e-06,
+      "loss": 1.0612,
+      "step": 14340
+    },
+    {
+      "epoch": 0.5675413791057763,
+      "grad_norm": 1.0816828350475822,
+      "learning_rate": 9.75971399151588e-06,
+      "loss": 1.0629,
+      "step": 14350
+    },
+    {
+      "epoch": 0.5679368783246653,
+      "grad_norm": 1.0336399381857253,
+      "learning_rate": 9.759008748694236e-06,
+      "loss": 1.047,
+      "step": 14360
+    },
+    {
+      "epoch": 0.5683323775435544,
+      "grad_norm": 0.9435486631404181,
+      "learning_rate": 9.75830249799607e-06,
+      "loss": 1.0451,
+      "step": 14370
+    },
+    {
+      "epoch": 0.5687278767624434,
+      "grad_norm": 1.0058353522521832,
+      "learning_rate": 9.757595239570956e-06,
+      "loss": 1.0663,
+      "step": 14380
+    },
+    {
+      "epoch": 0.5691233759813324,
+      "grad_norm": 1.0917494701626327,
+      "learning_rate": 9.756886973568678e-06,
+      "loss": 1.0628,
+      "step": 14390
+    },
+    {
+      "epoch": 0.5695188752002215,
+      "grad_norm": 0.9750835519172512,
+      "learning_rate": 9.756177700139235e-06,
+      "loss": 1.0648,
+      "step": 14400
+    },
+    {
+      "epoch": 0.5699143744191105,
+      "grad_norm": 0.979520248566433,
+      "learning_rate": 9.75546741943284e-06,
+      "loss": 1.0469,
+      "step": 14410
+    },
+    {
+      "epoch": 0.5703098736379996,
+      "grad_norm": 1.065799273200916,
+      "learning_rate": 9.754756131599916e-06,
+      "loss": 1.0796,
+      "step": 14420
+    },
+    {
+      "epoch": 0.5707053728568886,
+      "grad_norm": 1.1191241359780404,
+      "learning_rate": 9.754043836791102e-06,
+      "loss": 1.0693,
+      "step": 14430
+    },
+    {
+      "epoch": 0.5711008720757776,
+      "grad_norm": 1.1901337483605807,
+      "learning_rate": 9.753330535157251e-06,
+      "loss": 1.0558,
+      "step": 14440
+    },
+    {
+      "epoch": 0.5714963712946667,
+      "grad_norm": 1.0271929833606757,
+      "learning_rate": 9.752616226849429e-06,
+      "loss": 1.0498,
+      "step": 14450
+    },
+    {
+      "epoch": 0.5718918705135557,
+      "grad_norm": 0.9220748471813301,
+      "learning_rate": 9.751900912018911e-06,
+      "loss": 1.076,
+      "step": 14460
+    },
+    {
+      "epoch": 0.5722873697324448,
+      "grad_norm": 1.0016342467326624,
+      "learning_rate": 9.75118459081719e-06,
+      "loss": 1.081,
+      "step": 14470
+    },
+    {
+      "epoch": 0.5726828689513338,
+      "grad_norm": 0.9945342400947232,
+      "learning_rate": 9.750467263395973e-06,
+      "loss": 1.0581,
+      "step": 14480
+    },
+    {
+      "epoch": 0.5730783681702228,
+      "grad_norm": 1.0776017816556556,
+      "learning_rate": 9.749748929907175e-06,
+      "loss": 1.0552,
+      "step": 14490
+    },
+    {
+      "epoch": 0.5734738673891119,
+      "grad_norm": 1.0570637214839262,
+      "learning_rate": 9.749029590502926e-06,
+      "loss": 1.0491,
+      "step": 14500
+    },
+    {
+      "epoch": 0.5738693666080009,
+      "grad_norm": 1.048554258133626,
+      "learning_rate": 9.748309245335572e-06,
+      "loss": 1.0664,
+      "step": 14510
+    },
+    {
+      "epoch": 0.57426486582689,
+      "grad_norm": 1.0277972902139945,
+      "learning_rate": 9.747587894557668e-06,
+      "loss": 1.0731,
+      "step": 14520
+    },
+    {
+      "epoch": 0.574660365045779,
+      "grad_norm": 1.0466018342543084,
+      "learning_rate": 9.746865538321985e-06,
+      "loss": 1.0622,
+      "step": 14530
+    },
+    {
+      "epoch": 0.575055864264668,
+      "grad_norm": 1.0700970016881668,
+      "learning_rate": 9.746142176781505e-06,
+      "loss": 1.0761,
+      "step": 14540
+    },
+    {
+      "epoch": 0.5754513634835571,
+      "grad_norm": 1.0542186557957431,
+      "learning_rate": 9.745417810089424e-06,
+      "loss": 1.0603,
+      "step": 14550
+    },
+    {
+      "epoch": 0.5758468627024461,
+      "grad_norm": 1.0097740192347697,
+      "learning_rate": 9.74469243839915e-06,
+      "loss": 1.0478,
+      "step": 14560
+    },
+    {
+      "epoch": 0.5762423619213352,
+      "grad_norm": 1.0658781968262367,
+      "learning_rate": 9.743966061864305e-06,
+      "loss": 1.0635,
+      "step": 14570
+    },
+    {
+      "epoch": 0.5766378611402242,
+      "grad_norm": 1.04669305300436,
+      "learning_rate": 9.743238680638723e-06,
+      "loss": 1.0496,
+      "step": 14580
+    },
+    {
+      "epoch": 0.5770333603591133,
+      "grad_norm": 1.0859311684312374,
+      "learning_rate": 9.742510294876448e-06,
+      "loss": 1.0651,
+      "step": 14590
+    },
+    {
+      "epoch": 0.5774288595780024,
+      "grad_norm": 1.0365819330251662,
+      "learning_rate": 9.741780904731745e-06,
+      "loss": 1.0693,
+      "step": 14600
+    },
+    {
+      "epoch": 0.5778243587968914,
+      "grad_norm": 1.0669536495448684,
+      "learning_rate": 9.741050510359083e-06,
+      "loss": 1.0691,
+      "step": 14610
+    },
+    {
+      "epoch": 0.5782198580157805,
+      "grad_norm": 1.0259895326595243,
+      "learning_rate": 9.740319111913147e-06,
+      "loss": 1.0801,
+      "step": 14620
+    },
+    {
+      "epoch": 0.5786153572346695,
+      "grad_norm": 0.9263003103008343,
+      "learning_rate": 9.739586709548833e-06,
+      "loss": 1.0552,
+      "step": 14630
+    },
+    {
+      "epoch": 0.5790108564535585,
+      "grad_norm": 1.0363470442134415,
+      "learning_rate": 9.738853303421256e-06,
+      "loss": 1.0681,
+      "step": 14640
+    },
+    {
+      "epoch": 0.5794063556724476,
+      "grad_norm": 0.9658290966503398,
+      "learning_rate": 9.738118893685737e-06,
+      "loss": 1.062,
+      "step": 14650
+    },
+    {
+      "epoch": 0.5798018548913366,
+      "grad_norm": 1.0533008924353124,
+      "learning_rate": 9.73738348049781e-06,
+      "loss": 1.0508,
+      "step": 14660
+    },
+    {
+      "epoch": 0.5801973541102257,
+      "grad_norm": 0.9851834646191139,
+      "learning_rate": 9.736647064013222e-06,
+      "loss": 1.0586,
+      "step": 14670
+    },
+    {
+      "epoch": 0.5805928533291147,
+      "grad_norm": 1.009778085247017,
+      "learning_rate": 9.735909644387935e-06,
+      "loss": 1.0615,
+      "step": 14680
+    },
+    {
+      "epoch": 0.5809883525480037,
+      "grad_norm": 1.0037987651638043,
+      "learning_rate": 9.735171221778124e-06,
+      "loss": 1.0615,
+      "step": 14690
+    },
+    {
+      "epoch": 0.5813838517668928,
+      "grad_norm": 1.0372469593333695,
+      "learning_rate": 9.73443179634017e-06,
+      "loss": 1.082,
+      "step": 14700
+    },
+    {
+      "epoch": 0.5817793509857818,
+      "grad_norm": 1.0512479846633207,
+      "learning_rate": 9.733691368230674e-06,
+      "loss": 1.0425,
+      "step": 14710
+    },
+    {
+      "epoch": 0.5821748502046709,
+      "grad_norm": 1.0047544364879402,
+      "learning_rate": 9.732949937606443e-06,
+      "loss": 1.0559,
+      "step": 14720
+    },
+    {
+      "epoch": 0.5825703494235599,
+      "grad_norm": 1.059647319417386,
+      "learning_rate": 9.732207504624502e-06,
+      "loss": 1.0492,
+      "step": 14730
+    },
+    {
+      "epoch": 0.5829658486424489,
+      "grad_norm": 1.0417221530055403,
+      "learning_rate": 9.731464069442085e-06,
+      "loss": 1.0594,
+      "step": 14740
+    },
+    {
+      "epoch": 0.583361347861338,
+      "grad_norm": 1.0665569096635807,
+      "learning_rate": 9.730719632216639e-06,
+      "loss": 1.0523,
+      "step": 14750
+    },
+    {
+      "epoch": 0.583756847080227,
+      "grad_norm": 1.0608520584836654,
+      "learning_rate": 9.729974193105821e-06,
+      "loss": 1.0702,
+      "step": 14760
+    },
+    {
+      "epoch": 0.584152346299116,
+      "grad_norm": 0.994228321768493,
+      "learning_rate": 9.729227752267505e-06,
+      "loss": 1.0664,
+      "step": 14770
+    },
+    {
+      "epoch": 0.5845478455180051,
+      "grad_norm": 1.0294958669837573,
+      "learning_rate": 9.728480309859772e-06,
+      "loss": 1.0598,
+      "step": 14780
+    },
+    {
+      "epoch": 0.5849433447368941,
+      "grad_norm": 1.0246580273768906,
+      "learning_rate": 9.727731866040919e-06,
+      "loss": 1.0885,
+      "step": 14790
+    },
+    {
+      "epoch": 0.5853388439557832,
+      "grad_norm": 1.000945043460481,
+      "learning_rate": 9.726982420969453e-06,
+      "loss": 1.0444,
+      "step": 14800
+    },
+    {
+      "epoch": 0.5857343431746722,
+      "grad_norm": 1.048116849601965,
+      "learning_rate": 9.726231974804095e-06,
+      "loss": 1.0578,
+      "step": 14810
+    },
+    {
+      "epoch": 0.5861298423935613,
+      "grad_norm": 0.9969029486415593,
+      "learning_rate": 9.725480527703775e-06,
+      "loss": 1.0746,
+      "step": 14820
+    },
+    {
+      "epoch": 0.5865253416124503,
+      "grad_norm": 1.1077102508394456,
+      "learning_rate": 9.724728079827636e-06,
+      "loss": 1.0643,
+      "step": 14830
+    },
+    {
+      "epoch": 0.5869208408313393,
+      "grad_norm": 0.9665468573186794,
+      "learning_rate": 9.723974631335036e-06,
+      "loss": 1.0538,
+      "step": 14840
+    },
+    {
+      "epoch": 0.5873163400502284,
+      "grad_norm": 1.0920165976368272,
+      "learning_rate": 9.72322018238554e-06,
+      "loss": 1.0557,
+      "step": 14850
+    },
+    {
+      "epoch": 0.5877118392691174,
+      "grad_norm": 1.0406138810349053,
+      "learning_rate": 9.722464733138929e-06,
+      "loss": 1.0533,
+      "step": 14860
+    },
+    {
+      "epoch": 0.5881073384880064,
+      "grad_norm": 0.985438850282144,
+      "learning_rate": 9.721708283755193e-06,
+      "loss": 1.0551,
+      "step": 14870
+    },
+    {
+      "epoch": 0.5885028377068955,
+      "grad_norm": 0.942142456971354,
+      "learning_rate": 9.720950834394535e-06,
+      "loss": 1.0613,
+      "step": 14880
+    },
+    {
+      "epoch": 0.5888983369257845,
+      "grad_norm": 1.0650758372360574,
+      "learning_rate": 9.720192385217373e-06,
+      "loss": 1.0735,
+      "step": 14890
+    },
+    {
+      "epoch": 0.5892938361446736,
+      "grad_norm": 0.9986753318280494,
+      "learning_rate": 9.71943293638433e-06,
+      "loss": 1.057,
+      "step": 14900
+    },
+    {
+      "epoch": 0.5896893353635626,
+      "grad_norm": 0.9695326569791033,
+      "learning_rate": 9.718672488056245e-06,
+      "loss": 1.0681,
+      "step": 14910
+    },
+    {
+      "epoch": 0.5900848345824516,
+      "grad_norm": 0.981208355486021,
+      "learning_rate": 9.717911040394168e-06,
+      "loss": 1.0614,
+      "step": 14920
+    },
+    {
+      "epoch": 0.5904803338013408,
+      "grad_norm": 1.0830526413000467,
+      "learning_rate": 9.717148593559361e-06,
+      "loss": 1.0409,
+      "step": 14930
+    },
+    {
+      "epoch": 0.5908758330202298,
+      "grad_norm": 1.030443542194117,
+      "learning_rate": 9.716385147713296e-06,
+      "loss": 1.0527,
+      "step": 14940
+    },
+    {
+      "epoch": 0.5912713322391189,
+      "grad_norm": 0.9855361413370963,
+      "learning_rate": 9.715620703017662e-06,
+      "loss": 1.0646,
+      "step": 14950
+    },
+    {
+      "epoch": 0.5916668314580079,
+      "grad_norm": 0.9036056328509712,
+      "learning_rate": 9.714855259634348e-06,
+      "loss": 1.0544,
+      "step": 14960
+    },
+    {
+      "epoch": 0.592062330676897,
+      "grad_norm": 1.1764351894929956,
+      "learning_rate": 9.71408881772547e-06,
+      "loss": 1.0483,
+      "step": 14970
+    },
+    {
+      "epoch": 0.592457829895786,
+      "grad_norm": 1.0102986546707016,
+      "learning_rate": 9.71332137745334e-06,
+      "loss": 1.0631,
+      "step": 14980
+    },
+    {
+      "epoch": 0.592853329114675,
+      "grad_norm": 0.977979001140588,
+      "learning_rate": 9.71255293898049e-06,
+      "loss": 1.0519,
+      "step": 14990
+    },
+    {
+      "epoch": 0.5932488283335641,
+      "grad_norm": 0.9904635712879952,
+      "learning_rate": 9.711783502469667e-06,
+      "loss": 1.057,
+      "step": 15000
+    },
+    {
+      "epoch": 0.5936443275524531,
+      "grad_norm": 1.0094491684538063,
+      "learning_rate": 9.711013068083822e-06,
+      "loss": 1.0558,
+      "step": 15010
+    },
+    {
+      "epoch": 0.5940398267713421,
+      "grad_norm": 0.9579777214000441,
+      "learning_rate": 9.710241635986118e-06,
+      "loss": 1.0507,
+      "step": 15020
+    },
+    {
+      "epoch": 0.5944353259902312,
+      "grad_norm": 1.026933614967618,
+      "learning_rate": 9.70946920633993e-06,
+      "loss": 1.0542,
+      "step": 15030
+    },
+    {
+      "epoch": 0.5948308252091202,
+      "grad_norm": 1.0228934255346838,
+      "learning_rate": 9.70869577930885e-06,
+      "loss": 1.0458,
+      "step": 15040
+    },
+    {
+      "epoch": 0.5952263244280093,
+      "grad_norm": 1.0487346580375194,
+      "learning_rate": 9.707921355056674e-06,
+      "loss": 1.0539,
+      "step": 15050
+    },
+    {
+      "epoch": 0.5956218236468983,
+      "grad_norm": 0.9507838990940268,
+      "learning_rate": 9.707145933747412e-06,
+      "loss": 1.0519,
+      "step": 15060
+    },
+    {
+      "epoch": 0.5960173228657873,
+      "grad_norm": 1.0200610160859969,
+      "learning_rate": 9.706369515545285e-06,
+      "loss": 1.036,
+      "step": 15070
+    },
+    {
+      "epoch": 0.5964128220846764,
+      "grad_norm": 0.9810991452812137,
+      "learning_rate": 9.705592100614724e-06,
+      "loss": 1.0584,
+      "step": 15080
+    },
+    {
+      "epoch": 0.5968083213035654,
+      "grad_norm": 1.0084529514505371,
+      "learning_rate": 9.704813689120374e-06,
+      "loss": 1.0555,
+      "step": 15090
+    },
+    {
+      "epoch": 0.5972038205224545,
+      "grad_norm": 1.232769552998101,
+      "learning_rate": 9.704034281227089e-06,
+      "loss": 1.0543,
+      "step": 15100
+    },
+    {
+      "epoch": 0.5975993197413435,
+      "grad_norm": 1.0423027039004151,
+      "learning_rate": 9.703253877099932e-06,
+      "loss": 1.0576,
+      "step": 15110
+    },
+    {
+      "epoch": 0.5979948189602325,
+      "grad_norm": 1.071786517794014,
+      "learning_rate": 9.702472476904183e-06,
+      "loss": 1.0414,
+      "step": 15120
+    },
+    {
+      "epoch": 0.5983903181791216,
+      "grad_norm": 0.9894882961863555,
+      "learning_rate": 9.701690080805325e-06,
+      "loss": 1.0532,
+      "step": 15130
+    },
+    {
+      "epoch": 0.5987858173980106,
+      "grad_norm": 1.0409108316873947,
+      "learning_rate": 9.70090668896906e-06,
+      "loss": 1.0551,
+      "step": 15140
+    },
+    {
+      "epoch": 0.5991813166168997,
+      "grad_norm": 0.9896086036179519,
+      "learning_rate": 9.700122301561294e-06,
+      "loss": 1.0563,
+      "step": 15150
+    },
+    {
+      "epoch": 0.5995768158357887,
+      "grad_norm": 1.0695496378107894,
+      "learning_rate": 9.699336918748148e-06,
+      "loss": 1.0412,
+      "step": 15160
+    },
+    {
+      "epoch": 0.5999723150546777,
+      "grad_norm": 1.0684644238315584,
+      "learning_rate": 9.698550540695952e-06,
+      "loss": 1.0342,
+      "step": 15170
+    },
+    {
+      "epoch": 0.6003678142735668,
+      "grad_norm": 0.9932752202379643,
+      "learning_rate": 9.69776316757125e-06,
+      "loss": 1.044,
+      "step": 15180
+    },
+    {
+      "epoch": 0.6007633134924558,
+      "grad_norm": 0.9335087742220949,
+      "learning_rate": 9.69697479954079e-06,
+      "loss": 1.065,
+      "step": 15190
+    },
+    {
+      "epoch": 0.6011588127113449,
+      "grad_norm": 0.9471058124439672,
+      "learning_rate": 9.696185436771537e-06,
+      "loss": 1.051,
+      "step": 15200
+    },
+    {
+      "epoch": 0.6015543119302339,
+      "grad_norm": 1.1107762981191798,
+      "learning_rate": 9.695395079430665e-06,
+      "loss": 1.0739,
+      "step": 15210
+    },
+    {
+      "epoch": 0.6019498111491229,
+      "grad_norm": 1.0135798265705005,
+      "learning_rate": 9.694603727685558e-06,
+      "loss": 1.0464,
+      "step": 15220
+    },
+    {
+      "epoch": 0.602345310368012,
+      "grad_norm": 1.0994180124371509,
+      "learning_rate": 9.69381138170381e-06,
+      "loss": 1.0383,
+      "step": 15230
+    },
+    {
+      "epoch": 0.602740809586901,
+      "grad_norm": 1.0086090208781033,
+      "learning_rate": 9.693018041653226e-06,
+      "loss": 1.0597,
+      "step": 15240
+    },
+    {
+      "epoch": 0.6031363088057901,
+      "grad_norm": 0.9852983261676425,
+      "learning_rate": 9.692223707701823e-06,
+      "loss": 1.0536,
+      "step": 15250
+    },
+    {
+      "epoch": 0.6035318080246792,
+      "grad_norm": 0.9596718925605977,
+      "learning_rate": 9.691428380017827e-06,
+      "loss": 1.0365,
+      "step": 15260
+    },
+    {
+      "epoch": 0.6039273072435682,
+      "grad_norm": 1.0507673134614033,
+      "learning_rate": 9.690632058769673e-06,
+      "loss": 1.0461,
+      "step": 15270
+    },
+    {
+      "epoch": 0.6043228064624573,
+      "grad_norm": 1.1726382182933972,
+      "learning_rate": 9.689834744126013e-06,
+      "loss": 1.0563,
+      "step": 15280
+    },
+    {
+      "epoch": 0.6047183056813463,
+      "grad_norm": 1.035216247921188,
+      "learning_rate": 9.689036436255698e-06,
+      "loss": 1.0551,
+      "step": 15290
+    },
+    {
+      "epoch": 0.6051138049002354,
+      "grad_norm": 1.0455376045216924,
+      "learning_rate": 9.6882371353278e-06,
+      "loss": 1.0523,
+      "step": 15300
+    },
+    {
+      "epoch": 0.6055093041191244,
+      "grad_norm": 0.9260890212225377,
+      "learning_rate": 9.687436841511598e-06,
+      "loss": 1.0386,
+      "step": 15310
+    },
+    {
+      "epoch": 0.6059048033380134,
+      "grad_norm": 1.0409545590750842,
+      "learning_rate": 9.686635554976577e-06,
+      "loss": 1.0686,
+      "step": 15320
+    },
+    {
+      "epoch": 0.6063003025569025,
+      "grad_norm": 1.0109137515069873,
+      "learning_rate": 9.685833275892441e-06,
+      "loss": 1.0684,
+      "step": 15330
+    },
+    {
+      "epoch": 0.6066958017757915,
+      "grad_norm": 1.0402733994453395,
+      "learning_rate": 9.685030004429093e-06,
+      "loss": 1.0603,
+      "step": 15340
+    },
+    {
+      "epoch": 0.6070913009946806,
+      "grad_norm": 1.0978306075614528,
+      "learning_rate": 9.684225740756655e-06,
+      "loss": 1.046,
+      "step": 15350
+    },
+    {
+      "epoch": 0.6074868002135696,
+      "grad_norm": 1.0396316491266315,
+      "learning_rate": 9.683420485045458e-06,
+      "loss": 1.0312,
+      "step": 15360
+    },
+    {
+      "epoch": 0.6078822994324586,
+      "grad_norm": 1.0253989712894895,
+      "learning_rate": 9.682614237466037e-06,
+      "loss": 1.076,
+      "step": 15370
+    },
+    {
+      "epoch": 0.6082777986513477,
+      "grad_norm": 1.072807339417918,
+      "learning_rate": 9.681806998189145e-06,
+      "loss": 1.0611,
+      "step": 15380
+    },
+    {
+      "epoch": 0.6086732978702367,
+      "grad_norm": 1.0045632014713812,
+      "learning_rate": 9.68099876738574e-06,
+      "loss": 1.0589,
+      "step": 15390
+    },
+    {
+      "epoch": 0.6090687970891258,
+      "grad_norm": 1.0831603732613064,
+      "learning_rate": 9.680189545226993e-06,
+      "loss": 1.0382,
+      "step": 15400
+    },
+    {
+      "epoch": 0.6094642963080148,
+      "grad_norm": 1.041366522961604,
+      "learning_rate": 9.679379331884282e-06,
+      "loss": 1.0433,
+      "step": 15410
+    },
+    {
+      "epoch": 0.6098597955269038,
+      "grad_norm": 0.9781163961992932,
+      "learning_rate": 9.678568127529196e-06,
+      "loss": 1.046,
+      "step": 15420
+    },
+    {
+      "epoch": 0.6102552947457929,
+      "grad_norm": 1.1066566274300034,
+      "learning_rate": 9.677755932333535e-06,
+      "loss": 1.0579,
+      "step": 15430
+    },
+    {
+      "epoch": 0.6106507939646819,
+      "grad_norm": 1.0437910028356834,
+      "learning_rate": 9.676942746469308e-06,
+      "loss": 1.0518,
+      "step": 15440
+    },
+    {
+      "epoch": 0.611046293183571,
+      "grad_norm": 0.9342493840171474,
+      "learning_rate": 9.676128570108732e-06,
+      "loss": 1.0265,
+      "step": 15450
+    },
+    {
+      "epoch": 0.61144179240246,
+      "grad_norm": 1.1150442198080608,
+      "learning_rate": 9.67531340342424e-06,
+      "loss": 1.057,
+      "step": 15460
+    },
+    {
+      "epoch": 0.611837291621349,
+      "grad_norm": 1.0510810001841868,
+      "learning_rate": 9.674497246588464e-06,
+      "loss": 1.0262,
+      "step": 15470
+    },
+    {
+      "epoch": 0.6122327908402381,
+      "grad_norm": 1.07587190794317,
+      "learning_rate": 9.67368009977426e-06,
+      "loss": 1.0686,
+      "step": 15480
+    },
+    {
+      "epoch": 0.6126282900591271,
+      "grad_norm": 1.0719162711619243,
+      "learning_rate": 9.672861963154676e-06,
+      "loss": 1.0535,
+      "step": 15490
+    },
+    {
+      "epoch": 0.6130237892780162,
+      "grad_norm": 1.097635840680599,
+      "learning_rate": 9.672042836902989e-06,
+      "loss": 1.0575,
+      "step": 15500
+    },
+    {
+      "epoch": 0.6134192884969052,
+      "grad_norm": 0.9399799153538689,
+      "learning_rate": 9.671222721192667e-06,
+      "loss": 1.0388,
+      "step": 15510
+    },
+    {
+      "epoch": 0.6138147877157942,
+      "grad_norm": 1.0576549713820989,
+      "learning_rate": 9.670401616197405e-06,
+      "loss": 1.0558,
+      "step": 15520
+    },
+    {
+      "epoch": 0.6142102869346833,
+      "grad_norm": 0.984195688053569,
+      "learning_rate": 9.669579522091092e-06,
+      "loss": 1.0526,
+      "step": 15530
+    },
+    {
+      "epoch": 0.6146057861535723,
+      "grad_norm": 1.084314040671667,
+      "learning_rate": 9.66875643904784e-06,
+      "loss": 1.0453,
+      "step": 15540
+    },
+    {
+      "epoch": 0.6150012853724613,
+      "grad_norm": 0.8960575809026324,
+      "learning_rate": 9.667932367241956e-06,
+      "loss": 1.0423,
+      "step": 15550
+    },
+    {
+      "epoch": 0.6153967845913504,
+      "grad_norm": 1.0143928544253158,
+      "learning_rate": 9.667107306847972e-06,
+      "loss": 1.0317,
+      "step": 15560
+    },
+    {
+      "epoch": 0.6157922838102394,
+      "grad_norm": 1.0832845046931834,
+      "learning_rate": 9.666281258040616e-06,
+      "loss": 1.0371,
+      "step": 15570
+    },
+    {
+      "epoch": 0.6161877830291285,
+      "grad_norm": 1.05790424790035,
+      "learning_rate": 9.665454220994835e-06,
+      "loss": 1.064,
+      "step": 15580
+    },
+    {
+      "epoch": 0.6165832822480175,
+      "grad_norm": 1.0221071763048168,
+      "learning_rate": 9.664626195885778e-06,
+      "loss": 1.0299,
+      "step": 15590
+    },
+    {
+      "epoch": 0.6169787814669067,
+      "grad_norm": 1.0297733962805455,
+      "learning_rate": 9.663797182888808e-06,
+      "loss": 1.0679,
+      "step": 15600
+    },
+    {
+      "epoch": 0.6173742806857957,
+      "grad_norm": 1.0087097348831306,
+      "learning_rate": 9.662967182179496e-06,
+      "loss": 1.0576,
+      "step": 15610
+    },
+    {
+      "epoch": 0.6177697799046847,
+      "grad_norm": 0.9769581711431045,
+      "learning_rate": 9.662136193933621e-06,
+      "loss": 1.0482,
+      "step": 15620
+    },
+    {
+      "epoch": 0.6181652791235738,
+      "grad_norm": 0.9336445849395211,
+      "learning_rate": 9.661304218327175e-06,
+      "loss": 1.0424,
+      "step": 15630
+    },
+    {
+      "epoch": 0.6185607783424628,
+      "grad_norm": 0.9897697543891534,
+      "learning_rate": 9.660471255536351e-06,
+      "loss": 1.0499,
+      "step": 15640
+    },
+    {
+      "epoch": 0.6189562775613519,
+      "grad_norm": 1.032804884253202,
+      "learning_rate": 9.65963730573756e-06,
+      "loss": 1.0512,
+      "step": 15650
+    },
+    {
+      "epoch": 0.6193517767802409,
+      "grad_norm": 1.099633749274004,
+      "learning_rate": 9.65880236910742e-06,
+      "loss": 1.0676,
+      "step": 15660
+    },
+    {
+      "epoch": 0.6197472759991299,
+      "grad_norm": 1.0084198908906983,
+      "learning_rate": 9.65796644582275e-06,
+      "loss": 1.0656,
+      "step": 15670
+    },
+    {
+      "epoch": 0.620142775218019,
+      "grad_norm": 0.9732233670313296,
+      "learning_rate": 9.65712953606059e-06,
+      "loss": 1.0412,
+      "step": 15680
+    },
+    {
+      "epoch": 0.620538274436908,
+      "grad_norm": 1.038136679894662,
+      "learning_rate": 9.65629163999818e-06,
+      "loss": 1.0384,
+      "step": 15690
+    },
+    {
+      "epoch": 0.620933773655797,
+      "grad_norm": 1.1081802595384365,
+      "learning_rate": 9.655452757812973e-06,
+      "loss": 1.0491,
+      "step": 15700
+    },
+    {
+      "epoch": 0.6213292728746861,
+      "grad_norm": 1.0638382984713737,
+      "learning_rate": 9.65461288968263e-06,
+      "loss": 1.0499,
+      "step": 15710
+    },
+    {
+      "epoch": 0.6217247720935751,
+      "grad_norm": 1.0214782133966622,
+      "learning_rate": 9.653772035785023e-06,
+      "loss": 1.0634,
+      "step": 15720
+    },
+    {
+      "epoch": 0.6221202713124642,
+      "grad_norm": 1.012611138352532,
+      "learning_rate": 9.652930196298226e-06,
+      "loss": 1.0578,
+      "step": 15730
+    },
+    {
+      "epoch": 0.6225157705313532,
+      "grad_norm": 0.8993044991401044,
+      "learning_rate": 9.65208737140053e-06,
+      "loss": 1.043,
+      "step": 15740
+    },
+    {
+      "epoch": 0.6229112697502422,
+      "grad_norm": 1.0319283157964163,
+      "learning_rate": 9.651243561270427e-06,
+      "loss": 1.0505,
+      "step": 15750
+    },
+    {
+      "epoch": 0.6233067689691313,
+      "grad_norm": 1.032345416509229,
+      "learning_rate": 9.650398766086624e-06,
+      "loss": 1.0415,
+      "step": 15760
+    },
+    {
+      "epoch": 0.6237022681880203,
+      "grad_norm": 1.0426650875127512,
+      "learning_rate": 9.649552986028035e-06,
+      "loss": 1.0532,
+      "step": 15770
+    },
+    {
+      "epoch": 0.6240977674069094,
+      "grad_norm": 0.9625948990820244,
+      "learning_rate": 9.64870622127378e-06,
+      "loss": 1.0467,
+      "step": 15780
+    },
+    {
+      "epoch": 0.6244932666257984,
+      "grad_norm": 0.9608687302340226,
+      "learning_rate": 9.64785847200319e-06,
+      "loss": 1.0649,
+      "step": 15790
+    },
+    {
+      "epoch": 0.6248887658446874,
+      "grad_norm": 1.0803737059788474,
+      "learning_rate": 9.647009738395804e-06,
+      "loss": 1.0502,
+      "step": 15800
+    },
+    {
+      "epoch": 0.6252842650635765,
+      "grad_norm": 1.0114238021072945,
+      "learning_rate": 9.646160020631368e-06,
+      "loss": 1.0232,
+      "step": 15810
+    },
+    {
+      "epoch": 0.6256797642824655,
+      "grad_norm": 1.0317233012910534,
+      "learning_rate": 9.64530931888984e-06,
+      "loss": 1.0721,
+      "step": 15820
+    },
+    {
+      "epoch": 0.6260752635013546,
+      "grad_norm": 1.0238292035055239,
+      "learning_rate": 9.644457633351381e-06,
+      "loss": 1.0485,
+      "step": 15830
+    },
+    {
+      "epoch": 0.6264707627202436,
+      "grad_norm": 0.9969319682162873,
+      "learning_rate": 9.643604964196365e-06,
+      "loss": 1.0682,
+      "step": 15840
+    },
+    {
+      "epoch": 0.6268662619391326,
+      "grad_norm": 1.0217425046667716,
+      "learning_rate": 9.642751311605374e-06,
+      "loss": 1.0454,
+      "step": 15850
+    },
+    {
+      "epoch": 0.6272617611580217,
+      "grad_norm": 1.0961234155411588,
+      "learning_rate": 9.641896675759195e-06,
+      "loss": 1.0327,
+      "step": 15860
+    },
+    {
+      "epoch": 0.6276572603769107,
+      "grad_norm": 1.1694052823840964,
+      "learning_rate": 9.641041056838826e-06,
+      "loss": 1.0332,
+      "step": 15870
+    },
+    {
+      "epoch": 0.6280527595957998,
+      "grad_norm": 0.9475213666808329,
+      "learning_rate": 9.640184455025472e-06,
+      "loss": 1.0481,
+      "step": 15880
+    },
+    {
+      "epoch": 0.6284482588146888,
+      "grad_norm": 1.040034342977265,
+      "learning_rate": 9.639326870500548e-06,
+      "loss": 1.0691,
+      "step": 15890
+    },
+    {
+      "epoch": 0.6288437580335778,
+      "grad_norm": 0.9950895716174126,
+      "learning_rate": 9.638468303445672e-06,
+      "loss": 1.0389,
+      "step": 15900
+    },
+    {
+      "epoch": 0.6292392572524669,
+      "grad_norm": 1.0371939192552084,
+      "learning_rate": 9.63760875404268e-06,
+      "loss": 1.0547,
+      "step": 15910
+    },
+    {
+      "epoch": 0.6296347564713559,
+      "grad_norm": 1.0533304795531735,
+      "learning_rate": 9.636748222473603e-06,
+      "loss": 1.0313,
+      "step": 15920
+    },
+    {
+      "epoch": 0.6300302556902451,
+      "grad_norm": 1.0260420868800268,
+      "learning_rate": 9.635886708920692e-06,
+      "loss": 1.0554,
+      "step": 15930
+    },
+    {
+      "epoch": 0.6304257549091341,
+      "grad_norm": 1.003458445035818,
+      "learning_rate": 9.635024213566399e-06,
+      "loss": 1.0469,
+      "step": 15940
+    },
+    {
+      "epoch": 0.6308212541280231,
+      "grad_norm": 1.0051505810602934,
+      "learning_rate": 9.634160736593385e-06,
+      "loss": 1.0326,
+      "step": 15950
+    },
+    {
+      "epoch": 0.6312167533469122,
+      "grad_norm": 1.0415000505886955,
+      "learning_rate": 9.633296278184521e-06,
+      "loss": 1.0459,
+      "step": 15960
+    },
+    {
+      "epoch": 0.6316122525658012,
+      "grad_norm": 1.066981934396122,
+      "learning_rate": 9.632430838522883e-06,
+      "loss": 1.0509,
+      "step": 15970
+    },
+    {
+      "epoch": 0.6320077517846903,
+      "grad_norm": 1.065692254459778,
+      "learning_rate": 9.631564417791758e-06,
+      "loss": 1.0424,
+      "step": 15980
+    },
+    {
+      "epoch": 0.6324032510035793,
+      "grad_norm": 0.9537518530300072,
+      "learning_rate": 9.630697016174637e-06,
+      "loss": 1.0395,
+      "step": 15990
+    },
+    {
+      "epoch": 0.6327987502224683,
+      "grad_norm": 0.9796569512744613,
+      "learning_rate": 9.629828633855223e-06,
+      "loss": 1.0698,
+      "step": 16000
+    },
+    {
+      "epoch": 0.6331942494413574,
+      "grad_norm": 1.0188668524559308,
+      "learning_rate": 9.628959271017424e-06,
+      "loss": 1.0451,
+      "step": 16010
+    },
+    {
+      "epoch": 0.6335897486602464,
+      "grad_norm": 1.0699027224160875,
+      "learning_rate": 9.628088927845354e-06,
+      "loss": 1.0577,
+      "step": 16020
+    },
+    {
+      "epoch": 0.6339852478791355,
+      "grad_norm": 1.0219284183748003,
+      "learning_rate": 9.627217604523338e-06,
+      "loss": 1.0667,
+      "step": 16030
+    },
+    {
+      "epoch": 0.6343807470980245,
+      "grad_norm": 1.0872981572529605,
+      "learning_rate": 9.62634530123591e-06,
+      "loss": 1.0473,
+      "step": 16040
+    },
+    {
+      "epoch": 0.6347762463169135,
+      "grad_norm": 0.9700775524587388,
+      "learning_rate": 9.625472018167804e-06,
+      "loss": 1.0593,
+      "step": 16050
+    },
+    {
+      "epoch": 0.6351717455358026,
+      "grad_norm": 1.0837049158295895,
+      "learning_rate": 9.62459775550397e-06,
+      "loss": 1.0609,
+      "step": 16060
+    },
+    {
+      "epoch": 0.6355672447546916,
+      "grad_norm": 1.0506032682878106,
+      "learning_rate": 9.623722513429562e-06,
+      "loss": 1.0415,
+      "step": 16070
+    },
+    {
+      "epoch": 0.6359627439735807,
+      "grad_norm": 1.179324388814712,
+      "learning_rate": 9.62284629212994e-06,
+      "loss": 1.0162,
+      "step": 16080
+    },
+    {
+      "epoch": 0.6363582431924697,
+      "grad_norm": 1.036813566203232,
+      "learning_rate": 9.621969091790672e-06,
+      "loss": 1.0634,
+      "step": 16090
+    },
+    {
+      "epoch": 0.6367537424113587,
+      "grad_norm": 1.0528915136774362,
+      "learning_rate": 9.621090912597535e-06,
+      "loss": 1.0428,
+      "step": 16100
+    },
+    {
+      "epoch": 0.6371492416302478,
+      "grad_norm": 1.0214701916658446,
+      "learning_rate": 9.620211754736512e-06,
+      "loss": 1.0527,
+      "step": 16110
+    },
+    {
+      "epoch": 0.6375447408491368,
+      "grad_norm": 1.0035423295669172,
+      "learning_rate": 9.619331618393794e-06,
+      "loss": 1.0373,
+      "step": 16120
+    },
+    {
+      "epoch": 0.6379402400680259,
+      "grad_norm": 1.120083020024742,
+      "learning_rate": 9.618450503755779e-06,
+      "loss": 1.0591,
+      "step": 16130
+    },
+    {
+      "epoch": 0.6383357392869149,
+      "grad_norm": 1.0010025289837234,
+      "learning_rate": 9.61756841100907e-06,
+      "loss": 1.044,
+      "step": 16140
+    },
+    {
+      "epoch": 0.6387312385058039,
+      "grad_norm": 0.9919865506696257,
+      "learning_rate": 9.616685340340482e-06,
+      "loss": 1.0375,
+      "step": 16150
+    },
+    {
+      "epoch": 0.639126737724693,
+      "grad_norm": 1.0185732230458162,
+      "learning_rate": 9.615801291937032e-06,
+      "loss": 1.0691,
+      "step": 16160
+    },
+    {
+      "epoch": 0.639522236943582,
+      "grad_norm": 1.0779945605665409,
+      "learning_rate": 9.614916265985947e-06,
+      "loss": 1.0377,
+      "step": 16170
+    },
+    {
+      "epoch": 0.639917736162471,
+      "grad_norm": 0.9367820876470461,
+      "learning_rate": 9.614030262674661e-06,
+      "loss": 1.0639,
+      "step": 16180
+    },
+    {
+      "epoch": 0.6403132353813601,
+      "grad_norm": 1.1075055883952594,
+      "learning_rate": 9.613143282190814e-06,
+      "loss": 1.0518,
+      "step": 16190
+    },
+    {
+      "epoch": 0.6407087346002491,
+      "grad_norm": 1.0475157895495932,
+      "learning_rate": 9.612255324722254e-06,
+      "loss": 1.0439,
+      "step": 16200
+    },
+    {
+      "epoch": 0.6411042338191382,
+      "grad_norm": 1.1059680822283366,
+      "learning_rate": 9.611366390457031e-06,
+      "loss": 1.0539,
+      "step": 16210
+    },
+    {
+      "epoch": 0.6414997330380272,
+      "grad_norm": 0.9794011283509074,
+      "learning_rate": 9.610476479583412e-06,
+      "loss": 1.0431,
+      "step": 16220
+    },
+    {
+      "epoch": 0.6418952322569162,
+      "grad_norm": 1.1107008485505965,
+      "learning_rate": 9.609585592289861e-06,
+      "loss": 1.0207,
+      "step": 16230
+    },
+    {
+      "epoch": 0.6422907314758053,
+      "grad_norm": 0.9671393494701901,
+      "learning_rate": 9.608693728765055e-06,
+      "loss": 1.0341,
+      "step": 16240
+    },
+    {
+      "epoch": 0.6426862306946943,
+      "grad_norm": 1.0762878397019289,
+      "learning_rate": 9.607800889197874e-06,
+      "loss": 1.0496,
+      "step": 16250
+    },
+    {
+      "epoch": 0.6430817299135834,
+      "grad_norm": 0.994784312567831,
+      "learning_rate": 9.606907073777407e-06,
+      "loss": 1.0462,
+      "step": 16260
+    },
+    {
+      "epoch": 0.6434772291324725,
+      "grad_norm": 1.0866974926686934,
+      "learning_rate": 9.606012282692945e-06,
+      "loss": 1.046,
+      "step": 16270
+    },
+    {
+      "epoch": 0.6438727283513616,
+      "grad_norm": 1.1295947062325606,
+      "learning_rate": 9.605116516133996e-06,
+      "loss": 1.0395,
+      "step": 16280
+    },
+    {
+      "epoch": 0.6442682275702506,
+      "grad_norm": 0.9511426189829553,
+      "learning_rate": 9.604219774290263e-06,
+      "loss": 1.049,
+      "step": 16290
+    },
+    {
+      "epoch": 0.6446637267891396,
+      "grad_norm": 1.033441089120287,
+      "learning_rate": 9.603322057351663e-06,
+      "loss": 1.0486,
+      "step": 16300
+    },
+    {
+      "epoch": 0.6450592260080287,
+      "grad_norm": 1.087141724206363,
+      "learning_rate": 9.602423365508316e-06,
+      "loss": 1.0286,
+      "step": 16310
+    },
+    {
+      "epoch": 0.6454547252269177,
+      "grad_norm": 1.0754477185360494,
+      "learning_rate": 9.60152369895055e-06,
+      "loss": 1.0429,
+      "step": 16320
+    },
+    {
+      "epoch": 0.6458502244458068,
+      "grad_norm": 0.8964924547534487,
+      "learning_rate": 9.600623057868897e-06,
+      "loss": 1.0425,
+      "step": 16330
+    },
+    {
+      "epoch": 0.6462457236646958,
+      "grad_norm": 1.0467445736018623,
+      "learning_rate": 9.5997214424541e-06,
+      "loss": 1.0566,
+      "step": 16340
+    },
+    {
+      "epoch": 0.6466412228835848,
+      "grad_norm": 1.0949200556415695,
+      "learning_rate": 9.598818852897107e-06,
+      "loss": 1.0559,
+      "step": 16350
+    },
+    {
+      "epoch": 0.6470367221024739,
+      "grad_norm": 0.9976408476191808,
+      "learning_rate": 9.597915289389067e-06,
+      "loss": 1.0463,
+      "step": 16360
+    },
+    {
+      "epoch": 0.6474322213213629,
+      "grad_norm": 1.0333798283970315,
+      "learning_rate": 9.59701075212134e-06,
+      "loss": 1.0312,
+      "step": 16370
+    },
+    {
+      "epoch": 0.647827720540252,
+      "grad_norm": 0.9829699655987383,
+      "learning_rate": 9.596105241285493e-06,
+      "loss": 1.0375,
+      "step": 16380
+    },
+    {
+      "epoch": 0.648223219759141,
+      "grad_norm": 1.0892861684008417,
+      "learning_rate": 9.595198757073299e-06,
+      "loss": 1.0554,
+      "step": 16390
+    },
+    {
+      "epoch": 0.64861871897803,
+      "grad_norm": 0.999061829672844,
+      "learning_rate": 9.594291299676732e-06,
+      "loss": 1.0378,
+      "step": 16400
+    },
+    {
+      "epoch": 0.6490142181969191,
+      "grad_norm": 1.0281322051068504,
+      "learning_rate": 9.59338286928798e-06,
+      "loss": 1.0491,
+      "step": 16410
+    },
+    {
+      "epoch": 0.6494097174158081,
+      "grad_norm": 1.0056547011966825,
+      "learning_rate": 9.59247346609943e-06,
+      "loss": 1.0389,
+      "step": 16420
+    },
+    {
+      "epoch": 0.6498052166346971,
+      "grad_norm": 1.0199144442063073,
+      "learning_rate": 9.591563090303679e-06,
+      "loss": 1.0243,
+      "step": 16430
+    },
+    {
+      "epoch": 0.6502007158535862,
+      "grad_norm": 0.9826809854077958,
+      "learning_rate": 9.59065174209353e-06,
+      "loss": 1.0548,
+      "step": 16440
+    },
+    {
+      "epoch": 0.6505962150724752,
+      "grad_norm": 1.0946445597528371,
+      "learning_rate": 9.589739421661987e-06,
+      "loss": 1.0564,
+      "step": 16450
+    },
+    {
+      "epoch": 0.6509917142913643,
+      "grad_norm": 0.9093306324224982,
+      "learning_rate": 9.588826129202269e-06,
+      "loss": 1.0359,
+      "step": 16460
+    },
+    {
+      "epoch": 0.6513872135102533,
+      "grad_norm": 1.032452702384602,
+      "learning_rate": 9.587911864907792e-06,
+      "loss": 1.0209,
+      "step": 16470
+    },
+    {
+      "epoch": 0.6517827127291423,
+      "grad_norm": 0.932606847135522,
+      "learning_rate": 9.586996628972185e-06,
+      "loss": 1.0423,
+      "step": 16480
+    },
+    {
+      "epoch": 0.6521782119480314,
+      "grad_norm": 1.0801753190629404,
+      "learning_rate": 9.586080421589277e-06,
+      "loss": 1.0416,
+      "step": 16490
+    },
+    {
+      "epoch": 0.6525737111669204,
+      "grad_norm": 1.0208328941703664,
+      "learning_rate": 9.585163242953103e-06,
+      "loss": 1.0451,
+      "step": 16500
+    },
+    {
+      "epoch": 0.6529692103858095,
+      "grad_norm": 1.017005203066039,
+      "learning_rate": 9.584245093257911e-06,
+      "loss": 1.0597,
+      "step": 16510
+    },
+    {
+      "epoch": 0.6533647096046985,
+      "grad_norm": 1.0374710579553934,
+      "learning_rate": 9.583325972698146e-06,
+      "loss": 1.0512,
+      "step": 16520
+    },
+    {
+      "epoch": 0.6537602088235875,
+      "grad_norm": 1.0556906737819625,
+      "learning_rate": 9.582405881468459e-06,
+      "loss": 1.0419,
+      "step": 16530
+    },
+    {
+      "epoch": 0.6541557080424766,
+      "grad_norm": 1.131883843769043,
+      "learning_rate": 9.581484819763717e-06,
+      "loss": 1.0403,
+      "step": 16540
+    },
+    {
+      "epoch": 0.6545512072613656,
+      "grad_norm": 1.0525520418730143,
+      "learning_rate": 9.58056278777898e-06,
+      "loss": 1.0486,
+      "step": 16550
+    },
+    {
+      "epoch": 0.6549467064802547,
+      "grad_norm": 1.013699744559807,
+      "learning_rate": 9.579639785709518e-06,
+      "loss": 1.0506,
+      "step": 16560
+    },
+    {
+      "epoch": 0.6553422056991437,
+      "grad_norm": 1.0837720964627076,
+      "learning_rate": 9.57871581375081e-06,
+      "loss": 1.0433,
+      "step": 16570
+    },
+    {
+      "epoch": 0.6557377049180327,
+      "grad_norm": 1.0012290947394766,
+      "learning_rate": 9.577790872098535e-06,
+      "loss": 1.0548,
+      "step": 16580
+    },
+    {
+      "epoch": 0.6561332041369218,
+      "grad_norm": 1.0050172501786707,
+      "learning_rate": 9.576864960948582e-06,
+      "loss": 1.042,
+      "step": 16590
+    },
+    {
+      "epoch": 0.6565287033558109,
+      "grad_norm": 0.9174429412546484,
+      "learning_rate": 9.575938080497042e-06,
+      "loss": 1.0386,
+      "step": 16600
+    },
+    {
+      "epoch": 0.6569242025747,
+      "grad_norm": 1.0086971851572313,
+      "learning_rate": 9.575010230940212e-06,
+      "loss": 1.0493,
+      "step": 16610
+    },
+    {
+      "epoch": 0.657319701793589,
+      "grad_norm": 0.9999297545927885,
+      "learning_rate": 9.574081412474596e-06,
+      "loss": 1.0311,
+      "step": 16620
+    },
+    {
+      "epoch": 0.657715201012478,
+      "grad_norm": 0.9703798632788256,
+      "learning_rate": 9.573151625296899e-06,
+      "loss": 1.0464,
+      "step": 16630
+    },
+    {
+      "epoch": 0.6581107002313671,
+      "grad_norm": 1.0220247718537434,
+      "learning_rate": 9.572220869604037e-06,
+      "loss": 1.0508,
+      "step": 16640
+    },
+    {
+      "epoch": 0.6585061994502561,
+      "grad_norm": 1.0162383358983795,
+      "learning_rate": 9.571289145593128e-06,
+      "loss": 1.0526,
+      "step": 16650
+    },
+    {
+      "epoch": 0.6589016986691452,
+      "grad_norm": 0.9960813334797269,
+      "learning_rate": 9.570356453461491e-06,
+      "loss": 1.0221,
+      "step": 16660
+    },
+    {
+      "epoch": 0.6592971978880342,
+      "grad_norm": 1.1137802002905846,
+      "learning_rate": 9.56942279340666e-06,
+      "loss": 1.0535,
+      "step": 16670
+    },
+    {
+      "epoch": 0.6596926971069232,
+      "grad_norm": 0.9464157499925506,
+      "learning_rate": 9.568488165626365e-06,
+      "loss": 1.0383,
+      "step": 16680
+    },
+    {
+      "epoch": 0.6600881963258123,
+      "grad_norm": 1.1130569372194852,
+      "learning_rate": 9.567552570318546e-06,
+      "loss": 1.0268,
+      "step": 16690
+    },
+    {
+      "epoch": 0.6604836955447013,
+      "grad_norm": 1.1228395235041873,
+      "learning_rate": 9.566616007681342e-06,
+      "loss": 1.0211,
+      "step": 16700
+    },
+    {
+      "epoch": 0.6608791947635904,
+      "grad_norm": 1.0249738775299035,
+      "learning_rate": 9.565678477913104e-06,
+      "loss": 1.0332,
+      "step": 16710
+    },
+    {
+      "epoch": 0.6612746939824794,
+      "grad_norm": 0.9400537428708831,
+      "learning_rate": 9.564739981212387e-06,
+      "loss": 1.0432,
+      "step": 16720
+    },
+    {
+      "epoch": 0.6616701932013684,
+      "grad_norm": 0.9582281152859005,
+      "learning_rate": 9.563800517777945e-06,
+      "loss": 1.0394,
+      "step": 16730
+    },
+    {
+      "epoch": 0.6620656924202575,
+      "grad_norm": 1.1088532835887537,
+      "learning_rate": 9.562860087808741e-06,
+      "loss": 1.0339,
+      "step": 16740
+    },
+    {
+      "epoch": 0.6624611916391465,
+      "grad_norm": 1.0273843070238693,
+      "learning_rate": 9.561918691503942e-06,
+      "loss": 1.0172,
+      "step": 16750
+    },
+    {
+      "epoch": 0.6628566908580356,
+      "grad_norm": 1.08441126176365,
+      "learning_rate": 9.560976329062918e-06,
+      "loss": 1.027,
+      "step": 16760
+    },
+    {
+      "epoch": 0.6632521900769246,
+      "grad_norm": 0.9853686375308722,
+      "learning_rate": 9.56003300068525e-06,
+      "loss": 1.0255,
+      "step": 16770
+    },
+    {
+      "epoch": 0.6636476892958136,
+      "grad_norm": 1.030571846165974,
+      "learning_rate": 9.559088706570714e-06,
+      "loss": 1.0322,
+      "step": 16780
+    },
+    {
+      "epoch": 0.6640431885147027,
+      "grad_norm": 0.9130511100608083,
+      "learning_rate": 9.558143446919298e-06,
+      "loss": 1.0336,
+      "step": 16790
+    },
+    {
+      "epoch": 0.6644386877335917,
+      "grad_norm": 1.1826514552688003,
+      "learning_rate": 9.55719722193119e-06,
+      "loss": 1.039,
+      "step": 16800
+    },
+    {
+      "epoch": 0.6648341869524808,
+      "grad_norm": 0.9902724700866942,
+      "learning_rate": 9.556250031806787e-06,
+      "loss": 1.0381,
+      "step": 16810
+    },
+    {
+      "epoch": 0.6652296861713698,
+      "grad_norm": 1.072785169008779,
+      "learning_rate": 9.555301876746683e-06,
+      "loss": 1.0312,
+      "step": 16820
+    },
+    {
+      "epoch": 0.6656251853902588,
+      "grad_norm": 0.9455694513191211,
+      "learning_rate": 9.554352756951686e-06,
+      "loss": 1.0436,
+      "step": 16830
+    },
+    {
+      "epoch": 0.6660206846091479,
+      "grad_norm": 1.0543375101177483,
+      "learning_rate": 9.5534026726228e-06,
+      "loss": 1.0439,
+      "step": 16840
+    },
+    {
+      "epoch": 0.6664161838280369,
+      "grad_norm": 1.0506783493197784,
+      "learning_rate": 9.552451623961238e-06,
+      "loss": 1.0373,
+      "step": 16850
+    },
+    {
+      "epoch": 0.666811683046926,
+      "grad_norm": 1.0599694755798597,
+      "learning_rate": 9.551499611168416e-06,
+      "loss": 1.0509,
+      "step": 16860
+    },
+    {
+      "epoch": 0.667207182265815,
+      "grad_norm": 1.0608087564396351,
+      "learning_rate": 9.550546634445953e-06,
+      "loss": 1.0555,
+      "step": 16870
+    },
+    {
+      "epoch": 0.667602681484704,
+      "grad_norm": 1.0642032289851944,
+      "learning_rate": 9.549592693995675e-06,
+      "loss": 1.0394,
+      "step": 16880
+    },
+    {
+      "epoch": 0.6679981807035931,
+      "grad_norm": 1.100815126634963,
+      "learning_rate": 9.548637790019607e-06,
+      "loss": 1.0464,
+      "step": 16890
+    },
+    {
+      "epoch": 0.6683936799224821,
+      "grad_norm": 1.0902493624158174,
+      "learning_rate": 9.547681922719984e-06,
+      "loss": 1.0379,
+      "step": 16900
+    },
+    {
+      "epoch": 0.6687891791413711,
+      "grad_norm": 1.0635566496235134,
+      "learning_rate": 9.546725092299242e-06,
+      "loss": 1.0451,
+      "step": 16910
+    },
+    {
+      "epoch": 0.6691846783602602,
+      "grad_norm": 0.9584549255738936,
+      "learning_rate": 9.54576729896002e-06,
+      "loss": 1.0391,
+      "step": 16920
+    },
+    {
+      "epoch": 0.6695801775791492,
+      "grad_norm": 1.070995269486395,
+      "learning_rate": 9.544808542905163e-06,
+      "loss": 1.0272,
+      "step": 16930
+    },
+    {
+      "epoch": 0.6699756767980384,
+      "grad_norm": 0.9790783814739064,
+      "learning_rate": 9.54384882433772e-06,
+      "loss": 1.0494,
+      "step": 16940
+    },
+    {
+      "epoch": 0.6703711760169274,
+      "grad_norm": 0.9465810737305382,
+      "learning_rate": 9.54288814346094e-06,
+      "loss": 1.0363,
+      "step": 16950
+    },
+    {
+      "epoch": 0.6707666752358165,
+      "grad_norm": 0.975396321916311,
+      "learning_rate": 9.541926500478284e-06,
+      "loss": 1.0542,
+      "step": 16960
+    },
+    {
+      "epoch": 0.6711621744547055,
+      "grad_norm": 1.0161602431604948,
+      "learning_rate": 9.540963895593407e-06,
+      "loss": 1.0564,
+      "step": 16970
+    },
+    {
+      "epoch": 0.6715576736735945,
+      "grad_norm": 0.9735537016129909,
+      "learning_rate": 9.540000329010172e-06,
+      "loss": 1.0238,
+      "step": 16980
+    },
+    {
+      "epoch": 0.6719531728924836,
+      "grad_norm": 0.9914811651341365,
+      "learning_rate": 9.53903580093265e-06,
+      "loss": 1.0561,
+      "step": 16990
+    },
+    {
+      "epoch": 0.6723486721113726,
+      "grad_norm": 1.1713963865178358,
+      "learning_rate": 9.538070311565107e-06,
+      "loss": 1.0223,
+      "step": 17000
+    },
+    {
+      "epoch": 0.6727441713302617,
+      "grad_norm": 0.9790382232291204,
+      "learning_rate": 9.53710386111202e-06,
+      "loss": 1.0351,
+      "step": 17010
+    },
+    {
+      "epoch": 0.6731396705491507,
+      "grad_norm": 1.0488028967827665,
+      "learning_rate": 9.536136449778065e-06,
+      "loss": 1.0388,
+      "step": 17020
+    },
+    {
+      "epoch": 0.6735351697680397,
+      "grad_norm": 0.9584055404084623,
+      "learning_rate": 9.535168077768125e-06,
+      "loss": 1.0415,
+      "step": 17030
+    },
+    {
+      "epoch": 0.6739306689869288,
+      "grad_norm": 1.0335393573503142,
+      "learning_rate": 9.534198745287282e-06,
+      "loss": 1.0378,
+      "step": 17040
+    },
+    {
+      "epoch": 0.6743261682058178,
+      "grad_norm": 1.0191950817036952,
+      "learning_rate": 9.533228452540827e-06,
+      "loss": 1.0341,
+      "step": 17050
+    },
+    {
+      "epoch": 0.6747216674247069,
+      "grad_norm": 1.0336588465213403,
+      "learning_rate": 9.53225719973425e-06,
+      "loss": 1.0291,
+      "step": 17060
+    },
+    {
+      "epoch": 0.6751171666435959,
+      "grad_norm": 1.059136311458042,
+      "learning_rate": 9.531284987073244e-06,
+      "loss": 1.0265,
+      "step": 17070
+    },
+    {
+      "epoch": 0.6755126658624849,
+      "grad_norm": 1.0433570581974203,
+      "learning_rate": 9.530311814763709e-06,
+      "loss": 1.0355,
+      "step": 17080
+    },
+    {
+      "epoch": 0.675908165081374,
+      "grad_norm": 1.0300428249677438,
+      "learning_rate": 9.529337683011746e-06,
+      "loss": 1.0409,
+      "step": 17090
+    },
+    {
+      "epoch": 0.676303664300263,
+      "grad_norm": 1.0033435144712768,
+      "learning_rate": 9.52836259202366e-06,
+      "loss": 1.0236,
+      "step": 17100
+    },
+    {
+      "epoch": 0.676699163519152,
+      "grad_norm": 1.1260516296950704,
+      "learning_rate": 9.527386542005956e-06,
+      "loss": 1.0409,
+      "step": 17110
+    },
+    {
+      "epoch": 0.6770946627380411,
+      "grad_norm": 1.0577775935051679,
+      "learning_rate": 9.526409533165348e-06,
+      "loss": 1.0375,
+      "step": 17120
+    },
+    {
+      "epoch": 0.6774901619569301,
+      "grad_norm": 1.1119972148675605,
+      "learning_rate": 9.52543156570875e-06,
+      "loss": 1.0368,
+      "step": 17130
+    },
+    {
+      "epoch": 0.6778856611758192,
+      "grad_norm": 1.0437027342498233,
+      "learning_rate": 9.524452639843273e-06,
+      "loss": 1.0314,
+      "step": 17140
+    },
+    {
+      "epoch": 0.6782811603947082,
+      "grad_norm": 1.13984957954556,
+      "learning_rate": 9.523472755776243e-06,
+      "loss": 1.0279,
+      "step": 17150
+    },
+    {
+      "epoch": 0.6786766596135972,
+      "grad_norm": 0.941921033197251,
+      "learning_rate": 9.52249191371518e-06,
+      "loss": 1.0372,
+      "step": 17160
+    },
+    {
+      "epoch": 0.6790721588324863,
+      "grad_norm": 1.029538895641745,
+      "learning_rate": 9.521510113867809e-06,
+      "loss": 1.0514,
+      "step": 17170
+    },
+    {
+      "epoch": 0.6794676580513753,
+      "grad_norm": 1.073026086443101,
+      "learning_rate": 9.52052735644206e-06,
+      "loss": 1.0293,
+      "step": 17180
+    },
+    {
+      "epoch": 0.6798631572702644,
+      "grad_norm": 1.0974218286313009,
+      "learning_rate": 9.519543641646064e-06,
+      "loss": 1.0277,
+      "step": 17190
+    },
+    {
+      "epoch": 0.6802586564891534,
+      "grad_norm": 0.9680793636990874,
+      "learning_rate": 9.518558969688154e-06,
+      "loss": 1.0394,
+      "step": 17200
+    },
+    {
+      "epoch": 0.6806541557080424,
+      "grad_norm": 1.0693855084118786,
+      "learning_rate": 9.517573340776865e-06,
+      "loss": 1.0393,
+      "step": 17210
+    },
+    {
+      "epoch": 0.6810496549269315,
+      "grad_norm": 1.083877243258302,
+      "learning_rate": 9.51658675512094e-06,
+      "loss": 1.028,
+      "step": 17220
+    },
+    {
+      "epoch": 0.6814451541458205,
+      "grad_norm": 1.078167232659275,
+      "learning_rate": 9.51559921292932e-06,
+      "loss": 1.0307,
+      "step": 17230
+    },
+    {
+      "epoch": 0.6818406533647096,
+      "grad_norm": 1.0774628650055083,
+      "learning_rate": 9.514610714411148e-06,
+      "loss": 1.0449,
+      "step": 17240
+    },
+    {
+      "epoch": 0.6822361525835986,
+      "grad_norm": 1.0685616706304606,
+      "learning_rate": 9.513621259775771e-06,
+      "loss": 1.0424,
+      "step": 17250
+    },
+    {
+      "epoch": 0.6826316518024876,
+      "grad_norm": 1.0697177888713763,
+      "learning_rate": 9.512630849232742e-06,
+      "loss": 1.0091,
+      "step": 17260
+    },
+    {
+      "epoch": 0.6830271510213768,
+      "grad_norm": 0.9881047420361636,
+      "learning_rate": 9.511639482991807e-06,
+      "loss": 1.0589,
+      "step": 17270
+    },
+    {
+      "epoch": 0.6834226502402658,
+      "grad_norm": 1.1784659215278936,
+      "learning_rate": 9.510647161262927e-06,
+      "loss": 1.0443,
+      "step": 17280
+    },
+    {
+      "epoch": 0.6838181494591549,
+      "grad_norm": 0.9895397807354788,
+      "learning_rate": 9.509653884256253e-06,
+      "loss": 1.0523,
+      "step": 17290
+    },
+    {
+      "epoch": 0.6842136486780439,
+      "grad_norm": 0.978584969504384,
+      "learning_rate": 9.508659652182148e-06,
+      "loss": 1.0253,
+      "step": 17300
+    },
+    {
+      "epoch": 0.684609147896933,
+      "grad_norm": 1.0203673517879768,
+      "learning_rate": 9.507664465251171e-06,
+      "loss": 1.0302,
+      "step": 17310
+    },
+    {
+      "epoch": 0.685004647115822,
+      "grad_norm": 1.0028477911852194,
+      "learning_rate": 9.506668323674089e-06,
+      "loss": 1.0585,
+      "step": 17320
+    },
+    {
+      "epoch": 0.685400146334711,
+      "grad_norm": 0.9760105742719073,
+      "learning_rate": 9.505671227661862e-06,
+      "loss": 1.0587,
+      "step": 17330
+    },
+    {
+      "epoch": 0.6857956455536001,
+      "grad_norm": 0.9562174540190501,
+      "learning_rate": 9.504673177425662e-06,
+      "loss": 1.0462,
+      "step": 17340
+    },
+    {
+      "epoch": 0.6861911447724891,
+      "grad_norm": 1.050267449722087,
+      "learning_rate": 9.503674173176857e-06,
+      "loss": 1.0068,
+      "step": 17350
+    },
+    {
+      "epoch": 0.6865866439913781,
+      "grad_norm": 1.0074076526995512,
+      "learning_rate": 9.502674215127021e-06,
+      "loss": 1.015,
+      "step": 17360
+    },
+    {
+      "epoch": 0.6869821432102672,
+      "grad_norm": 0.9719617490095388,
+      "learning_rate": 9.501673303487928e-06,
+      "loss": 1.0235,
+      "step": 17370
+    },
+    {
+      "epoch": 0.6873776424291562,
+      "grad_norm": 0.967603469030969,
+      "learning_rate": 9.500671438471551e-06,
+      "loss": 1.0266,
+      "step": 17380
+    },
+    {
+      "epoch": 0.6877731416480453,
+      "grad_norm": 1.0375852797271878,
+      "learning_rate": 9.49966862029007e-06,
+      "loss": 1.0442,
+      "step": 17390
+    },
+    {
+      "epoch": 0.6881686408669343,
+      "grad_norm": 1.0278951507874752,
+      "learning_rate": 9.498664849155865e-06,
+      "loss": 1.0312,
+      "step": 17400
+    },
+    {
+      "epoch": 0.6885641400858233,
+      "grad_norm": 1.1270130472250348,
+      "learning_rate": 9.497660125281517e-06,
+      "loss": 1.0487,
+      "step": 17410
+    },
+    {
+      "epoch": 0.6889596393047124,
+      "grad_norm": 1.0695361143024629,
+      "learning_rate": 9.496654448879809e-06,
+      "loss": 1.0208,
+      "step": 17420
+    },
+    {
+      "epoch": 0.6893551385236014,
+      "grad_norm": 0.9980861835302752,
+      "learning_rate": 9.495647820163725e-06,
+      "loss": 1.0176,
+      "step": 17430
+    },
+    {
+      "epoch": 0.6897506377424905,
+      "grad_norm": 0.9820629459969173,
+      "learning_rate": 9.494640239346456e-06,
+      "loss": 1.0517,
+      "step": 17440
+    },
+    {
+      "epoch": 0.6901461369613795,
+      "grad_norm": 0.9718859000115515,
+      "learning_rate": 9.493631706641385e-06,
+      "loss": 1.0342,
+      "step": 17450
+    },
+    {
+      "epoch": 0.6905416361802685,
+      "grad_norm": 1.0149575779819844,
+      "learning_rate": 9.492622222262104e-06,
+      "loss": 1.0298,
+      "step": 17460
+    },
+    {
+      "epoch": 0.6909371353991576,
+      "grad_norm": 0.9541546409456523,
+      "learning_rate": 9.491611786422406e-06,
+      "loss": 1.0183,
+      "step": 17470
+    },
+    {
+      "epoch": 0.6913326346180466,
+      "grad_norm": 0.8854836045074562,
+      "learning_rate": 9.490600399336282e-06,
+      "loss": 1.026,
+      "step": 17480
+    },
+    {
+      "epoch": 0.6917281338369357,
+      "grad_norm": 1.0141332501116707,
+      "learning_rate": 9.489588061217928e-06,
+      "loss": 1.0196,
+      "step": 17490
+    },
+    {
+      "epoch": 0.6921236330558247,
+      "grad_norm": 1.1396103122405974,
+      "learning_rate": 9.488574772281737e-06,
+      "loss": 1.0381,
+      "step": 17500
+    },
+    {
+      "epoch": 0.6925191322747137,
+      "grad_norm": 1.006966576458375,
+      "learning_rate": 9.487560532742312e-06,
+      "loss": 1.0423,
+      "step": 17510
+    },
+    {
+      "epoch": 0.6929146314936028,
+      "grad_norm": 1.0507603763860547,
+      "learning_rate": 9.486545342814445e-06,
+      "loss": 1.0113,
+      "step": 17520
+    },
+    {
+      "epoch": 0.6933101307124918,
+      "grad_norm": 1.1074279872076913,
+      "learning_rate": 9.48552920271314e-06,
+      "loss": 1.0393,
+      "step": 17530
+    },
+    {
+      "epoch": 0.6937056299313809,
+      "grad_norm": 0.9270751798264574,
+      "learning_rate": 9.484512112653596e-06,
+      "loss": 1.0542,
+      "step": 17540
+    },
+    {
+      "epoch": 0.6941011291502699,
+      "grad_norm": 1.0144057641303537,
+      "learning_rate": 9.483494072851215e-06,
+      "loss": 1.0301,
+      "step": 17550
+    },
+    {
+      "epoch": 0.6944966283691589,
+      "grad_norm": 0.9510545519842236,
+      "learning_rate": 9.482475083521605e-06,
+      "loss": 1.0418,
+      "step": 17560
+    },
+    {
+      "epoch": 0.694892127588048,
+      "grad_norm": 1.047926367227602,
+      "learning_rate": 9.481455144880565e-06,
+      "loss": 1.0341,
+      "step": 17570
+    },
+    {
+      "epoch": 0.695287626806937,
+      "grad_norm": 1.1945063966810148,
+      "learning_rate": 9.480434257144101e-06,
+      "loss": 1.0288,
+      "step": 17580
+    },
+    {
+      "epoch": 0.695683126025826,
+      "grad_norm": 1.0395791244806956,
+      "learning_rate": 9.479412420528421e-06,
+      "loss": 1.0305,
+      "step": 17590
+    },
+    {
+      "epoch": 0.6960786252447151,
+      "grad_norm": 0.9915137493645249,
+      "learning_rate": 9.478389635249935e-06,
+      "loss": 1.0462,
+      "step": 17600
+    },
+    {
+      "epoch": 0.6964741244636042,
+      "grad_norm": 0.9979567244351009,
+      "learning_rate": 9.477365901525248e-06,
+      "loss": 1.0195,
+      "step": 17610
+    },
+    {
+      "epoch": 0.6968696236824933,
+      "grad_norm": 1.0445929720914613,
+      "learning_rate": 9.47634121957117e-06,
+      "loss": 1.0311,
+      "step": 17620
+    },
+    {
+      "epoch": 0.6972651229013823,
+      "grad_norm": 0.9371362435676859,
+      "learning_rate": 9.475315589604711e-06,
+      "loss": 1.0279,
+      "step": 17630
+    },
+    {
+      "epoch": 0.6976606221202714,
+      "grad_norm": 1.0071781675755407,
+      "learning_rate": 9.474289011843083e-06,
+      "loss": 1.0402,
+      "step": 17640
+    },
+    {
+      "epoch": 0.6980561213391604,
+      "grad_norm": 1.001393852960122,
+      "learning_rate": 9.473261486503695e-06,
+      "loss": 1.0336,
+      "step": 17650
+    },
+    {
+      "epoch": 0.6984516205580494,
+      "grad_norm": 1.0062506554495403,
+      "learning_rate": 9.472233013804161e-06,
+      "loss": 1.0427,
+      "step": 17660
+    },
+    {
+      "epoch": 0.6988471197769385,
+      "grad_norm": 1.122064684887129,
+      "learning_rate": 9.471203593962295e-06,
+      "loss": 1.0094,
+      "step": 17670
+    },
+    {
+      "epoch": 0.6992426189958275,
+      "grad_norm": 1.0641755859905655,
+      "learning_rate": 9.47017322719611e-06,
+      "loss": 1.0521,
+      "step": 17680
+    },
+    {
+      "epoch": 0.6996381182147166,
+      "grad_norm": 0.996838203937507,
+      "learning_rate": 9.469141913723816e-06,
+      "loss": 1.0245,
+      "step": 17690
+    },
+    {
+      "epoch": 0.7000336174336056,
+      "grad_norm": 0.9977499259345749,
+      "learning_rate": 9.468109653763833e-06,
+      "loss": 1.0392,
+      "step": 17700
+    },
+    {
+      "epoch": 0.7004291166524946,
+      "grad_norm": 1.1159570566024226,
+      "learning_rate": 9.467076447534776e-06,
+      "loss": 1.0526,
+      "step": 17710
+    },
+    {
+      "epoch": 0.7008246158713837,
+      "grad_norm": 1.0381074540651054,
+      "learning_rate": 9.466042295255455e-06,
+      "loss": 1.0102,
+      "step": 17720
+    },
+    {
+      "epoch": 0.7012201150902727,
+      "grad_norm": 0.9714731712930081,
+      "learning_rate": 9.46500719714489e-06,
+      "loss": 1.0586,
+      "step": 17730
+    },
+    {
+      "epoch": 0.7016156143091618,
+      "grad_norm": 1.0007087728722992,
+      "learning_rate": 9.463971153422295e-06,
+      "loss": 1.0315,
+      "step": 17740
+    },
+    {
+      "epoch": 0.7020111135280508,
+      "grad_norm": 1.0839473564019655,
+      "learning_rate": 9.462934164307089e-06,
+      "loss": 1.025,
+      "step": 17750
+    },
+    {
+      "epoch": 0.7024066127469398,
+      "grad_norm": 1.0755172683031924,
+      "learning_rate": 9.461896230018886e-06,
+      "loss": 1.0277,
+      "step": 17760
+    },
+    {
+      "epoch": 0.7028021119658289,
+      "grad_norm": 1.009446176094145,
+      "learning_rate": 9.460857350777504e-06,
+      "loss": 1.0403,
+      "step": 17770
+    },
+    {
+      "epoch": 0.7031976111847179,
+      "grad_norm": 1.1764653577076687,
+      "learning_rate": 9.459817526802958e-06,
+      "loss": 1.0277,
+      "step": 17780
+    },
+    {
+      "epoch": 0.703593110403607,
+      "grad_norm": 1.0299138971363238,
+      "learning_rate": 9.458776758315468e-06,
+      "loss": 1.0182,
+      "step": 17790
+    },
+    {
+      "epoch": 0.703988609622496,
+      "grad_norm": 1.1607430809418804,
+      "learning_rate": 9.457735045535448e-06,
+      "loss": 1.0393,
+      "step": 17800
+    },
+    {
+      "epoch": 0.704384108841385,
+      "grad_norm": 0.952550365342849,
+      "learning_rate": 9.456692388683518e-06,
+      "loss": 1.0225,
+      "step": 17810
+    },
+    {
+      "epoch": 0.7047796080602741,
+      "grad_norm": 0.9704323216931742,
+      "learning_rate": 9.45564878798049e-06,
+      "loss": 1.0422,
+      "step": 17820
+    },
+    {
+      "epoch": 0.7051751072791631,
+      "grad_norm": 1.0917470952685522,
+      "learning_rate": 9.454604243647384e-06,
+      "loss": 1.0345,
+      "step": 17830
+    },
+    {
+      "epoch": 0.7055706064980521,
+      "grad_norm": 1.0811136087893778,
+      "learning_rate": 9.453558755905418e-06,
+      "loss": 1.0422,
+      "step": 17840
+    },
+    {
+      "epoch": 0.7059661057169412,
+      "grad_norm": 1.0564672808578592,
+      "learning_rate": 9.452512324976007e-06,
+      "loss": 1.0332,
+      "step": 17850
+    },
+    {
+      "epoch": 0.7063616049358302,
+      "grad_norm": 0.9802906844311005,
+      "learning_rate": 9.451464951080766e-06,
+      "loss": 1.012,
+      "step": 17860
+    },
+    {
+      "epoch": 0.7067571041547193,
+      "grad_norm": 1.1015278578599281,
+      "learning_rate": 9.450416634441512e-06,
+      "loss": 1.0363,
+      "step": 17870
+    },
+    {
+      "epoch": 0.7071526033736083,
+      "grad_norm": 1.0112824386472745,
+      "learning_rate": 9.44936737528026e-06,
+      "loss": 1.0244,
+      "step": 17880
+    },
+    {
+      "epoch": 0.7075481025924973,
+      "grad_norm": 1.0526738425427273,
+      "learning_rate": 9.448317173819225e-06,
+      "loss": 1.0109,
+      "step": 17890
+    },
+    {
+      "epoch": 0.7079436018113864,
+      "grad_norm": 0.9684882693538964,
+      "learning_rate": 9.447266030280824e-06,
+      "loss": 1.0378,
+      "step": 17900
+    },
+    {
+      "epoch": 0.7083391010302754,
+      "grad_norm": 1.0369246106958165,
+      "learning_rate": 9.446213944887667e-06,
+      "loss": 1.016,
+      "step": 17910
+    },
+    {
+      "epoch": 0.7087346002491645,
+      "grad_norm": 1.0507790450131054,
+      "learning_rate": 9.44516091786257e-06,
+      "loss": 1.032,
+      "step": 17920
+    },
+    {
+      "epoch": 0.7091300994680535,
+      "grad_norm": 1.0429180670917753,
+      "learning_rate": 9.444106949428546e-06,
+      "loss": 1.0403,
+      "step": 17930
+    },
+    {
+      "epoch": 0.7095255986869425,
+      "grad_norm": 1.0357371700575062,
+      "learning_rate": 9.443052039808809e-06,
+      "loss": 1.0297,
+      "step": 17940
+    },
+    {
+      "epoch": 0.7099210979058317,
+      "grad_norm": 1.001997955675374,
+      "learning_rate": 9.441996189226767e-06,
+      "loss": 1.027,
+      "step": 17950
+    },
+    {
+      "epoch": 0.7103165971247207,
+      "grad_norm": 1.0411255534164106,
+      "learning_rate": 9.440939397906034e-06,
+      "loss": 1.0437,
+      "step": 17960
+    },
+    {
+      "epoch": 0.7107120963436098,
+      "grad_norm": 1.0430212772449607,
+      "learning_rate": 9.43988166607042e-06,
+      "loss": 1.0406,
+      "step": 17970
+    },
+    {
+      "epoch": 0.7111075955624988,
+      "grad_norm": 1.0476161191690738,
+      "learning_rate": 9.438822993943933e-06,
+      "loss": 1.0385,
+      "step": 17980
+    },
+    {
+      "epoch": 0.7115030947813878,
+      "grad_norm": 0.9425366081957547,
+      "learning_rate": 9.437763381750783e-06,
+      "loss": 1.0203,
+      "step": 17990
+    },
+    {
+      "epoch": 0.7118985940002769,
+      "grad_norm": 0.9470071520270256,
+      "learning_rate": 9.436702829715378e-06,
+      "loss": 1.0378,
+      "step": 18000
+    },
+    {
+      "epoch": 0.7122940932191659,
+      "grad_norm": 1.0032513032935346,
+      "learning_rate": 9.435641338062325e-06,
+      "loss": 1.0335,
+      "step": 18010
+    },
+    {
+      "epoch": 0.712689592438055,
+      "grad_norm": 1.071046239224927,
+      "learning_rate": 9.434578907016427e-06,
+      "loss": 1.0183,
+      "step": 18020
+    },
+    {
+      "epoch": 0.713085091656944,
+      "grad_norm": 1.0072679422841402,
+      "learning_rate": 9.433515536802692e-06,
+      "loss": 1.0321,
+      "step": 18030
+    },
+    {
+      "epoch": 0.713480590875833,
+      "grad_norm": 1.0274835588693285,
+      "learning_rate": 9.432451227646321e-06,
+      "loss": 1.0286,
+      "step": 18040
+    },
+    {
+      "epoch": 0.7138760900947221,
+      "grad_norm": 1.1990568507239372,
+      "learning_rate": 9.431385979772719e-06,
+      "loss": 1.0249,
+      "step": 18050
+    },
+    {
+      "epoch": 0.7142715893136111,
+      "grad_norm": 0.9597939768988392,
+      "learning_rate": 9.430319793407483e-06,
+      "loss": 1.0284,
+      "step": 18060
+    },
+    {
+      "epoch": 0.7146670885325002,
+      "grad_norm": 0.9904046966771386,
+      "learning_rate": 9.429252668776419e-06,
+      "loss": 1.0383,
+      "step": 18070
+    },
+    {
+      "epoch": 0.7150625877513892,
+      "grad_norm": 1.075580767185948,
+      "learning_rate": 9.42818460610552e-06,
+      "loss": 1.0349,
+      "step": 18080
+    },
+    {
+      "epoch": 0.7154580869702782,
+      "grad_norm": 0.9251623183639143,
+      "learning_rate": 9.427115605620987e-06,
+      "loss": 1.0117,
+      "step": 18090
+    },
+    {
+      "epoch": 0.7158535861891673,
+      "grad_norm": 1.0155844395529972,
+      "learning_rate": 9.426045667549216e-06,
+      "loss": 1.0187,
+      "step": 18100
+    },
+    {
+      "epoch": 0.7162490854080563,
+      "grad_norm": 1.0912143845014943,
+      "learning_rate": 9.424974792116799e-06,
+      "loss": 1.0258,
+      "step": 18110
+    },
+    {
+      "epoch": 0.7166445846269454,
+      "grad_norm": 0.983404683787228,
+      "learning_rate": 9.42390297955053e-06,
+      "loss": 1.0415,
+      "step": 18120
+    },
+    {
+      "epoch": 0.7170400838458344,
+      "grad_norm": 1.0520129458409133,
+      "learning_rate": 9.422830230077402e-06,
+      "loss": 1.0362,
+      "step": 18130
+    },
+    {
+      "epoch": 0.7174355830647234,
+      "grad_norm": 1.0798743638958643,
+      "learning_rate": 9.421756543924606e-06,
+      "loss": 1.0202,
+      "step": 18140
+    },
+    {
+      "epoch": 0.7178310822836125,
+      "grad_norm": 1.0330469051053421,
+      "learning_rate": 9.420681921319525e-06,
+      "loss": 1.0146,
+      "step": 18150
+    },
+    {
+      "epoch": 0.7182265815025015,
+      "grad_norm": 0.9803559327822846,
+      "learning_rate": 9.41960636248975e-06,
+      "loss": 1.0032,
+      "step": 18160
+    },
+    {
+      "epoch": 0.7186220807213906,
+      "grad_norm": 1.0594579196245595,
+      "learning_rate": 9.418529867663066e-06,
+      "loss": 1.0392,
+      "step": 18170
+    },
+    {
+      "epoch": 0.7190175799402796,
+      "grad_norm": 1.073378619708236,
+      "learning_rate": 9.417452437067454e-06,
+      "loss": 1.034,
+      "step": 18180
+    },
+    {
+      "epoch": 0.7194130791591686,
+      "grad_norm": 0.996061568152992,
+      "learning_rate": 9.416374070931097e-06,
+      "loss": 1.0448,
+      "step": 18190
+    },
+    {
+      "epoch": 0.7198085783780577,
+      "grad_norm": 1.0149747384680066,
+      "learning_rate": 9.415294769482375e-06,
+      "loss": 1.0318,
+      "step": 18200
+    },
+    {
+      "epoch": 0.7202040775969467,
+      "grad_norm": 1.015167129650081,
+      "learning_rate": 9.414214532949863e-06,
+      "loss": 1.0321,
+      "step": 18210
+    },
+    {
+      "epoch": 0.7205995768158358,
+      "grad_norm": 1.0785394541752948,
+      "learning_rate": 9.41313336156234e-06,
+      "loss": 1.0324,
+      "step": 18220
+    },
+    {
+      "epoch": 0.7209950760347248,
+      "grad_norm": 0.9699972936380293,
+      "learning_rate": 9.412051255548775e-06,
+      "loss": 1.0289,
+      "step": 18230
+    },
+    {
+      "epoch": 0.7213905752536138,
+      "grad_norm": 1.0030239467096689,
+      "learning_rate": 9.410968215138343e-06,
+      "loss": 1.0295,
+      "step": 18240
+    },
+    {
+      "epoch": 0.7217860744725029,
+      "grad_norm": 1.0795386550609039,
+      "learning_rate": 9.409884240560412e-06,
+      "loss": 1.0282,
+      "step": 18250
+    },
+    {
+      "epoch": 0.7221815736913919,
+      "grad_norm": 1.0953579009158714,
+      "learning_rate": 9.408799332044552e-06,
+      "loss": 1.0201,
+      "step": 18260
+    },
+    {
+      "epoch": 0.722577072910281,
+      "grad_norm": 1.1092584838240036,
+      "learning_rate": 9.407713489820524e-06,
+      "loss": 1.0079,
+      "step": 18270
+    },
+    {
+      "epoch": 0.7229725721291701,
+      "grad_norm": 1.0798434259311,
+      "learning_rate": 9.406626714118292e-06,
+      "loss": 1.0328,
+      "step": 18280
+    },
+    {
+      "epoch": 0.7233680713480591,
+      "grad_norm": 0.9493139819260271,
+      "learning_rate": 9.405539005168019e-06,
+      "loss": 1.0406,
+      "step": 18290
+    },
+    {
+      "epoch": 0.7237635705669482,
+      "grad_norm": 0.986110374566842,
+      "learning_rate": 9.404450363200062e-06,
+      "loss": 1.0424,
+      "step": 18300
+    },
+    {
+      "epoch": 0.7241590697858372,
+      "grad_norm": 1.0529415191863654,
+      "learning_rate": 9.403360788444974e-06,
+      "loss": 1.0403,
+      "step": 18310
+    },
+    {
+      "epoch": 0.7245545690047263,
+      "grad_norm": 1.078810215390035,
+      "learning_rate": 9.40227028113351e-06,
+      "loss": 1.0233,
+      "step": 18320
+    },
+    {
+      "epoch": 0.7249500682236153,
+      "grad_norm": 1.0146987780513816,
+      "learning_rate": 9.401178841496622e-06,
+      "loss": 1.0082,
+      "step": 18330
+    },
+    {
+      "epoch": 0.7253455674425043,
+      "grad_norm": 1.0674040526570738,
+      "learning_rate": 9.400086469765457e-06,
+      "loss": 1.025,
+      "step": 18340
+    },
+    {
+      "epoch": 0.7257410666613934,
+      "grad_norm": 1.0661304663925135,
+      "learning_rate": 9.39899316617136e-06,
+      "loss": 1.0044,
+      "step": 18350
+    },
+    {
+      "epoch": 0.7261365658802824,
+      "grad_norm": 1.0068458744723774,
+      "learning_rate": 9.397898930945875e-06,
+      "loss": 1.0307,
+      "step": 18360
+    },
+    {
+      "epoch": 0.7265320650991715,
+      "grad_norm": 1.0781183364207743,
+      "learning_rate": 9.396803764320743e-06,
+      "loss": 1.024,
+      "step": 18370
+    },
+    {
+      "epoch": 0.7269275643180605,
+      "grad_norm": 1.0326553789168182,
+      "learning_rate": 9.3957076665279e-06,
+      "loss": 1.0284,
+      "step": 18380
+    },
+    {
+      "epoch": 0.7273230635369495,
+      "grad_norm": 1.001373259245197,
+      "learning_rate": 9.394610637799482e-06,
+      "loss": 1.0305,
+      "step": 18390
+    },
+    {
+      "epoch": 0.7277185627558386,
+      "grad_norm": 1.1376419765398862,
+      "learning_rate": 9.39351267836782e-06,
+      "loss": 1.0398,
+      "step": 18400
+    },
+    {
+      "epoch": 0.7281140619747276,
+      "grad_norm": 0.9661245920163976,
+      "learning_rate": 9.392413788465443e-06,
+      "loss": 1.0262,
+      "step": 18410
+    },
+    {
+      "epoch": 0.7285095611936167,
+      "grad_norm": 0.9975516455237873,
+      "learning_rate": 9.391313968325077e-06,
+      "loss": 1.0205,
+      "step": 18420
+    },
+    {
+      "epoch": 0.7289050604125057,
+      "grad_norm": 1.147619498671326,
+      "learning_rate": 9.390213218179645e-06,
+      "loss": 1.0025,
+      "step": 18430
+    },
+    {
+      "epoch": 0.7293005596313947,
+      "grad_norm": 1.1986029401913216,
+      "learning_rate": 9.389111538262268e-06,
+      "loss": 1.0211,
+      "step": 18440
+    },
+    {
+      "epoch": 0.7296960588502838,
+      "grad_norm": 1.189262096818869,
+      "learning_rate": 9.388008928806263e-06,
+      "loss": 1.0247,
+      "step": 18450
+    },
+    {
+      "epoch": 0.7300915580691728,
+      "grad_norm": 1.0667874555031875,
+      "learning_rate": 9.386905390045142e-06,
+      "loss": 1.0239,
+      "step": 18460
+    },
+    {
+      "epoch": 0.7304870572880618,
+      "grad_norm": 0.9547270237562987,
+      "learning_rate": 9.385800922212618e-06,
+      "loss": 1.0358,
+      "step": 18470
+    },
+    {
+      "epoch": 0.7308825565069509,
+      "grad_norm": 0.9695529496527657,
+      "learning_rate": 9.384695525542595e-06,
+      "loss": 1.0187,
+      "step": 18480
+    },
+    {
+      "epoch": 0.7312780557258399,
+      "grad_norm": 1.0152062952088161,
+      "learning_rate": 9.383589200269181e-06,
+      "loss": 1.0246,
+      "step": 18490
+    },
+    {
+      "epoch": 0.731673554944729,
+      "grad_norm": 1.0770326148334348,
+      "learning_rate": 9.382481946626673e-06,
+      "loss": 1.0261,
+      "step": 18500
+    },
+    {
+      "epoch": 0.732069054163618,
+      "grad_norm": 1.0345921166723826,
+      "learning_rate": 9.381373764849571e-06,
+      "loss": 1.0162,
+      "step": 18510
+    },
+    {
+      "epoch": 0.732464553382507,
+      "grad_norm": 1.0708522771601607,
+      "learning_rate": 9.380264655172569e-06,
+      "loss": 1.0291,
+      "step": 18520
+    },
+    {
+      "epoch": 0.7328600526013961,
+      "grad_norm": 1.0719678860846598,
+      "learning_rate": 9.379154617830556e-06,
+      "loss": 1.0309,
+      "step": 18530
+    },
+    {
+      "epoch": 0.7332555518202851,
+      "grad_norm": 1.059418646494692,
+      "learning_rate": 9.37804365305862e-06,
+      "loss": 1.0252,
+      "step": 18540
+    },
+    {
+      "epoch": 0.7336510510391742,
+      "grad_norm": 1.0923693514124921,
+      "learning_rate": 9.376931761092042e-06,
+      "loss": 1.0389,
+      "step": 18550
+    },
+    {
+      "epoch": 0.7340465502580632,
+      "grad_norm": 0.9773367022043173,
+      "learning_rate": 9.375818942166304e-06,
+      "loss": 1.0092,
+      "step": 18560
+    },
+    {
+      "epoch": 0.7344420494769522,
+      "grad_norm": 1.064751679259574,
+      "learning_rate": 9.374705196517082e-06,
+      "loss": 1.0183,
+      "step": 18570
+    },
+    {
+      "epoch": 0.7348375486958413,
+      "grad_norm": 1.0461734337854973,
+      "learning_rate": 9.373590524380248e-06,
+      "loss": 1.0296,
+      "step": 18580
+    },
+    {
+      "epoch": 0.7352330479147303,
+      "grad_norm": 1.0101405293159191,
+      "learning_rate": 9.37247492599187e-06,
+      "loss": 1.0221,
+      "step": 18590
+    },
+    {
+      "epoch": 0.7356285471336194,
+      "grad_norm": 1.020775319830127,
+      "learning_rate": 9.371358401588212e-06,
+      "loss": 1.0132,
+      "step": 18600
+    },
+    {
+      "epoch": 0.7360240463525084,
+      "grad_norm": 1.1127647281143733,
+      "learning_rate": 9.370240951405736e-06,
+      "loss": 1.0172,
+      "step": 18610
+    },
+    {
+      "epoch": 0.7364195455713975,
+      "grad_norm": 1.1383922818760752,
+      "learning_rate": 9.369122575681098e-06,
+      "loss": 1.0488,
+      "step": 18620
+    },
+    {
+      "epoch": 0.7368150447902866,
+      "grad_norm": 1.1171086386308282,
+      "learning_rate": 9.368003274651152e-06,
+      "loss": 1.0241,
+      "step": 18630
+    },
+    {
+      "epoch": 0.7372105440091756,
+      "grad_norm": 0.9939206769042934,
+      "learning_rate": 9.366883048552945e-06,
+      "loss": 1.0041,
+      "step": 18640
+    },
+    {
+      "epoch": 0.7376060432280647,
+      "grad_norm": 0.9339266962120722,
+      "learning_rate": 9.365761897623722e-06,
+      "loss": 1.0028,
+      "step": 18650
+    },
+    {
+      "epoch": 0.7380015424469537,
+      "grad_norm": 1.0657826513680748,
+      "learning_rate": 9.364639822100926e-06,
+      "loss": 1.0142,
+      "step": 18660
+    },
+    {
+      "epoch": 0.7383970416658427,
+      "grad_norm": 1.1114751812392274,
+      "learning_rate": 9.36351682222219e-06,
+      "loss": 1.0247,
+      "step": 18670
+    },
+    {
+      "epoch": 0.7387925408847318,
+      "grad_norm": 1.0182911899944536,
+      "learning_rate": 9.362392898225347e-06,
+      "loss": 1.035,
+      "step": 18680
+    },
+    {
+      "epoch": 0.7391880401036208,
+      "grad_norm": 1.0005635152557726,
+      "learning_rate": 9.361268050348428e-06,
+      "loss": 1.0395,
+      "step": 18690
+    },
+    {
+      "epoch": 0.7395835393225099,
+      "grad_norm": 1.0128993404430935,
+      "learning_rate": 9.360142278829653e-06,
+      "loss": 1.0243,
+      "step": 18700
+    },
+    {
+      "epoch": 0.7399790385413989,
+      "grad_norm": 1.004920029440176,
+      "learning_rate": 9.35901558390744e-06,
+      "loss": 1.0139,
+      "step": 18710
+    },
+    {
+      "epoch": 0.7403745377602879,
+      "grad_norm": 1.0002744150986418,
+      "learning_rate": 9.357887965820409e-06,
+      "loss": 1.0302,
+      "step": 18720
+    },
+    {
+      "epoch": 0.740770036979177,
+      "grad_norm": 1.0484190542947533,
+      "learning_rate": 9.356759424807365e-06,
+      "loss": 1.0363,
+      "step": 18730
+    },
+    {
+      "epoch": 0.741165536198066,
+      "grad_norm": 1.0076111366352989,
+      "learning_rate": 9.355629961107316e-06,
+      "loss": 1.0356,
+      "step": 18740
+    },
+    {
+      "epoch": 0.7415610354169551,
+      "grad_norm": 0.9036911244433983,
+      "learning_rate": 9.354499574959463e-06,
+      "loss": 1.031,
+      "step": 18750
+    },
+    {
+      "epoch": 0.7419565346358441,
+      "grad_norm": 1.0456535972012757,
+      "learning_rate": 9.353368266603202e-06,
+      "loss": 1.0181,
+      "step": 18760
+    },
+    {
+      "epoch": 0.7423520338547331,
+      "grad_norm": 0.9783379098922605,
+      "learning_rate": 9.352236036278127e-06,
+      "loss": 1.0272,
+      "step": 18770
+    },
+    {
+      "epoch": 0.7427475330736222,
+      "grad_norm": 1.1262272512320328,
+      "learning_rate": 9.351102884224019e-06,
+      "loss": 1.0172,
+      "step": 18780
+    },
+    {
+      "epoch": 0.7431430322925112,
+      "grad_norm": 1.0104228882303397,
+      "learning_rate": 9.349968810680866e-06,
+      "loss": 1.0143,
+      "step": 18790
+    },
+    {
+      "epoch": 0.7435385315114003,
+      "grad_norm": 0.9288780657972774,
+      "learning_rate": 9.348833815888843e-06,
+      "loss": 1.0241,
+      "step": 18800
+    },
+    {
+      "epoch": 0.7439340307302893,
+      "grad_norm": 0.9465221319852352,
+      "learning_rate": 9.347697900088323e-06,
+      "loss": 1.0227,
+      "step": 18810
+    },
+    {
+      "epoch": 0.7443295299491783,
+      "grad_norm": 1.0031384590934056,
+      "learning_rate": 9.346561063519873e-06,
+      "loss": 1.0332,
+      "step": 18820
+    },
+    {
+      "epoch": 0.7447250291680674,
+      "grad_norm": 1.1213781153150284,
+      "learning_rate": 9.345423306424257e-06,
+      "loss": 1.0326,
+      "step": 18830
+    },
+    {
+      "epoch": 0.7451205283869564,
+      "grad_norm": 1.0488927764672529,
+      "learning_rate": 9.344284629042431e-06,
+      "loss": 1.0351,
+      "step": 18840
+    },
+    {
+      "epoch": 0.7455160276058455,
+      "grad_norm": 1.012118316253596,
+      "learning_rate": 9.343145031615546e-06,
+      "loss": 1.0344,
+      "step": 18850
+    },
+    {
+      "epoch": 0.7459115268247345,
+      "grad_norm": 0.9856969792699805,
+      "learning_rate": 9.342004514384952e-06,
+      "loss": 1.0205,
+      "step": 18860
+    },
+    {
+      "epoch": 0.7463070260436235,
+      "grad_norm": 0.9575334310958326,
+      "learning_rate": 9.34086307759219e-06,
+      "loss": 1.0175,
+      "step": 18870
+    },
+    {
+      "epoch": 0.7467025252625126,
+      "grad_norm": 0.9601966119795331,
+      "learning_rate": 9.339720721478998e-06,
+      "loss": 1.0431,
+      "step": 18880
+    },
+    {
+      "epoch": 0.7470980244814016,
+      "grad_norm": 1.06373689786651,
+      "learning_rate": 9.338577446287305e-06,
+      "loss": 1.0356,
+      "step": 18890
+    },
+    {
+      "epoch": 0.7474935237002907,
+      "grad_norm": 1.155558342613977,
+      "learning_rate": 9.337433252259237e-06,
+      "loss": 1.0039,
+      "step": 18900
+    },
+    {
+      "epoch": 0.7478890229191797,
+      "grad_norm": 1.0242148425969468,
+      "learning_rate": 9.336288139637118e-06,
+      "loss": 1.0105,
+      "step": 18910
+    },
+    {
+      "epoch": 0.7482845221380687,
+      "grad_norm": 1.0165882156852342,
+      "learning_rate": 9.33514210866346e-06,
+      "loss": 1.0297,
+      "step": 18920
+    },
+    {
+      "epoch": 0.7486800213569578,
+      "grad_norm": 1.0668748228677531,
+      "learning_rate": 9.333995159580974e-06,
+      "loss": 1.0255,
+      "step": 18930
+    },
+    {
+      "epoch": 0.7490755205758468,
+      "grad_norm": 1.0927084015302633,
+      "learning_rate": 9.332847292632563e-06,
+      "loss": 1.0421,
+      "step": 18940
+    },
+    {
+      "epoch": 0.749471019794736,
+      "grad_norm": 1.0627145192401357,
+      "learning_rate": 9.331698508061326e-06,
+      "loss": 1.0158,
+      "step": 18950
+    },
+    {
+      "epoch": 0.749866519013625,
+      "grad_norm": 1.0023214644900786,
+      "learning_rate": 9.330548806110559e-06,
+      "loss": 1.0038,
+      "step": 18960
+    },
+    {
+      "epoch": 0.750262018232514,
+      "grad_norm": 1.0694419088452047,
+      "learning_rate": 9.329398187023745e-06,
+      "loss": 1.0188,
+      "step": 18970
+    },
+    {
+      "epoch": 0.7506575174514031,
+      "grad_norm": 0.9185680354699801,
+      "learning_rate": 9.328246651044567e-06,
+      "loss": 0.9952,
+      "step": 18980
+    },
+    {
+      "epoch": 0.7510530166702921,
+      "grad_norm": 1.006900840762992,
+      "learning_rate": 9.327094198416903e-06,
+      "loss": 1.013,
+      "step": 18990
+    },
+    {
+      "epoch": 0.7514485158891812,
+      "grad_norm": 1.0093577748613276,
+      "learning_rate": 9.325940829384819e-06,
+      "loss": 1.0281,
+      "step": 19000
+    },
+    {
+      "epoch": 0.7518440151080702,
+      "grad_norm": 1.0860462169035565,
+      "learning_rate": 9.32478654419258e-06,
+      "loss": 1.0198,
+      "step": 19010
+    },
+    {
+      "epoch": 0.7522395143269592,
+      "grad_norm": 0.994771402041234,
+      "learning_rate": 9.323631343084642e-06,
+      "loss": 1.0285,
+      "step": 19020
+    },
+    {
+      "epoch": 0.7526350135458483,
+      "grad_norm": 1.0531262016048346,
+      "learning_rate": 9.322475226305661e-06,
+      "loss": 1.0379,
+      "step": 19030
+    },
+    {
+      "epoch": 0.7530305127647373,
+      "grad_norm": 1.0673140331312077,
+      "learning_rate": 9.32131819410048e-06,
+      "loss": 1.0299,
+      "step": 19040
+    },
+    {
+      "epoch": 0.7534260119836264,
+      "grad_norm": 0.9409190989423175,
+      "learning_rate": 9.320160246714141e-06,
+      "loss": 1.0103,
+      "step": 19050
+    },
+    {
+      "epoch": 0.7538215112025154,
+      "grad_norm": 1.0828491007160792,
+      "learning_rate": 9.319001384391872e-06,
+      "loss": 1.0197,
+      "step": 19060
+    },
+    {
+      "epoch": 0.7542170104214044,
+      "grad_norm": 0.9563940190884601,
+      "learning_rate": 9.317841607379106e-06,
+      "loss": 1.021,
+      "step": 19070
+    },
+    {
+      "epoch": 0.7546125096402935,
+      "grad_norm": 0.9711286036666847,
+      "learning_rate": 9.316680915921461e-06,
+      "loss": 1.0233,
+      "step": 19080
+    },
+    {
+      "epoch": 0.7550080088591825,
+      "grad_norm": 0.9695610908794037,
+      "learning_rate": 9.315519310264753e-06,
+      "loss": 1.0154,
+      "step": 19090
+    },
+    {
+      "epoch": 0.7554035080780716,
+      "grad_norm": 1.1595785378587375,
+      "learning_rate": 9.31435679065499e-06,
+      "loss": 1.0238,
+      "step": 19100
+    },
+    {
+      "epoch": 0.7557990072969606,
+      "grad_norm": 1.005502156386906,
+      "learning_rate": 9.313193357338372e-06,
+      "loss": 1.0329,
+      "step": 19110
+    },
+    {
+      "epoch": 0.7561945065158496,
+      "grad_norm": 1.0772824039980156,
+      "learning_rate": 9.312029010561294e-06,
+      "loss": 1.0284,
+      "step": 19120
+    },
+    {
+      "epoch": 0.7565900057347387,
+      "grad_norm": 0.961761958552405,
+      "learning_rate": 9.310863750570348e-06,
+      "loss": 1.0285,
+      "step": 19130
+    },
+    {
+      "epoch": 0.7569855049536277,
+      "grad_norm": 1.0259406252479997,
+      "learning_rate": 9.309697577612312e-06,
+      "loss": 1.0045,
+      "step": 19140
+    },
+    {
+      "epoch": 0.7573810041725167,
+      "grad_norm": 0.9099565392120221,
+      "learning_rate": 9.308530491934167e-06,
+      "loss": 1.0199,
+      "step": 19150
+    },
+    {
+      "epoch": 0.7577765033914058,
+      "grad_norm": 1.020017637537644,
+      "learning_rate": 9.307362493783077e-06,
+      "loss": 1.0194,
+      "step": 19160
+    },
+    {
+      "epoch": 0.7581720026102948,
+      "grad_norm": 0.9384524429371721,
+      "learning_rate": 9.306193583406404e-06,
+      "loss": 1.012,
+      "step": 19170
+    },
+    {
+      "epoch": 0.7585675018291839,
+      "grad_norm": 1.0490319279654097,
+      "learning_rate": 9.305023761051706e-06,
+      "loss": 1.0247,
+      "step": 19180
+    },
+    {
+      "epoch": 0.7589630010480729,
+      "grad_norm": 1.0652328994301927,
+      "learning_rate": 9.30385302696673e-06,
+      "loss": 1.0309,
+      "step": 19190
+    },
+    {
+      "epoch": 0.7593585002669619,
+      "grad_norm": 1.0199138231431706,
+      "learning_rate": 9.302681381399415e-06,
+      "loss": 1.0272,
+      "step": 19200
+    },
+    {
+      "epoch": 0.759753999485851,
+      "grad_norm": 1.00140381770798,
+      "learning_rate": 9.3015088245979e-06,
+      "loss": 1.0154,
+      "step": 19210
+    },
+    {
+      "epoch": 0.76014949870474,
+      "grad_norm": 1.090021740909663,
+      "learning_rate": 9.30033535681051e-06,
+      "loss": 1.0096,
+      "step": 19220
+    },
+    {
+      "epoch": 0.7605449979236291,
+      "grad_norm": 1.152257779770484,
+      "learning_rate": 9.299160978285766e-06,
+      "loss": 1.0281,
+      "step": 19230
+    },
+    {
+      "epoch": 0.7609404971425181,
+      "grad_norm": 1.0165380948124314,
+      "learning_rate": 9.297985689272379e-06,
+      "loss": 1.0068,
+      "step": 19240
+    },
+    {
+      "epoch": 0.7613359963614071,
+      "grad_norm": 1.0619715659131068,
+      "learning_rate": 9.296809490019258e-06,
+      "loss": 1.0327,
+      "step": 19250
+    },
+    {
+      "epoch": 0.7617314955802962,
+      "grad_norm": 1.0359303110161961,
+      "learning_rate": 9.295632380775502e-06,
+      "loss": 1.0095,
+      "step": 19260
+    },
+    {
+      "epoch": 0.7621269947991852,
+      "grad_norm": 1.1420116482120743,
+      "learning_rate": 9.294454361790399e-06,
+      "loss": 1.0155,
+      "step": 19270
+    },
+    {
+      "epoch": 0.7625224940180743,
+      "grad_norm": 1.1108425689025752,
+      "learning_rate": 9.293275433313436e-06,
+      "loss": 1.0274,
+      "step": 19280
+    },
+    {
+      "epoch": 0.7629179932369634,
+      "grad_norm": 0.9690075577594031,
+      "learning_rate": 9.29209559559429e-06,
+      "loss": 1.047,
+      "step": 19290
+    },
+    {
+      "epoch": 0.7633134924558524,
+      "grad_norm": 0.9810714172727245,
+      "learning_rate": 9.290914848882831e-06,
+      "loss": 1.0434,
+      "step": 19300
+    },
+    {
+      "epoch": 0.7637089916747415,
+      "grad_norm": 0.9813211700219521,
+      "learning_rate": 9.289733193429119e-06,
+      "loss": 1.0228,
+      "step": 19310
+    },
+    {
+      "epoch": 0.7641044908936305,
+      "grad_norm": 0.9576164054215149,
+      "learning_rate": 9.288550629483408e-06,
+      "loss": 1.0018,
+      "step": 19320
+    },
+    {
+      "epoch": 0.7644999901125196,
+      "grad_norm": 1.0455231335606272,
+      "learning_rate": 9.287367157296146e-06,
+      "loss": 1.031,
+      "step": 19330
+    },
+    {
+      "epoch": 0.7648954893314086,
+      "grad_norm": 1.1297181918273749,
+      "learning_rate": 9.286182777117974e-06,
+      "loss": 1.0181,
+      "step": 19340
+    },
+    {
+      "epoch": 0.7652909885502976,
+      "grad_norm": 1.056291522635083,
+      "learning_rate": 9.28499748919972e-06,
+      "loss": 1.0262,
+      "step": 19350
+    },
+    {
+      "epoch": 0.7656864877691867,
+      "grad_norm": 0.9724494560184846,
+      "learning_rate": 9.28381129379241e-06,
+      "loss": 1.0141,
+      "step": 19360
+    },
+    {
+      "epoch": 0.7660819869880757,
+      "grad_norm": 1.043582624117857,
+      "learning_rate": 9.282624191147258e-06,
+      "loss": 1.0218,
+      "step": 19370
+    },
+    {
+      "epoch": 0.7664774862069648,
+      "grad_norm": 1.0650864660468717,
+      "learning_rate": 9.281436181515673e-06,
+      "loss": 1.008,
+      "step": 19380
+    },
+    {
+      "epoch": 0.7668729854258538,
+      "grad_norm": 1.0774913543212994,
+      "learning_rate": 9.280247265149256e-06,
+      "loss": 1.0093,
+      "step": 19390
+    },
+    {
+      "epoch": 0.7672684846447428,
+      "grad_norm": 1.036731805880924,
+      "learning_rate": 9.2790574422998e-06,
+      "loss": 1.018,
+      "step": 19400
+    },
+    {
+      "epoch": 0.7676639838636319,
+      "grad_norm": 1.144514484297006,
+      "learning_rate": 9.277866713219284e-06,
+      "loss": 1.0255,
+      "step": 19410
+    },
+    {
+      "epoch": 0.7680594830825209,
+      "grad_norm": 0.9593936439738194,
+      "learning_rate": 9.27667507815989e-06,
+      "loss": 1.0161,
+      "step": 19420
+    },
+    {
+      "epoch": 0.76845498230141,
+      "grad_norm": 0.9432364439032389,
+      "learning_rate": 9.27548253737398e-06,
+      "loss": 1.0168,
+      "step": 19430
+    },
+    {
+      "epoch": 0.768850481520299,
+      "grad_norm": 0.9577852173455976,
+      "learning_rate": 9.27428909111412e-06,
+      "loss": 1.0159,
+      "step": 19440
+    },
+    {
+      "epoch": 0.769245980739188,
+      "grad_norm": 1.16737808015621,
+      "learning_rate": 9.273094739633057e-06,
+      "loss": 0.9975,
+      "step": 19450
+    },
+    {
+      "epoch": 0.7696414799580771,
+      "grad_norm": 1.0686547602772396,
+      "learning_rate": 9.271899483183737e-06,
+      "loss": 0.9994,
+      "step": 19460
+    },
+    {
+      "epoch": 0.7700369791769661,
+      "grad_norm": 1.0717802552838147,
+      "learning_rate": 9.270703322019293e-06,
+      "loss": 1.0035,
+      "step": 19470
+    },
+    {
+      "epoch": 0.7704324783958552,
+      "grad_norm": 0.9113304418118345,
+      "learning_rate": 9.269506256393051e-06,
+      "loss": 1.0215,
+      "step": 19480
+    },
+    {
+      "epoch": 0.7708279776147442,
+      "grad_norm": 1.0505384794273,
+      "learning_rate": 9.26830828655853e-06,
+      "loss": 0.9917,
+      "step": 19490
+    },
+    {
+      "epoch": 0.7712234768336332,
+      "grad_norm": 1.0493878251717221,
+      "learning_rate": 9.26710941276944e-06,
+      "loss": 1.0098,
+      "step": 19500
+    },
+    {
+      "epoch": 0.7716189760525223,
+      "grad_norm": 0.9743329912014642,
+      "learning_rate": 9.26590963527968e-06,
+      "loss": 1.0393,
+      "step": 19510
+    },
+    {
+      "epoch": 0.7720144752714113,
+      "grad_norm": 1.0295075900448483,
+      "learning_rate": 9.264708954343344e-06,
+      "loss": 1.015,
+      "step": 19520
+    },
+    {
+      "epoch": 0.7724099744903004,
+      "grad_norm": 1.0715856241898993,
+      "learning_rate": 9.263507370214716e-06,
+      "loss": 1.0099,
+      "step": 19530
+    },
+    {
+      "epoch": 0.7728054737091894,
+      "grad_norm": 1.0928812797829903,
+      "learning_rate": 9.26230488314827e-06,
+      "loss": 1.0178,
+      "step": 19540
+    },
+    {
+      "epoch": 0.7732009729280784,
+      "grad_norm": 1.079469361362925,
+      "learning_rate": 9.261101493398672e-06,
+      "loss": 1.0125,
+      "step": 19550
+    },
+    {
+      "epoch": 0.7735964721469675,
+      "grad_norm": 1.094048467683584,
+      "learning_rate": 9.25989720122078e-06,
+      "loss": 1.0087,
+      "step": 19560
+    },
+    {
+      "epoch": 0.7739919713658565,
+      "grad_norm": 0.9829794624397377,
+      "learning_rate": 9.258692006869644e-06,
+      "loss": 1.026,
+      "step": 19570
+    },
+    {
+      "epoch": 0.7743874705847456,
+      "grad_norm": 0.9329725298405944,
+      "learning_rate": 9.2574859106005e-06,
+      "loss": 0.9901,
+      "step": 19580
+    },
+    {
+      "epoch": 0.7747829698036346,
+      "grad_norm": 1.0273996978263786,
+      "learning_rate": 9.25627891266878e-06,
+      "loss": 1.0258,
+      "step": 19590
+    },
+    {
+      "epoch": 0.7751784690225236,
+      "grad_norm": 1.0539329548331768,
+      "learning_rate": 9.255071013330104e-06,
+      "loss": 1.0119,
+      "step": 19600
+    },
+    {
+      "epoch": 0.7755739682414127,
+      "grad_norm": 1.0041432290156813,
+      "learning_rate": 9.253862212840288e-06,
+      "loss": 1.0112,
+      "step": 19610
+    },
+    {
+      "epoch": 0.7759694674603018,
+      "grad_norm": 0.9971852827921748,
+      "learning_rate": 9.252652511455333e-06,
+      "loss": 1.0056,
+      "step": 19620
+    },
+    {
+      "epoch": 0.7763649666791909,
+      "grad_norm": 0.9406095710545285,
+      "learning_rate": 9.251441909431433e-06,
+      "loss": 1.0154,
+      "step": 19630
+    },
+    {
+      "epoch": 0.7767604658980799,
+      "grad_norm": 1.142755100981078,
+      "learning_rate": 9.250230407024975e-06,
+      "loss": 1.0139,
+      "step": 19640
+    },
+    {
+      "epoch": 0.7771559651169689,
+      "grad_norm": 1.057150985151994,
+      "learning_rate": 9.249018004492529e-06,
+      "loss": 0.9967,
+      "step": 19650
+    },
+    {
+      "epoch": 0.777551464335858,
+      "grad_norm": 0.9795617707482281,
+      "learning_rate": 9.247804702090867e-06,
+      "loss": 1.0221,
+      "step": 19660
+    },
+    {
+      "epoch": 0.777946963554747,
+      "grad_norm": 1.015999182209421,
+      "learning_rate": 9.246590500076944e-06,
+      "loss": 1.0176,
+      "step": 19670
+    },
+    {
+      "epoch": 0.7783424627736361,
+      "grad_norm": 1.04158190346237,
+      "learning_rate": 9.245375398707905e-06,
+      "loss": 1.0209,
+      "step": 19680
+    },
+    {
+      "epoch": 0.7787379619925251,
+      "grad_norm": 1.122543189280439,
+      "learning_rate": 9.24415939824109e-06,
+      "loss": 1.0212,
+      "step": 19690
+    },
+    {
+      "epoch": 0.7791334612114141,
+      "grad_norm": 0.9261687506749237,
+      "learning_rate": 9.242942498934025e-06,
+      "loss": 1.0228,
+      "step": 19700
+    },
+    {
+      "epoch": 0.7795289604303032,
+      "grad_norm": 1.0028590439493865,
+      "learning_rate": 9.24172470104443e-06,
+      "loss": 1.002,
+      "step": 19710
+    },
+    {
+      "epoch": 0.7799244596491922,
+      "grad_norm": 1.0293759066344548,
+      "learning_rate": 9.240506004830214e-06,
+      "loss": 1.0101,
+      "step": 19720
+    },
+    {
+      "epoch": 0.7803199588680813,
+      "grad_norm": 1.0317117590147789,
+      "learning_rate": 9.239286410549475e-06,
+      "loss": 1.0067,
+      "step": 19730
+    },
+    {
+      "epoch": 0.7807154580869703,
+      "grad_norm": 1.0205275027954295,
+      "learning_rate": 9.238065918460503e-06,
+      "loss": 1.0034,
+      "step": 19740
+    },
+    {
+      "epoch": 0.7811109573058593,
+      "grad_norm": 0.9735273695279139,
+      "learning_rate": 9.236844528821776e-06,
+      "loss": 1.0179,
+      "step": 19750
+    },
+    {
+      "epoch": 0.7815064565247484,
+      "grad_norm": 1.0646165684795013,
+      "learning_rate": 9.235622241891964e-06,
+      "loss": 1.0046,
+      "step": 19760
+    },
+    {
+      "epoch": 0.7819019557436374,
+      "grad_norm": 1.079221881642748,
+      "learning_rate": 9.234399057929928e-06,
+      "loss": 1.0046,
+      "step": 19770
+    },
+    {
+      "epoch": 0.7822974549625265,
+      "grad_norm": 1.0613787563963717,
+      "learning_rate": 9.233174977194717e-06,
+      "loss": 1.0135,
+      "step": 19780
+    },
+    {
+      "epoch": 0.7826929541814155,
+      "grad_norm": 0.9600904675334615,
+      "learning_rate": 9.231949999945567e-06,
+      "loss": 1.021,
+      "step": 19790
+    },
+    {
+      "epoch": 0.7830884534003045,
+      "grad_norm": 1.027985703630233,
+      "learning_rate": 9.23072412644191e-06,
+      "loss": 1.0337,
+      "step": 19800
+    },
+    {
+      "epoch": 0.7834839526191936,
+      "grad_norm": 0.93390627761561,
+      "learning_rate": 9.229497356943364e-06,
+      "loss": 1.0027,
+      "step": 19810
+    },
+    {
+      "epoch": 0.7838794518380826,
+      "grad_norm": 1.0219463087472611,
+      "learning_rate": 9.228269691709739e-06,
+      "loss": 1.0225,
+      "step": 19820
+    },
+    {
+      "epoch": 0.7842749510569716,
+      "grad_norm": 1.002995355213593,
+      "learning_rate": 9.227041131001034e-06,
+      "loss": 1.0218,
+      "step": 19830
+    },
+    {
+      "epoch": 0.7846704502758607,
+      "grad_norm": 0.9860650591548502,
+      "learning_rate": 9.225811675077435e-06,
+      "loss": 1.0288,
+      "step": 19840
+    },
+    {
+      "epoch": 0.7850659494947497,
+      "grad_norm": 1.1148899724117105,
+      "learning_rate": 9.22458132419932e-06,
+      "loss": 1.033,
+      "step": 19850
+    },
+    {
+      "epoch": 0.7854614487136388,
+      "grad_norm": 0.9534438569029959,
+      "learning_rate": 9.223350078627258e-06,
+      "loss": 1.0056,
+      "step": 19860
+    },
+    {
+      "epoch": 0.7858569479325278,
+      "grad_norm": 0.9898571873086477,
+      "learning_rate": 9.222117938622004e-06,
+      "loss": 1.0206,
+      "step": 19870
+    },
+    {
+      "epoch": 0.7862524471514168,
+      "grad_norm": 1.0681159410788263,
+      "learning_rate": 9.220884904444505e-06,
+      "loss": 1.0117,
+      "step": 19880
+    },
+    {
+      "epoch": 0.7866479463703059,
+      "grad_norm": 0.992718103988424,
+      "learning_rate": 9.219650976355895e-06,
+      "loss": 1.0124,
+      "step": 19890
+    },
+    {
+      "epoch": 0.7870434455891949,
+      "grad_norm": 1.0062492875639375,
+      "learning_rate": 9.218416154617503e-06,
+      "loss": 1.0268,
+      "step": 19900
+    },
+    {
+      "epoch": 0.787438944808084,
+      "grad_norm": 1.0558858349412301,
+      "learning_rate": 9.217180439490836e-06,
+      "loss": 1.0039,
+      "step": 19910
+    },
+    {
+      "epoch": 0.787834444026973,
+      "grad_norm": 1.0229715091797993,
+      "learning_rate": 9.215943831237604e-06,
+      "loss": 1.0227,
+      "step": 19920
+    },
+    {
+      "epoch": 0.788229943245862,
+      "grad_norm": 0.9772301798391267,
+      "learning_rate": 9.214706330119697e-06,
+      "loss": 0.9968,
+      "step": 19930
+    },
+    {
+      "epoch": 0.7886254424647511,
+      "grad_norm": 0.9954296565633302,
+      "learning_rate": 9.213467936399196e-06,
+      "loss": 0.9874,
+      "step": 19940
+    },
+    {
+      "epoch": 0.7890209416836401,
+      "grad_norm": 1.158774509423265,
+      "learning_rate": 9.212228650338371e-06,
+      "loss": 1.0161,
+      "step": 19950
+    },
+    {
+      "epoch": 0.7894164409025293,
+      "grad_norm": 1.0023364016480276,
+      "learning_rate": 9.210988472199685e-06,
+      "loss": 1.012,
+      "step": 19960
+    },
+    {
+      "epoch": 0.7898119401214183,
+      "grad_norm": 1.021651821279293,
+      "learning_rate": 9.209747402245782e-06,
+      "loss": 1.0152,
+      "step": 19970
+    },
+    {
+      "epoch": 0.7902074393403073,
+      "grad_norm": 1.0512413445924476,
+      "learning_rate": 9.208505440739502e-06,
+      "loss": 1.0105,
+      "step": 19980
+    },
+    {
+      "epoch": 0.7906029385591964,
+      "grad_norm": 1.0602035686643103,
+      "learning_rate": 9.20726258794387e-06,
+      "loss": 1.0161,
+      "step": 19990
+    },
+    {
+      "epoch": 0.7909984377780854,
+      "grad_norm": 0.9953783236544859,
+      "learning_rate": 9.206018844122102e-06,
+      "loss": 1.0047,
+      "step": 20000
+    },
+    {
+      "epoch": 0.7913939369969745,
+      "grad_norm": 1.0587613276719485,
+      "learning_rate": 9.204774209537602e-06,
+      "loss": 1.0304,
+      "step": 20010
+    },
+    {
+      "epoch": 0.7917894362158635,
+      "grad_norm": 1.113710621677745,
+      "learning_rate": 9.203528684453961e-06,
+      "loss": 1.0176,
+      "step": 20020
+    },
+    {
+      "epoch": 0.7921849354347525,
+      "grad_norm": 1.0342528626614702,
+      "learning_rate": 9.202282269134959e-06,
+      "loss": 1.0158,
+      "step": 20030
+    },
+    {
+      "epoch": 0.7925804346536416,
+      "grad_norm": 1.116941707591573,
+      "learning_rate": 9.201034963844568e-06,
+      "loss": 1.0048,
+      "step": 20040
+    },
+    {
+      "epoch": 0.7929759338725306,
+      "grad_norm": 1.1317385552718788,
+      "learning_rate": 9.199786768846944e-06,
+      "loss": 1.0257,
+      "step": 20050
+    },
+    {
+      "epoch": 0.7933714330914197,
+      "grad_norm": 1.11888521603365,
+      "learning_rate": 9.198537684406434e-06,
+      "loss": 1.0037,
+      "step": 20060
+    },
+    {
+      "epoch": 0.7937669323103087,
+      "grad_norm": 1.1002390924797294,
+      "learning_rate": 9.197287710787573e-06,
+      "loss": 1.0333,
+      "step": 20070
+    },
+    {
+      "epoch": 0.7941624315291977,
+      "grad_norm": 1.1251850282361266,
+      "learning_rate": 9.196036848255084e-06,
+      "loss": 1.016,
+      "step": 20080
+    },
+    {
+      "epoch": 0.7945579307480868,
+      "grad_norm": 1.007544152452515,
+      "learning_rate": 9.194785097073875e-06,
+      "loss": 1.0109,
+      "step": 20090
+    },
+    {
+      "epoch": 0.7949534299669758,
+      "grad_norm": 1.0140803324390137,
+      "learning_rate": 9.193532457509051e-06,
+      "loss": 0.9777,
+      "step": 20100
+    },
+    {
+      "epoch": 0.7953489291858649,
+      "grad_norm": 1.1089816560064845,
+      "learning_rate": 9.192278929825896e-06,
+      "loss": 1.0106,
+      "step": 20110
+    },
+    {
+      "epoch": 0.7957444284047539,
+      "grad_norm": 1.0461554147005818,
+      "learning_rate": 9.191024514289887e-06,
+      "loss": 1.0157,
+      "step": 20120
+    },
+    {
+      "epoch": 0.7961399276236429,
+      "grad_norm": 0.948486945865474,
+      "learning_rate": 9.189769211166688e-06,
+      "loss": 1.0277,
+      "step": 20130
+    },
+    {
+      "epoch": 0.796535426842532,
+      "grad_norm": 1.0156445520609347,
+      "learning_rate": 9.188513020722149e-06,
+      "loss": 1.016,
+      "step": 20140
+    },
+    {
+      "epoch": 0.796930926061421,
+      "grad_norm": 1.0775912190846668,
+      "learning_rate": 9.187255943222311e-06,
+      "loss": 1.0016,
+      "step": 20150
+    },
+    {
+      "epoch": 0.7973264252803101,
+      "grad_norm": 1.0547485733442106,
+      "learning_rate": 9.1859979789334e-06,
+      "loss": 1.0107,
+      "step": 20160
+    },
+    {
+      "epoch": 0.7977219244991991,
+      "grad_norm": 0.9850939277351383,
+      "learning_rate": 9.184739128121833e-06,
+      "loss": 1.0135,
+      "step": 20170
+    },
+    {
+      "epoch": 0.7981174237180881,
+      "grad_norm": 1.0413362027888042,
+      "learning_rate": 9.183479391054212e-06,
+      "loss": 1.0206,
+      "step": 20180
+    },
+    {
+      "epoch": 0.7985129229369772,
+      "grad_norm": 1.0252657507839713,
+      "learning_rate": 9.182218767997329e-06,
+      "loss": 1.0061,
+      "step": 20190
+    },
+    {
+      "epoch": 0.7989084221558662,
+      "grad_norm": 1.1199126021828947,
+      "learning_rate": 9.180957259218162e-06,
+      "loss": 1.0145,
+      "step": 20200
+    },
+    {
+      "epoch": 0.7993039213747553,
+      "grad_norm": 1.030768294725857,
+      "learning_rate": 9.179694864983874e-06,
+      "loss": 0.9993,
+      "step": 20210
+    },
+    {
+      "epoch": 0.7996994205936443,
+      "grad_norm": 1.0615858418506352,
+      "learning_rate": 9.178431585561825e-06,
+      "loss": 1.0225,
+      "step": 20220
+    },
+    {
+      "epoch": 0.8000949198125333,
+      "grad_norm": 0.9416693770891857,
+      "learning_rate": 9.177167421219548e-06,
+      "loss": 1.0158,
+      "step": 20230
+    },
+    {
+      "epoch": 0.8004904190314224,
+      "grad_norm": 1.0000017417854856,
+      "learning_rate": 9.175902372224777e-06,
+      "loss": 0.9853,
+      "step": 20240
+    },
+    {
+      "epoch": 0.8008859182503114,
+      "grad_norm": 1.2554165506836938,
+      "learning_rate": 9.174636438845426e-06,
+      "loss": 1.0092,
+      "step": 20250
+    },
+    {
+      "epoch": 0.8012814174692005,
+      "grad_norm": 1.1318244966833997,
+      "learning_rate": 9.173369621349597e-06,
+      "loss": 1.0144,
+      "step": 20260
+    },
+    {
+      "epoch": 0.8016769166880895,
+      "grad_norm": 1.1375317281761548,
+      "learning_rate": 9.172101920005582e-06,
+      "loss": 1.0166,
+      "step": 20270
+    },
+    {
+      "epoch": 0.8020724159069785,
+      "grad_norm": 0.9756803045017065,
+      "learning_rate": 9.170833335081857e-06,
+      "loss": 1.0109,
+      "step": 20280
+    },
+    {
+      "epoch": 0.8024679151258677,
+      "grad_norm": 1.115143344151944,
+      "learning_rate": 9.169563866847086e-06,
+      "loss": 1.0031,
+      "step": 20290
+    },
+    {
+      "epoch": 0.8028634143447567,
+      "grad_norm": 1.083087175179526,
+      "learning_rate": 9.168293515570123e-06,
+      "loss": 1.0205,
+      "step": 20300
+    },
+    {
+      "epoch": 0.8032589135636458,
+      "grad_norm": 0.9408082881127293,
+      "learning_rate": 9.167022281520007e-06,
+      "loss": 1.0026,
+      "step": 20310
+    },
+    {
+      "epoch": 0.8036544127825348,
+      "grad_norm": 1.0748663499652444,
+      "learning_rate": 9.165750164965961e-06,
+      "loss": 1.0024,
+      "step": 20320
+    },
+    {
+      "epoch": 0.8040499120014238,
+      "grad_norm": 1.1439582786245392,
+      "learning_rate": 9.164477166177397e-06,
+      "loss": 0.9995,
+      "step": 20330
+    },
+    {
+      "epoch": 0.8044454112203129,
+      "grad_norm": 1.030662329697222,
+      "learning_rate": 9.163203285423917e-06,
+      "loss": 1.0135,
+      "step": 20340
+    },
+    {
+      "epoch": 0.8048409104392019,
+      "grad_norm": 1.1445213134877017,
+      "learning_rate": 9.161928522975305e-06,
+      "loss": 1.0036,
+      "step": 20350
+    },
+    {
+      "epoch": 0.805236409658091,
+      "grad_norm": 0.9941138888962111,
+      "learning_rate": 9.160652879101537e-06,
+      "loss": 1.0358,
+      "step": 20360
+    },
+    {
+      "epoch": 0.80563190887698,
+      "grad_norm": 0.9961305624271573,
+      "learning_rate": 9.159376354072769e-06,
+      "loss": 0.9897,
+      "step": 20370
+    },
+    {
+      "epoch": 0.806027408095869,
+      "grad_norm": 0.9991187306679739,
+      "learning_rate": 9.158098948159348e-06,
+      "loss": 1.0185,
+      "step": 20380
+    },
+    {
+      "epoch": 0.8064229073147581,
+      "grad_norm": 1.0520312040744422,
+      "learning_rate": 9.156820661631807e-06,
+      "loss": 0.9839,
+      "step": 20390
+    },
+    {
+      "epoch": 0.8068184065336471,
+      "grad_norm": 1.1059790334995376,
+      "learning_rate": 9.155541494760865e-06,
+      "loss": 1.0156,
+      "step": 20400
+    },
+    {
+      "epoch": 0.8072139057525362,
+      "grad_norm": 1.1258721010655148,
+      "learning_rate": 9.15426144781743e-06,
+      "loss": 1.0006,
+      "step": 20410
+    },
+    {
+      "epoch": 0.8076094049714252,
+      "grad_norm": 1.1017557773545024,
+      "learning_rate": 9.15298052107259e-06,
+      "loss": 0.9823,
+      "step": 20420
+    },
+    {
+      "epoch": 0.8080049041903142,
+      "grad_norm": 1.0281770283228173,
+      "learning_rate": 9.151698714797625e-06,
+      "loss": 1.0263,
+      "step": 20430
+    },
+    {
+      "epoch": 0.8084004034092033,
+      "grad_norm": 1.190826235966877,
+      "learning_rate": 9.150416029264e-06,
+      "loss": 1.0205,
+      "step": 20440
+    },
+    {
+      "epoch": 0.8087959026280923,
+      "grad_norm": 1.0144729083676585,
+      "learning_rate": 9.149132464743367e-06,
+      "loss": 1.0191,
+      "step": 20450
+    },
+    {
+      "epoch": 0.8091914018469814,
+      "grad_norm": 1.1409518677606938,
+      "learning_rate": 9.147848021507561e-06,
+      "loss": 1.0155,
+      "step": 20460
+    },
+    {
+      "epoch": 0.8095869010658704,
+      "grad_norm": 0.9853256029033983,
+      "learning_rate": 9.146562699828607e-06,
+      "loss": 1.0208,
+      "step": 20470
+    },
+    {
+      "epoch": 0.8099824002847594,
+      "grad_norm": 1.0521218898071147,
+      "learning_rate": 9.145276499978712e-06,
+      "loss": 1.0128,
+      "step": 20480
+    },
+    {
+      "epoch": 0.8103778995036485,
+      "grad_norm": 0.976138875676817,
+      "learning_rate": 9.14398942223027e-06,
+      "loss": 1.0112,
+      "step": 20490
+    },
+    {
+      "epoch": 0.8107733987225375,
+      "grad_norm": 1.1369504950315994,
+      "learning_rate": 9.142701466855867e-06,
+      "loss": 1.0001,
+      "step": 20500
+    },
+    {
+      "epoch": 0.8111688979414265,
+      "grad_norm": 1.0333480817123695,
+      "learning_rate": 9.141412634128266e-06,
+      "loss": 1.0107,
+      "step": 20510
+    },
+    {
+      "epoch": 0.8115643971603156,
+      "grad_norm": 1.0764740894511584,
+      "learning_rate": 9.140122924320419e-06,
+      "loss": 0.9936,
+      "step": 20520
+    },
+    {
+      "epoch": 0.8119598963792046,
+      "grad_norm": 0.9546016757818112,
+      "learning_rate": 9.138832337705467e-06,
+      "loss": 0.9954,
+      "step": 20530
+    },
+    {
+      "epoch": 0.8123553955980937,
+      "grad_norm": 0.9871714005710543,
+      "learning_rate": 9.137540874556734e-06,
+      "loss": 1.0117,
+      "step": 20540
+    },
+    {
+      "epoch": 0.8127508948169827,
+      "grad_norm": 0.945592475259025,
+      "learning_rate": 9.136248535147729e-06,
+      "loss": 0.999,
+      "step": 20550
+    },
+    {
+      "epoch": 0.8131463940358717,
+      "grad_norm": 1.0467075391893805,
+      "learning_rate": 9.134955319752146e-06,
+      "loss": 1.0215,
+      "step": 20560
+    },
+    {
+      "epoch": 0.8135418932547608,
+      "grad_norm": 1.04763511832311,
+      "learning_rate": 9.133661228643866e-06,
+      "loss": 1.0234,
+      "step": 20570
+    },
+    {
+      "epoch": 0.8139373924736498,
+      "grad_norm": 1.187121967630847,
+      "learning_rate": 9.132366262096959e-06,
+      "loss": 0.9904,
+      "step": 20580
+    },
+    {
+      "epoch": 0.8143328916925389,
+      "grad_norm": 1.0276544619131693,
+      "learning_rate": 9.131070420385673e-06,
+      "loss": 1.0153,
+      "step": 20590
+    },
+    {
+      "epoch": 0.8147283909114279,
+      "grad_norm": 0.957530284172485,
+      "learning_rate": 9.129773703784448e-06,
+      "loss": 1.0146,
+      "step": 20600
+    },
+    {
+      "epoch": 0.8151238901303169,
+      "grad_norm": 1.1495994091939181,
+      "learning_rate": 9.1284761125679e-06,
+      "loss": 1.029,
+      "step": 20610
+    },
+    {
+      "epoch": 0.815519389349206,
+      "grad_norm": 1.0103176916944374,
+      "learning_rate": 9.127177647010845e-06,
+      "loss": 1.0303,
+      "step": 20620
+    },
+    {
+      "epoch": 0.8159148885680951,
+      "grad_norm": 1.0050906077864001,
+      "learning_rate": 9.125878307388272e-06,
+      "loss": 1.0089,
+      "step": 20630
+    },
+    {
+      "epoch": 0.8163103877869842,
+      "grad_norm": 0.974353635775406,
+      "learning_rate": 9.124578093975358e-06,
+      "loss": 1.0041,
+      "step": 20640
+    },
+    {
+      "epoch": 0.8167058870058732,
+      "grad_norm": 1.0383845920556423,
+      "learning_rate": 9.123277007047467e-06,
+      "loss": 1.0038,
+      "step": 20650
+    },
+    {
+      "epoch": 0.8171013862247622,
+      "grad_norm": 1.1946496566770903,
+      "learning_rate": 9.121975046880146e-06,
+      "loss": 1.0082,
+      "step": 20660
+    },
+    {
+      "epoch": 0.8174968854436513,
+      "grad_norm": 1.102640351886497,
+      "learning_rate": 9.120672213749129e-06,
+      "loss": 0.9968,
+      "step": 20670
+    },
+    {
+      "epoch": 0.8178923846625403,
+      "grad_norm": 1.0274708984122392,
+      "learning_rate": 9.119368507930332e-06,
+      "loss": 1.0024,
+      "step": 20680
+    },
+    {
+      "epoch": 0.8182878838814294,
+      "grad_norm": 0.9487788908666108,
+      "learning_rate": 9.11806392969986e-06,
+      "loss": 1.0179,
+      "step": 20690
+    },
+    {
+      "epoch": 0.8186833831003184,
+      "grad_norm": 1.1192067496020226,
+      "learning_rate": 9.116758479334e-06,
+      "loss": 1.0051,
+      "step": 20700
+    },
+    {
+      "epoch": 0.8190788823192074,
+      "grad_norm": 1.1195359956028454,
+      "learning_rate": 9.115452157109223e-06,
+      "loss": 1.0231,
+      "step": 20710
+    },
+    {
+      "epoch": 0.8194743815380965,
+      "grad_norm": 0.9781688862836991,
+      "learning_rate": 9.114144963302185e-06,
+      "loss": 1.0001,
+      "step": 20720
+    },
+    {
+      "epoch": 0.8198698807569855,
+      "grad_norm": 1.0556996456247876,
+      "learning_rate": 9.112836898189728e-06,
+      "loss": 1.0067,
+      "step": 20730
+    },
+    {
+      "epoch": 0.8202653799758746,
+      "grad_norm": 1.0364740558005778,
+      "learning_rate": 9.111527962048878e-06,
+      "loss": 1.0026,
+      "step": 20740
+    },
+    {
+      "epoch": 0.8206608791947636,
+      "grad_norm": 1.029224935259368,
+      "learning_rate": 9.110218155156845e-06,
+      "loss": 0.9798,
+      "step": 20750
+    },
+    {
+      "epoch": 0.8210563784136526,
+      "grad_norm": 0.9687082383237153,
+      "learning_rate": 9.108907477791025e-06,
+      "loss": 0.9967,
+      "step": 20760
+    },
+    {
+      "epoch": 0.8214518776325417,
+      "grad_norm": 1.1099612391952516,
+      "learning_rate": 9.107595930228995e-06,
+      "loss": 1.0094,
+      "step": 20770
+    },
+    {
+      "epoch": 0.8218473768514307,
+      "grad_norm": 1.100023961165057,
+      "learning_rate": 9.106283512748518e-06,
+      "loss": 1.0056,
+      "step": 20780
+    },
+    {
+      "epoch": 0.8222428760703198,
+      "grad_norm": 1.1170383977979603,
+      "learning_rate": 9.104970225627544e-06,
+      "loss": 1.0035,
+      "step": 20790
+    },
+    {
+      "epoch": 0.8226383752892088,
+      "grad_norm": 1.0664944115887174,
+      "learning_rate": 9.103656069144203e-06,
+      "loss": 1.0076,
+      "step": 20800
+    },
+    {
+      "epoch": 0.8230338745080978,
+      "grad_norm": 1.057179360255666,
+      "learning_rate": 9.10234104357681e-06,
+      "loss": 1.0197,
+      "step": 20810
+    },
+    {
+      "epoch": 0.8234293737269869,
+      "grad_norm": 1.0514999811425032,
+      "learning_rate": 9.101025149203868e-06,
+      "loss": 1.0132,
+      "step": 20820
+    },
+    {
+      "epoch": 0.8238248729458759,
+      "grad_norm": 1.182089975356494,
+      "learning_rate": 9.099708386304059e-06,
+      "loss": 0.9924,
+      "step": 20830
+    },
+    {
+      "epoch": 0.824220372164765,
+      "grad_norm": 1.0404828424727715,
+      "learning_rate": 9.098390755156248e-06,
+      "loss": 1.0031,
+      "step": 20840
+    },
+    {
+      "epoch": 0.824615871383654,
+      "grad_norm": 1.0427993789695162,
+      "learning_rate": 9.097072256039493e-06,
+      "loss": 1.0207,
+      "step": 20850
+    },
+    {
+      "epoch": 0.825011370602543,
+      "grad_norm": 1.1547262038143493,
+      "learning_rate": 9.095752889233022e-06,
+      "loss": 1.0034,
+      "step": 20860
+    },
+    {
+      "epoch": 0.8254068698214321,
+      "grad_norm": 1.0519044488185854,
+      "learning_rate": 9.094432655016261e-06,
+      "loss": 0.9782,
+      "step": 20870
+    },
+    {
+      "epoch": 0.8258023690403211,
+      "grad_norm": 1.0126784299359057,
+      "learning_rate": 9.09311155366881e-06,
+      "loss": 1.008,
+      "step": 20880
+    },
+    {
+      "epoch": 0.8261978682592102,
+      "grad_norm": 0.9799142297044481,
+      "learning_rate": 9.091789585470455e-06,
+      "loss": 0.9959,
+      "step": 20890
+    },
+    {
+      "epoch": 0.8265933674780992,
+      "grad_norm": 1.0333492223830443,
+      "learning_rate": 9.090466750701168e-06,
+      "loss": 0.9882,
+      "step": 20900
+    },
+    {
+      "epoch": 0.8269888666969882,
+      "grad_norm": 1.0041615656713214,
+      "learning_rate": 9.089143049641101e-06,
+      "loss": 1.0112,
+      "step": 20910
+    },
+    {
+      "epoch": 0.8273843659158773,
+      "grad_norm": 1.1141159412870034,
+      "learning_rate": 9.087818482570593e-06,
+      "loss": 1.0058,
+      "step": 20920
+    },
+    {
+      "epoch": 0.8277798651347663,
+      "grad_norm": 1.0850635629839598,
+      "learning_rate": 9.086493049770165e-06,
+      "loss": 1.0137,
+      "step": 20930
+    },
+    {
+      "epoch": 0.8281753643536554,
+      "grad_norm": 1.1006933245055717,
+      "learning_rate": 9.085166751520517e-06,
+      "loss": 1.019,
+      "step": 20940
+    },
+    {
+      "epoch": 0.8285708635725444,
+      "grad_norm": 1.1516567615085185,
+      "learning_rate": 9.083839588102539e-06,
+      "loss": 1.0017,
+      "step": 20950
+    },
+    {
+      "epoch": 0.8289663627914335,
+      "grad_norm": 1.0424298342039158,
+      "learning_rate": 9.082511559797302e-06,
+      "loss": 0.9985,
+      "step": 20960
+    },
+    {
+      "epoch": 0.8293618620103226,
+      "grad_norm": 1.0673143560532994,
+      "learning_rate": 9.081182666886059e-06,
+      "loss": 0.9949,
+      "step": 20970
+    },
+    {
+      "epoch": 0.8297573612292116,
+      "grad_norm": 1.0369142970444467,
+      "learning_rate": 9.079852909650247e-06,
+      "loss": 0.999,
+      "step": 20980
+    },
+    {
+      "epoch": 0.8301528604481007,
+      "grad_norm": 1.0804231131520636,
+      "learning_rate": 9.078522288371486e-06,
+      "loss": 0.9986,
+      "step": 20990
+    },
+    {
+      "epoch": 0.8305483596669897,
+      "grad_norm": 1.0560859100914735,
+      "learning_rate": 9.077190803331578e-06,
+      "loss": 1.0005,
+      "step": 21000
+    },
+    {
+      "epoch": 0.8309438588858787,
+      "grad_norm": 0.9761793889965177,
+      "learning_rate": 9.075858454812509e-06,
+      "loss": 1.0147,
+      "step": 21010
+    },
+    {
+      "epoch": 0.8313393581047678,
+      "grad_norm": 1.0433326934312541,
+      "learning_rate": 9.074525243096448e-06,
+      "loss": 1.0127,
+      "step": 21020
+    },
+    {
+      "epoch": 0.8317348573236568,
+      "grad_norm": 1.0396964022444006,
+      "learning_rate": 9.073191168465744e-06,
+      "loss": 1.0276,
+      "step": 21030
+    },
+    {
+      "epoch": 0.8321303565425459,
+      "grad_norm": 0.8809520769538411,
+      "learning_rate": 9.071856231202935e-06,
+      "loss": 0.9986,
+      "step": 21040
+    },
+    {
+      "epoch": 0.8325258557614349,
+      "grad_norm": 0.9505033964683348,
+      "learning_rate": 9.070520431590738e-06,
+      "loss": 1.0031,
+      "step": 21050
+    },
+    {
+      "epoch": 0.8329213549803239,
+      "grad_norm": 1.0667653702305564,
+      "learning_rate": 9.069183769912047e-06,
+      "loss": 1.0074,
+      "step": 21060
+    },
+    {
+      "epoch": 0.833316854199213,
+      "grad_norm": 1.0369305863001432,
+      "learning_rate": 9.067846246449949e-06,
+      "loss": 0.9934,
+      "step": 21070
+    },
+    {
+      "epoch": 0.833712353418102,
+      "grad_norm": 1.1159581307715478,
+      "learning_rate": 9.066507861487706e-06,
+      "loss": 1.0011,
+      "step": 21080
+    },
+    {
+      "epoch": 0.834107852636991,
+      "grad_norm": 1.0835233058465736,
+      "learning_rate": 9.065168615308768e-06,
+      "loss": 0.9939,
+      "step": 21090
+    },
+    {
+      "epoch": 0.8345033518558801,
+      "grad_norm": 0.9610796517208031,
+      "learning_rate": 9.06382850819676e-06,
+      "loss": 0.9946,
+      "step": 21100
+    },
+    {
+      "epoch": 0.8348988510747691,
+      "grad_norm": 1.1627643870723747,
+      "learning_rate": 9.062487540435499e-06,
+      "loss": 0.9951,
+      "step": 21110
+    },
+    {
+      "epoch": 0.8352943502936582,
+      "grad_norm": 1.0511818022674042,
+      "learning_rate": 9.061145712308976e-06,
+      "loss": 0.9895,
+      "step": 21120
+    },
+    {
+      "epoch": 0.8356898495125472,
+      "grad_norm": 1.024891348022398,
+      "learning_rate": 9.059803024101366e-06,
+      "loss": 1.0033,
+      "step": 21130
+    },
+    {
+      "epoch": 0.8360853487314363,
+      "grad_norm": 1.0386602294141605,
+      "learning_rate": 9.05845947609703e-06,
+      "loss": 0.9991,
+      "step": 21140
+    },
+    {
+      "epoch": 0.8364808479503253,
+      "grad_norm": 0.944619408084996,
+      "learning_rate": 9.057115068580507e-06,
+      "loss": 1.0098,
+      "step": 21150
+    },
+    {
+      "epoch": 0.8368763471692143,
+      "grad_norm": 1.0457033829753168,
+      "learning_rate": 9.055769801836519e-06,
+      "loss": 0.9846,
+      "step": 21160
+    },
+    {
+      "epoch": 0.8372718463881034,
+      "grad_norm": 1.0629479327654794,
+      "learning_rate": 9.05442367614997e-06,
+      "loss": 1.0071,
+      "step": 21170
+    },
+    {
+      "epoch": 0.8376673456069924,
+      "grad_norm": 1.1416288433649697,
+      "learning_rate": 9.053076691805951e-06,
+      "loss": 1.0018,
+      "step": 21180
+    },
+    {
+      "epoch": 0.8380628448258814,
+      "grad_norm": 1.0397006095640355,
+      "learning_rate": 9.051728849089725e-06,
+      "loss": 1.0207,
+      "step": 21190
+    },
+    {
+      "epoch": 0.8384583440447705,
+      "grad_norm": 1.060035159193279,
+      "learning_rate": 9.050380148286742e-06,
+      "loss": 1.0111,
+      "step": 21200
+    },
+    {
+      "epoch": 0.8388538432636595,
+      "grad_norm": 0.9273487618593962,
+      "learning_rate": 9.049030589682638e-06,
+      "loss": 1.0094,
+      "step": 21210
+    },
+    {
+      "epoch": 0.8392493424825486,
+      "grad_norm": 1.0775456316393612,
+      "learning_rate": 9.047680173563222e-06,
+      "loss": 1.0008,
+      "step": 21220
+    },
+    {
+      "epoch": 0.8396448417014376,
+      "grad_norm": 0.9917818778239733,
+      "learning_rate": 9.046328900214492e-06,
+      "loss": 0.9977,
+      "step": 21230
+    },
+    {
+      "epoch": 0.8400403409203266,
+      "grad_norm": 1.0280943941438079,
+      "learning_rate": 9.044976769922624e-06,
+      "loss": 0.9925,
+      "step": 21240
+    },
+    {
+      "epoch": 0.8404358401392157,
+      "grad_norm": 1.0472990897542513,
+      "learning_rate": 9.043623782973975e-06,
+      "loss": 1.0208,
+      "step": 21250
+    },
+    {
+      "epoch": 0.8408313393581047,
+      "grad_norm": 1.1377024546107963,
+      "learning_rate": 9.042269939655084e-06,
+      "loss": 0.9907,
+      "step": 21260
+    },
+    {
+      "epoch": 0.8412268385769938,
+      "grad_norm": 1.060852444732565,
+      "learning_rate": 9.040915240252675e-06,
+      "loss": 1.001,
+      "step": 21270
+    },
+    {
+      "epoch": 0.8416223377958828,
+      "grad_norm": 1.1050320219907457,
+      "learning_rate": 9.039559685053644e-06,
+      "loss": 1.0054,
+      "step": 21280
+    },
+    {
+      "epoch": 0.8420178370147718,
+      "grad_norm": 1.0659929404608206,
+      "learning_rate": 9.03820327434508e-06,
+      "loss": 0.9991,
+      "step": 21290
+    },
+    {
+      "epoch": 0.842413336233661,
+      "grad_norm": 1.1021131685880639,
+      "learning_rate": 9.036846008414248e-06,
+      "loss": 1.0077,
+      "step": 21300
+    },
+    {
+      "epoch": 0.84280883545255,
+      "grad_norm": 1.1174778826419904,
+      "learning_rate": 9.03548788754859e-06,
+      "loss": 0.986,
+      "step": 21310
+    },
+    {
+      "epoch": 0.8432043346714391,
+      "grad_norm": 0.9888560736588828,
+      "learning_rate": 9.034128912035732e-06,
+      "loss": 1.0116,
+      "step": 21320
+    },
+    {
+      "epoch": 0.8435998338903281,
+      "grad_norm": 1.051027417292185,
+      "learning_rate": 9.032769082163486e-06,
+      "loss": 0.9965,
+      "step": 21330
+    },
+    {
+      "epoch": 0.8439953331092171,
+      "grad_norm": 0.9397173569391313,
+      "learning_rate": 9.031408398219838e-06,
+      "loss": 1.0008,
+      "step": 21340
+    },
+    {
+      "epoch": 0.8443908323281062,
+      "grad_norm": 1.0710147167031807,
+      "learning_rate": 9.030046860492959e-06,
+      "loss": 0.995,
+      "step": 21350
+    },
+    {
+      "epoch": 0.8447863315469952,
+      "grad_norm": 0.9900453053139646,
+      "learning_rate": 9.028684469271198e-06,
+      "loss": 0.9836,
+      "step": 21360
+    },
+    {
+      "epoch": 0.8451818307658843,
+      "grad_norm": 1.0061774957291256,
+      "learning_rate": 9.027321224843086e-06,
+      "loss": 1.0056,
+      "step": 21370
+    },
+    {
+      "epoch": 0.8455773299847733,
+      "grad_norm": 0.942951836667559,
+      "learning_rate": 9.025957127497336e-06,
+      "loss": 1.0091,
+      "step": 21380
+    },
+    {
+      "epoch": 0.8459728292036623,
+      "grad_norm": 1.1064081530376138,
+      "learning_rate": 9.024592177522839e-06,
+      "loss": 1.0043,
+      "step": 21390
+    },
+    {
+      "epoch": 0.8463683284225514,
+      "grad_norm": 1.0057545239273362,
+      "learning_rate": 9.02322637520867e-06,
+      "loss": 0.9977,
+      "step": 21400
+    },
+    {
+      "epoch": 0.8467638276414404,
+      "grad_norm": 1.0226611037397328,
+      "learning_rate": 9.02185972084408e-06,
+      "loss": 0.998,
+      "step": 21410
+    },
+    {
+      "epoch": 0.8471593268603295,
+      "grad_norm": 1.0030626165590641,
+      "learning_rate": 9.020492214718506e-06,
+      "loss": 1.0093,
+      "step": 21420
+    },
+    {
+      "epoch": 0.8475548260792185,
+      "grad_norm": 1.0146955727227316,
+      "learning_rate": 9.01912385712156e-06,
+      "loss": 0.9858,
+      "step": 21430
+    },
+    {
+      "epoch": 0.8479503252981075,
+      "grad_norm": 0.9844640421958475,
+      "learning_rate": 9.017754648343037e-06,
+      "loss": 0.9998,
+      "step": 21440
+    },
+    {
+      "epoch": 0.8483458245169966,
+      "grad_norm": 1.0082705120528728,
+      "learning_rate": 9.016384588672913e-06,
+      "loss": 0.9915,
+      "step": 21450
+    },
+    {
+      "epoch": 0.8487413237358856,
+      "grad_norm": 1.0431960598191798,
+      "learning_rate": 9.015013678401342e-06,
+      "loss": 1.0083,
+      "step": 21460
+    },
+    {
+      "epoch": 0.8491368229547747,
+      "grad_norm": 1.0864357377053249,
+      "learning_rate": 9.01364191781866e-06,
+      "loss": 1.0011,
+      "step": 21470
+    },
+    {
+      "epoch": 0.8495323221736637,
+      "grad_norm": 1.1351534671556414,
+      "learning_rate": 9.012269307215383e-06,
+      "loss": 1.0043,
+      "step": 21480
+    },
+    {
+      "epoch": 0.8499278213925527,
+      "grad_norm": 1.1022353161351826,
+      "learning_rate": 9.010895846882206e-06,
+      "loss": 1.0193,
+      "step": 21490
+    },
+    {
+      "epoch": 0.8503233206114418,
+      "grad_norm": 0.9740184358260867,
+      "learning_rate": 9.009521537110002e-06,
+      "loss": 0.9981,
+      "step": 21500
+    },
+    {
+      "epoch": 0.8507188198303308,
+      "grad_norm": 1.131563506295198,
+      "learning_rate": 9.00814637818983e-06,
+      "loss": 1.005,
+      "step": 21510
+    },
+    {
+      "epoch": 0.8511143190492199,
+      "grad_norm": 1.1540534837572083,
+      "learning_rate": 9.006770370412923e-06,
+      "loss": 1.0032,
+      "step": 21520
+    },
+    {
+      "epoch": 0.8515098182681089,
+      "grad_norm": 1.0073112416678964,
+      "learning_rate": 9.005393514070697e-06,
+      "loss": 1.0033,
+      "step": 21530
+    },
+    {
+      "epoch": 0.8519053174869979,
+      "grad_norm": 1.0622763490606484,
+      "learning_rate": 9.004015809454745e-06,
+      "loss": 0.9957,
+      "step": 21540
+    },
+    {
+      "epoch": 0.852300816705887,
+      "grad_norm": 1.1715352590525332,
+      "learning_rate": 9.002637256856843e-06,
+      "loss": 0.9959,
+      "step": 21550
+    },
+    {
+      "epoch": 0.852696315924776,
+      "grad_norm": 1.05505835647216,
+      "learning_rate": 9.001257856568943e-06,
+      "loss": 0.9907,
+      "step": 21560
+    },
+    {
+      "epoch": 0.853091815143665,
+      "grad_norm": 0.9607283176448762,
+      "learning_rate": 8.99987760888318e-06,
+      "loss": 1.004,
+      "step": 21570
+    },
+    {
+      "epoch": 0.8534873143625541,
+      "grad_norm": 1.0528036250343198,
+      "learning_rate": 8.998496514091866e-06,
+      "loss": 1.0027,
+      "step": 21580
+    },
+    {
+      "epoch": 0.8538828135814431,
+      "grad_norm": 1.0896795131708021,
+      "learning_rate": 8.997114572487493e-06,
+      "loss": 0.9782,
+      "step": 21590
+    },
+    {
+      "epoch": 0.8542783128003322,
+      "grad_norm": 1.1829755608166352,
+      "learning_rate": 8.995731784362732e-06,
+      "loss": 0.9768,
+      "step": 21600
+    },
+    {
+      "epoch": 0.8546738120192212,
+      "grad_norm": 1.0271548528462164,
+      "learning_rate": 8.994348150010437e-06,
+      "loss": 0.9919,
+      "step": 21610
+    },
+    {
+      "epoch": 0.8550693112381103,
+      "grad_norm": 1.0570025898072521,
+      "learning_rate": 8.992963669723634e-06,
+      "loss": 0.9893,
+      "step": 21620
+    },
+    {
+      "epoch": 0.8554648104569994,
+      "grad_norm": 0.9828930706020425,
+      "learning_rate": 8.991578343795534e-06,
+      "loss": 1.0223,
+      "step": 21630
+    },
+    {
+      "epoch": 0.8558603096758884,
+      "grad_norm": 1.0433199204674395,
+      "learning_rate": 8.990192172519526e-06,
+      "loss": 1.0084,
+      "step": 21640
+    },
+    {
+      "epoch": 0.8562558088947775,
+      "grad_norm": 1.0427539307500984,
+      "learning_rate": 8.988805156189175e-06,
+      "loss": 1.0067,
+      "step": 21650
+    },
+    {
+      "epoch": 0.8566513081136665,
+      "grad_norm": 1.0433525035536786,
+      "learning_rate": 8.987417295098232e-06,
+      "loss": 0.9811,
+      "step": 21660
+    },
+    {
+      "epoch": 0.8570468073325556,
+      "grad_norm": 1.0916431735324656,
+      "learning_rate": 8.986028589540617e-06,
+      "loss": 0.9871,
+      "step": 21670
+    },
+    {
+      "epoch": 0.8574423065514446,
+      "grad_norm": 1.0516037705700454,
+      "learning_rate": 8.984639039810434e-06,
+      "loss": 0.9926,
+      "step": 21680
+    },
+    {
+      "epoch": 0.8578378057703336,
+      "grad_norm": 1.2286205294296046,
+      "learning_rate": 8.983248646201971e-06,
+      "loss": 0.9908,
+      "step": 21690
+    },
+    {
+      "epoch": 0.8582333049892227,
+      "grad_norm": 0.9482057633397742,
+      "learning_rate": 8.981857409009686e-06,
+      "loss": 1.0063,
+      "step": 21700
+    },
+    {
+      "epoch": 0.8586288042081117,
+      "grad_norm": 1.0408234686934443,
+      "learning_rate": 8.98046532852822e-06,
+      "loss": 1.0007,
+      "step": 21710
+    },
+    {
+      "epoch": 0.8590243034270008,
+      "grad_norm": 1.1059918742404784,
+      "learning_rate": 8.97907240505239e-06,
+      "loss": 0.9865,
+      "step": 21720
+    },
+    {
+      "epoch": 0.8594198026458898,
+      "grad_norm": 0.9863986551725117,
+      "learning_rate": 8.977678638877196e-06,
+      "loss": 0.9989,
+      "step": 21730
+    },
+    {
+      "epoch": 0.8598153018647788,
+      "grad_norm": 1.0710026362571239,
+      "learning_rate": 8.976284030297813e-06,
+      "loss": 1.0047,
+      "step": 21740
+    },
+    {
+      "epoch": 0.8602108010836679,
+      "grad_norm": 0.9110605982412614,
+      "learning_rate": 8.974888579609594e-06,
+      "loss": 0.9844,
+      "step": 21750
+    },
+    {
+      "epoch": 0.8606063003025569,
+      "grad_norm": 1.1571559798879532,
+      "learning_rate": 8.973492287108071e-06,
+      "loss": 0.9954,
+      "step": 21760
+    },
+    {
+      "epoch": 0.861001799521446,
+      "grad_norm": 0.9501566723081118,
+      "learning_rate": 8.97209515308896e-06,
+      "loss": 1.001,
+      "step": 21770
+    },
+    {
+      "epoch": 0.861397298740335,
+      "grad_norm": 1.0770423546268713,
+      "learning_rate": 8.970697177848144e-06,
+      "loss": 1.0076,
+      "step": 21780
+    },
+    {
+      "epoch": 0.861792797959224,
+      "grad_norm": 1.0911217915233409,
+      "learning_rate": 8.969298361681693e-06,
+      "loss": 1.0011,
+      "step": 21790
+    },
+    {
+      "epoch": 0.8621882971781131,
+      "grad_norm": 1.124812243623449,
+      "learning_rate": 8.96789870488585e-06,
+      "loss": 0.9953,
+      "step": 21800
+    },
+    {
+      "epoch": 0.8625837963970021,
+      "grad_norm": 0.9903946628456698,
+      "learning_rate": 8.966498207757042e-06,
+      "loss": 0.9938,
+      "step": 21810
+    },
+    {
+      "epoch": 0.8629792956158912,
+      "grad_norm": 1.0302636106438623,
+      "learning_rate": 8.965096870591867e-06,
+      "loss": 0.9764,
+      "step": 21820
+    },
+    {
+      "epoch": 0.8633747948347802,
+      "grad_norm": 0.9598588924155196,
+      "learning_rate": 8.963694693687108e-06,
+      "loss": 0.9936,
+      "step": 21830
+    },
+    {
+      "epoch": 0.8637702940536692,
+      "grad_norm": 1.2027681313282046,
+      "learning_rate": 8.962291677339718e-06,
+      "loss": 0.9953,
+      "step": 21840
+    },
+    {
+      "epoch": 0.8641657932725583,
+      "grad_norm": 1.0398833695261345,
+      "learning_rate": 8.960887821846833e-06,
+      "loss": 1.0061,
+      "step": 21850
+    },
+    {
+      "epoch": 0.8645612924914473,
+      "grad_norm": 1.1869989079615797,
+      "learning_rate": 8.959483127505767e-06,
+      "loss": 0.9879,
+      "step": 21860
+    },
+    {
+      "epoch": 0.8649567917103363,
+      "grad_norm": 1.1522357731525883,
+      "learning_rate": 8.958077594614009e-06,
+      "loss": 0.9848,
+      "step": 21870
+    },
+    {
+      "epoch": 0.8653522909292254,
+      "grad_norm": 1.0823567076143104,
+      "learning_rate": 8.956671223469227e-06,
+      "loss": 0.996,
+      "step": 21880
+    },
+    {
+      "epoch": 0.8657477901481144,
+      "grad_norm": 1.0012564144573024,
+      "learning_rate": 8.955264014369265e-06,
+      "loss": 0.9962,
+      "step": 21890
+    },
+    {
+      "epoch": 0.8661432893670035,
+      "grad_norm": 1.0154924701018986,
+      "learning_rate": 8.953855967612149e-06,
+      "loss": 0.999,
+      "step": 21900
+    },
+    {
+      "epoch": 0.8665387885858925,
+      "grad_norm": 1.008104276895344,
+      "learning_rate": 8.952447083496076e-06,
+      "loss": 0.9836,
+      "step": 21910
+    },
+    {
+      "epoch": 0.8669342878047815,
+      "grad_norm": 1.063103455610592,
+      "learning_rate": 8.951037362319426e-06,
+      "loss": 1.002,
+      "step": 21920
+    },
+    {
+      "epoch": 0.8673297870236706,
+      "grad_norm": 1.129804248194133,
+      "learning_rate": 8.949626804380752e-06,
+      "loss": 0.9851,
+      "step": 21930
+    },
+    {
+      "epoch": 0.8677252862425596,
+      "grad_norm": 0.9930342968555023,
+      "learning_rate": 8.948215409978786e-06,
+      "loss": 0.9921,
+      "step": 21940
+    },
+    {
+      "epoch": 0.8681207854614487,
+      "grad_norm": 1.1075912092647413,
+      "learning_rate": 8.946803179412438e-06,
+      "loss": 0.9981,
+      "step": 21950
+    },
+    {
+      "epoch": 0.8685162846803377,
+      "grad_norm": 1.1044526463396946,
+      "learning_rate": 8.945390112980793e-06,
+      "loss": 0.9946,
+      "step": 21960
+    },
+    {
+      "epoch": 0.8689117838992269,
+      "grad_norm": 1.0844903696763522,
+      "learning_rate": 8.943976210983116e-06,
+      "loss": 0.9831,
+      "step": 21970
+    },
+    {
+      "epoch": 0.8693072831181159,
+      "grad_norm": 1.0284321244525942,
+      "learning_rate": 8.942561473718849e-06,
+      "loss": 0.9993,
+      "step": 21980
+    },
+    {
+      "epoch": 0.8697027823370049,
+      "grad_norm": 1.0408457188164864,
+      "learning_rate": 8.941145901487604e-06,
+      "loss": 0.9863,
+      "step": 21990
+    },
+    {
+      "epoch": 0.870098281555894,
+      "grad_norm": 1.1083249765702519,
+      "learning_rate": 8.939729494589178e-06,
+      "loss": 0.9806,
+      "step": 22000
+    },
+    {
+      "epoch": 0.870493780774783,
+      "grad_norm": 0.9955913062759518,
+      "learning_rate": 8.93831225332354e-06,
+      "loss": 1.0049,
+      "step": 22010
+    },
+    {
+      "epoch": 0.870889279993672,
+      "grad_norm": 1.159117440647705,
+      "learning_rate": 8.93689417799084e-06,
+      "loss": 0.9933,
+      "step": 22020
+    },
+    {
+      "epoch": 0.8712847792125611,
+      "grad_norm": 1.121029602431043,
+      "learning_rate": 8.9354752688914e-06,
+      "loss": 1.0084,
+      "step": 22030
+    },
+    {
+      "epoch": 0.8716802784314501,
+      "grad_norm": 1.0368559365343235,
+      "learning_rate": 8.934055526325723e-06,
+      "loss": 0.9996,
+      "step": 22040
+    },
+    {
+      "epoch": 0.8720757776503392,
+      "grad_norm": 0.9378608607239615,
+      "learning_rate": 8.932634950594483e-06,
+      "loss": 0.9906,
+      "step": 22050
+    },
+    {
+      "epoch": 0.8724712768692282,
+      "grad_norm": 0.9926488457193317,
+      "learning_rate": 8.931213541998535e-06,
+      "loss": 0.987,
+      "step": 22060
+    },
+    {
+      "epoch": 0.8728667760881172,
+      "grad_norm": 0.9791296220617621,
+      "learning_rate": 8.92979130083891e-06,
+      "loss": 0.992,
+      "step": 22070
+    },
+    {
+      "epoch": 0.8732622753070063,
+      "grad_norm": 1.1667133074922789,
+      "learning_rate": 8.928368227416813e-06,
+      "loss": 0.9867,
+      "step": 22080
+    },
+    {
+      "epoch": 0.8736577745258953,
+      "grad_norm": 1.045978411357206,
+      "learning_rate": 8.926944322033627e-06,
+      "loss": 1.0047,
+      "step": 22090
+    },
+    {
+      "epoch": 0.8740532737447844,
+      "grad_norm": 1.141466349392,
+      "learning_rate": 8.925519584990911e-06,
+      "loss": 0.9993,
+      "step": 22100
+    },
+    {
+      "epoch": 0.8744487729636734,
+      "grad_norm": 1.0436743151153005,
+      "learning_rate": 8.924094016590399e-06,
+      "loss": 0.9906,
+      "step": 22110
+    },
+    {
+      "epoch": 0.8748442721825624,
+      "grad_norm": 0.9231897957742409,
+      "learning_rate": 8.922667617134004e-06,
+      "loss": 0.9928,
+      "step": 22120
+    },
+    {
+      "epoch": 0.8752397714014515,
+      "grad_norm": 1.0864962003267222,
+      "learning_rate": 8.921240386923811e-06,
+      "loss": 0.9847,
+      "step": 22130
+    },
+    {
+      "epoch": 0.8756352706203405,
+      "grad_norm": 1.1081670266964114,
+      "learning_rate": 8.919812326262084e-06,
+      "loss": 0.9852,
+      "step": 22140
+    },
+    {
+      "epoch": 0.8760307698392296,
+      "grad_norm": 1.0863186277236239,
+      "learning_rate": 8.918383435451259e-06,
+      "loss": 1.0086,
+      "step": 22150
+    },
+    {
+      "epoch": 0.8764262690581186,
+      "grad_norm": 1.1158010931666167,
+      "learning_rate": 8.916953714793954e-06,
+      "loss": 0.9912,
+      "step": 22160
+    },
+    {
+      "epoch": 0.8768217682770076,
+      "grad_norm": 1.0135586503215224,
+      "learning_rate": 8.915523164592958e-06,
+      "loss": 0.9814,
+      "step": 22170
+    },
+    {
+      "epoch": 0.8772172674958967,
+      "grad_norm": 1.107413396930375,
+      "learning_rate": 8.914091785151238e-06,
+      "loss": 1.0031,
+      "step": 22180
+    },
+    {
+      "epoch": 0.8776127667147857,
+      "grad_norm": 1.0611835392923699,
+      "learning_rate": 8.912659576771935e-06,
+      "loss": 0.9865,
+      "step": 22190
+    },
+    {
+      "epoch": 0.8780082659336748,
+      "grad_norm": 0.943054047802915,
+      "learning_rate": 8.911226539758366e-06,
+      "loss": 0.9989,
+      "step": 22200
+    },
+    {
+      "epoch": 0.8784037651525638,
+      "grad_norm": 1.0351252352265388,
+      "learning_rate": 8.909792674414021e-06,
+      "loss": 0.9933,
+      "step": 22210
+    },
+    {
+      "epoch": 0.8787992643714528,
+      "grad_norm": 1.0626104462016321,
+      "learning_rate": 8.908357981042575e-06,
+      "loss": 1.003,
+      "step": 22220
+    },
+    {
+      "epoch": 0.8791947635903419,
+      "grad_norm": 1.0221346784061454,
+      "learning_rate": 8.906922459947865e-06,
+      "loss": 0.9885,
+      "step": 22230
+    },
+    {
+      "epoch": 0.8795902628092309,
+      "grad_norm": 1.0219969564048954,
+      "learning_rate": 8.905486111433913e-06,
+      "loss": 0.9972,
+      "step": 22240
+    },
+    {
+      "epoch": 0.87998576202812,
+      "grad_norm": 1.1497859667991182,
+      "learning_rate": 8.904048935804912e-06,
+      "loss": 0.9772,
+      "step": 22250
+    },
+    {
+      "epoch": 0.880381261247009,
+      "grad_norm": 1.0336302605879202,
+      "learning_rate": 8.90261093336523e-06,
+      "loss": 1.0016,
+      "step": 22260
+    },
+    {
+      "epoch": 0.880776760465898,
+      "grad_norm": 1.1374940246089795,
+      "learning_rate": 8.901172104419415e-06,
+      "loss": 0.9847,
+      "step": 22270
+    },
+    {
+      "epoch": 0.8811722596847871,
+      "grad_norm": 1.1175168071250212,
+      "learning_rate": 8.899732449272182e-06,
+      "loss": 1.0082,
+      "step": 22280
+    },
+    {
+      "epoch": 0.8815677589036761,
+      "grad_norm": 0.9234830732443703,
+      "learning_rate": 8.898291968228427e-06,
+      "loss": 1.0038,
+      "step": 22290
+    },
+    {
+      "epoch": 0.8819632581225653,
+      "grad_norm": 1.1343079009330828,
+      "learning_rate": 8.896850661593221e-06,
+      "loss": 0.9941,
+      "step": 22300
+    },
+    {
+      "epoch": 0.8823587573414543,
+      "grad_norm": 0.9966277801592895,
+      "learning_rate": 8.895408529671806e-06,
+      "loss": 0.9941,
+      "step": 22310
+    },
+    {
+      "epoch": 0.8827542565603433,
+      "grad_norm": 0.9822937140424421,
+      "learning_rate": 8.8939655727696e-06,
+      "loss": 0.9981,
+      "step": 22320
+    },
+    {
+      "epoch": 0.8831497557792324,
+      "grad_norm": 1.114945396356318,
+      "learning_rate": 8.8925217911922e-06,
+      "loss": 0.9664,
+      "step": 22330
+    },
+    {
+      "epoch": 0.8835452549981214,
+      "grad_norm": 1.0408984401113937,
+      "learning_rate": 8.89107718524537e-06,
+      "loss": 0.9946,
+      "step": 22340
+    },
+    {
+      "epoch": 0.8839407542170105,
+      "grad_norm": 1.0590325922700263,
+      "learning_rate": 8.889631755235056e-06,
+      "loss": 0.9922,
+      "step": 22350
+    },
+    {
+      "epoch": 0.8843362534358995,
+      "grad_norm": 1.0233741178165348,
+      "learning_rate": 8.888185501467372e-06,
+      "loss": 1.0061,
+      "step": 22360
+    },
+    {
+      "epoch": 0.8847317526547885,
+      "grad_norm": 1.139118241384216,
+      "learning_rate": 8.886738424248615e-06,
+      "loss": 1.006,
+      "step": 22370
+    },
+    {
+      "epoch": 0.8851272518736776,
+      "grad_norm": 1.0316951425657999,
+      "learning_rate": 8.885290523885247e-06,
+      "loss": 0.9996,
+      "step": 22380
+    },
+    {
+      "epoch": 0.8855227510925666,
+      "grad_norm": 1.0691554404761705,
+      "learning_rate": 8.88384180068391e-06,
+      "loss": 1.0152,
+      "step": 22390
+    },
+    {
+      "epoch": 0.8859182503114557,
+      "grad_norm": 1.1243988288814266,
+      "learning_rate": 8.882392254951418e-06,
+      "loss": 0.9823,
+      "step": 22400
+    },
+    {
+      "epoch": 0.8863137495303447,
+      "grad_norm": 1.1401770918338507,
+      "learning_rate": 8.880941886994758e-06,
+      "loss": 1.0003,
+      "step": 22410
+    },
+    {
+      "epoch": 0.8867092487492337,
+      "grad_norm": 1.059276264537003,
+      "learning_rate": 8.879490697121098e-06,
+      "loss": 1.0022,
+      "step": 22420
+    },
+    {
+      "epoch": 0.8871047479681228,
+      "grad_norm": 0.9936029389159007,
+      "learning_rate": 8.878038685637773e-06,
+      "loss": 0.9994,
+      "step": 22430
+    },
+    {
+      "epoch": 0.8875002471870118,
+      "grad_norm": 1.0137848924227966,
+      "learning_rate": 8.87658585285229e-06,
+      "loss": 0.9885,
+      "step": 22440
+    },
+    {
+      "epoch": 0.8878957464059009,
+      "grad_norm": 1.128129618563724,
+      "learning_rate": 8.875132199072341e-06,
+      "loss": 0.9987,
+      "step": 22450
+    },
+    {
+      "epoch": 0.8882912456247899,
+      "grad_norm": 0.9934591210218678,
+      "learning_rate": 8.873677724605781e-06,
+      "loss": 0.9862,
+      "step": 22460
+    },
+    {
+      "epoch": 0.8886867448436789,
+      "grad_norm": 1.0868858665689693,
+      "learning_rate": 8.872222429760644e-06,
+      "loss": 1.0092,
+      "step": 22470
+    },
+    {
+      "epoch": 0.889082244062568,
+      "grad_norm": 1.0104139361635571,
+      "learning_rate": 8.870766314845137e-06,
+      "loss": 0.987,
+      "step": 22480
+    },
+    {
+      "epoch": 0.889477743281457,
+      "grad_norm": 1.0123800474935396,
+      "learning_rate": 8.869309380167636e-06,
+      "loss": 1.0126,
+      "step": 22490
+    },
+    {
+      "epoch": 0.889873242500346,
+      "grad_norm": 1.0547210523317003,
+      "learning_rate": 8.867851626036701e-06,
+      "loss": 1.0037,
+      "step": 22500
+    },
+    {
+      "epoch": 0.8902687417192351,
+      "grad_norm": 0.9969373469238735,
+      "learning_rate": 8.866393052761055e-06,
+      "loss": 1.0102,
+      "step": 22510
+    },
+    {
+      "epoch": 0.8906642409381241,
+      "grad_norm": 1.047988136282342,
+      "learning_rate": 8.864933660649599e-06,
+      "loss": 0.9899,
+      "step": 22520
+    },
+    {
+      "epoch": 0.8910597401570132,
+      "grad_norm": 1.0123715780703257,
+      "learning_rate": 8.86347345001141e-06,
+      "loss": 0.9838,
+      "step": 22530
+    },
+    {
+      "epoch": 0.8914552393759022,
+      "grad_norm": 0.9985895829139116,
+      "learning_rate": 8.862012421155734e-06,
+      "loss": 0.9931,
+      "step": 22540
+    },
+    {
+      "epoch": 0.8918507385947912,
+      "grad_norm": 1.1836257401850125,
+      "learning_rate": 8.860550574391992e-06,
+      "loss": 0.9973,
+      "step": 22550
+    },
+    {
+      "epoch": 0.8922462378136803,
+      "grad_norm": 1.0252176862779547,
+      "learning_rate": 8.859087910029778e-06,
+      "loss": 0.995,
+      "step": 22560
+    },
+    {
+      "epoch": 0.8926417370325693,
+      "grad_norm": 1.0694766092330494,
+      "learning_rate": 8.85762442837886e-06,
+      "loss": 0.9908,
+      "step": 22570
+    },
+    {
+      "epoch": 0.8930372362514584,
+      "grad_norm": 1.1292543282932064,
+      "learning_rate": 8.856160129749177e-06,
+      "loss": 0.9853,
+      "step": 22580
+    },
+    {
+      "epoch": 0.8934327354703474,
+      "grad_norm": 1.0992567489255145,
+      "learning_rate": 8.854695014450843e-06,
+      "loss": 1.0135,
+      "step": 22590
+    },
+    {
+      "epoch": 0.8938282346892364,
+      "grad_norm": 1.0435087190150094,
+      "learning_rate": 8.853229082794143e-06,
+      "loss": 0.9895,
+      "step": 22600
+    },
+    {
+      "epoch": 0.8942237339081255,
+      "grad_norm": 0.9770812795964554,
+      "learning_rate": 8.851762335089539e-06,
+      "loss": 0.9798,
+      "step": 22610
+    },
+    {
+      "epoch": 0.8946192331270145,
+      "grad_norm": 1.1355912350823192,
+      "learning_rate": 8.850294771647661e-06,
+      "loss": 0.9941,
+      "step": 22620
+    },
+    {
+      "epoch": 0.8950147323459036,
+      "grad_norm": 0.9432287273154124,
+      "learning_rate": 8.848826392779315e-06,
+      "loss": 1.0076,
+      "step": 22630
+    },
+    {
+      "epoch": 0.8954102315647927,
+      "grad_norm": 1.0129628054599422,
+      "learning_rate": 8.847357198795477e-06,
+      "loss": 0.9795,
+      "step": 22640
+    },
+    {
+      "epoch": 0.8958057307836818,
+      "grad_norm": 1.1190507422850675,
+      "learning_rate": 8.845887190007299e-06,
+      "loss": 0.983,
+      "step": 22650
+    },
+    {
+      "epoch": 0.8962012300025708,
+      "grad_norm": 1.1013836601341722,
+      "learning_rate": 8.844416366726101e-06,
+      "loss": 1.0006,
+      "step": 22660
+    },
+    {
+      "epoch": 0.8965967292214598,
+      "grad_norm": 1.044186345233668,
+      "learning_rate": 8.84294472926338e-06,
+      "loss": 0.9907,
+      "step": 22670
+    },
+    {
+      "epoch": 0.8969922284403489,
+      "grad_norm": 1.0337971756226394,
+      "learning_rate": 8.841472277930805e-06,
+      "loss": 1.0174,
+      "step": 22680
+    },
+    {
+      "epoch": 0.8973877276592379,
+      "grad_norm": 1.1014760977075364,
+      "learning_rate": 8.839999013040212e-06,
+      "loss": 0.9955,
+      "step": 22690
+    },
+    {
+      "epoch": 0.897783226878127,
+      "grad_norm": 0.9774835640271742,
+      "learning_rate": 8.83852493490362e-06,
+      "loss": 0.9923,
+      "step": 22700
+    },
+    {
+      "epoch": 0.898178726097016,
+      "grad_norm": 0.95917912787041,
+      "learning_rate": 8.837050043833205e-06,
+      "loss": 0.989,
+      "step": 22710
+    },
+    {
+      "epoch": 0.898574225315905,
+      "grad_norm": 1.183673644318555,
+      "learning_rate": 8.835574340141328e-06,
+      "loss": 0.9855,
+      "step": 22720
+    },
+    {
+      "epoch": 0.8989697245347941,
+      "grad_norm": 0.9986675579473566,
+      "learning_rate": 8.834097824140519e-06,
+      "loss": 0.9927,
+      "step": 22730
+    },
+    {
+      "epoch": 0.8993652237536831,
+      "grad_norm": 1.0966403139465501,
+      "learning_rate": 8.832620496143476e-06,
+      "loss": 0.9846,
+      "step": 22740
+    },
+    {
+      "epoch": 0.8997607229725721,
+      "grad_norm": 1.1809364950312344,
+      "learning_rate": 8.831142356463075e-06,
+      "loss": 0.9883,
+      "step": 22750
+    },
+    {
+      "epoch": 0.9001562221914612,
+      "grad_norm": 1.0346068266663593,
+      "learning_rate": 8.829663405412356e-06,
+      "loss": 0.9945,
+      "step": 22760
+    },
+    {
+      "epoch": 0.9005517214103502,
+      "grad_norm": 1.167951301427672,
+      "learning_rate": 8.82818364330454e-06,
+      "loss": 0.9919,
+      "step": 22770
+    },
+    {
+      "epoch": 0.9009472206292393,
+      "grad_norm": 0.9233913719712421,
+      "learning_rate": 8.826703070453014e-06,
+      "loss": 0.9845,
+      "step": 22780
+    },
+    {
+      "epoch": 0.9013427198481283,
+      "grad_norm": 0.8991820237275926,
+      "learning_rate": 8.825221687171337e-06,
+      "loss": 1.0093,
+      "step": 22790
+    },
+    {
+      "epoch": 0.9017382190670173,
+      "grad_norm": 0.988284124934829,
+      "learning_rate": 8.823739493773242e-06,
+      "loss": 0.9918,
+      "step": 22800
+    },
+    {
+      "epoch": 0.9021337182859064,
+      "grad_norm": 1.2299112690149163,
+      "learning_rate": 8.82225649057263e-06,
+      "loss": 0.9755,
+      "step": 22810
+    },
+    {
+      "epoch": 0.9025292175047954,
+      "grad_norm": 1.170523119650909,
+      "learning_rate": 8.820772677883577e-06,
+      "loss": 0.9733,
+      "step": 22820
+    },
+    {
+      "epoch": 0.9029247167236845,
+      "grad_norm": 1.017746867765162,
+      "learning_rate": 8.819288056020329e-06,
+      "loss": 0.9919,
+      "step": 22830
+    },
+    {
+      "epoch": 0.9033202159425735,
+      "grad_norm": 1.0985751288824752,
+      "learning_rate": 8.817802625297304e-06,
+      "loss": 0.9822,
+      "step": 22840
+    },
+    {
+      "epoch": 0.9037157151614625,
+      "grad_norm": 1.0835501179096811,
+      "learning_rate": 8.816316386029089e-06,
+      "loss": 0.9795,
+      "step": 22850
+    },
+    {
+      "epoch": 0.9041112143803516,
+      "grad_norm": 0.9965732383709867,
+      "learning_rate": 8.814829338530446e-06,
+      "loss": 0.989,
+      "step": 22860
+    },
+    {
+      "epoch": 0.9045067135992406,
+      "grad_norm": 1.033560445468714,
+      "learning_rate": 8.813341483116307e-06,
+      "loss": 0.9715,
+      "step": 22870
+    },
+    {
+      "epoch": 0.9049022128181297,
+      "grad_norm": 0.94882157337787,
+      "learning_rate": 8.811852820101772e-06,
+      "loss": 0.9807,
+      "step": 22880
+    },
+    {
+      "epoch": 0.9052977120370187,
+      "grad_norm": 1.0828872992033372,
+      "learning_rate": 8.810363349802113e-06,
+      "loss": 0.9935,
+      "step": 22890
+    },
+    {
+      "epoch": 0.9056932112559077,
+      "grad_norm": 1.0020699385804437,
+      "learning_rate": 8.808873072532779e-06,
+      "loss": 0.9782,
+      "step": 22900
+    },
+    {
+      "epoch": 0.9060887104747968,
+      "grad_norm": 1.0293321370815471,
+      "learning_rate": 8.807381988609381e-06,
+      "loss": 0.9934,
+      "step": 22910
+    },
+    {
+      "epoch": 0.9064842096936858,
+      "grad_norm": 1.1112568202659678,
+      "learning_rate": 8.805890098347707e-06,
+      "loss": 0.9951,
+      "step": 22920
+    },
+    {
+      "epoch": 0.9068797089125749,
+      "grad_norm": 1.078258136675064,
+      "learning_rate": 8.804397402063714e-06,
+      "loss": 0.9854,
+      "step": 22930
+    },
+    {
+      "epoch": 0.9072752081314639,
+      "grad_norm": 1.064273334379174,
+      "learning_rate": 8.802903900073529e-06,
+      "loss": 0.9783,
+      "step": 22940
+    },
+    {
+      "epoch": 0.9076707073503529,
+      "grad_norm": 1.0606010733965434,
+      "learning_rate": 8.801409592693449e-06,
+      "loss": 1.0037,
+      "step": 22950
+    },
+    {
+      "epoch": 0.908066206569242,
+      "grad_norm": 1.0552623577993019,
+      "learning_rate": 8.799914480239944e-06,
+      "loss": 0.9915,
+      "step": 22960
+    },
+    {
+      "epoch": 0.908461705788131,
+      "grad_norm": 1.017323747809653,
+      "learning_rate": 8.798418563029654e-06,
+      "loss": 0.9785,
+      "step": 22970
+    },
+    {
+      "epoch": 0.9088572050070202,
+      "grad_norm": 1.0343661498631227,
+      "learning_rate": 8.796921841379386e-06,
+      "loss": 0.9997,
+      "step": 22980
+    },
+    {
+      "epoch": 0.9092527042259092,
+      "grad_norm": 1.0222293592540472,
+      "learning_rate": 8.795424315606122e-06,
+      "loss": 0.9866,
+      "step": 22990
+    },
+    {
+      "epoch": 0.9096482034447982,
+      "grad_norm": 1.075938189630379,
+      "learning_rate": 8.793925986027013e-06,
+      "loss": 1.0025,
+      "step": 23000
+    },
+    {
+      "epoch": 0.9100437026636873,
+      "grad_norm": 1.0456175519023878,
+      "learning_rate": 8.792426852959378e-06,
+      "loss": 0.9854,
+      "step": 23010
+    },
+    {
+      "epoch": 0.9104392018825763,
+      "grad_norm": 1.1417103357850213,
+      "learning_rate": 8.790926916720708e-06,
+      "loss": 0.9776,
+      "step": 23020
+    },
+    {
+      "epoch": 0.9108347011014654,
+      "grad_norm": 1.1437784834497344,
+      "learning_rate": 8.789426177628662e-06,
+      "loss": 0.9971,
+      "step": 23030
+    },
+    {
+      "epoch": 0.9112302003203544,
+      "grad_norm": 1.1907093030236078,
+      "learning_rate": 8.787924636001075e-06,
+      "loss": 0.9643,
+      "step": 23040
+    },
+    {
+      "epoch": 0.9116256995392434,
+      "grad_norm": 1.0252715750433767,
+      "learning_rate": 8.786422292155946e-06,
+      "loss": 1.0015,
+      "step": 23050
+    },
+    {
+      "epoch": 0.9120211987581325,
+      "grad_norm": 1.1575059298012786,
+      "learning_rate": 8.784919146411444e-06,
+      "loss": 0.9957,
+      "step": 23060
+    },
+    {
+      "epoch": 0.9124166979770215,
+      "grad_norm": 1.0400122077290705,
+      "learning_rate": 8.78341519908591e-06,
+      "loss": 0.98,
+      "step": 23070
+    },
+    {
+      "epoch": 0.9128121971959106,
+      "grad_norm": 1.149572413005648,
+      "learning_rate": 8.781910450497856e-06,
+      "loss": 0.9879,
+      "step": 23080
+    },
+    {
+      "epoch": 0.9132076964147996,
+      "grad_norm": 1.036139748253578,
+      "learning_rate": 8.780404900965962e-06,
+      "loss": 0.9703,
+      "step": 23090
+    },
+    {
+      "epoch": 0.9136031956336886,
+      "grad_norm": 0.9964524838102091,
+      "learning_rate": 8.778898550809074e-06,
+      "loss": 0.9995,
+      "step": 23100
+    },
+    {
+      "epoch": 0.9139986948525777,
+      "grad_norm": 1.1437294182809956,
+      "learning_rate": 8.777391400346216e-06,
+      "loss": 0.9753,
+      "step": 23110
+    },
+    {
+      "epoch": 0.9143941940714667,
+      "grad_norm": 1.0205371380592758,
+      "learning_rate": 8.775883449896575e-06,
+      "loss": 0.975,
+      "step": 23120
+    },
+    {
+      "epoch": 0.9147896932903558,
+      "grad_norm": 1.0625168977073607,
+      "learning_rate": 8.774374699779506e-06,
+      "loss": 0.9739,
+      "step": 23130
+    },
+    {
+      "epoch": 0.9151851925092448,
+      "grad_norm": 1.0242426008012278,
+      "learning_rate": 8.772865150314541e-06,
+      "loss": 0.992,
+      "step": 23140
+    },
+    {
+      "epoch": 0.9155806917281338,
+      "grad_norm": 1.0816904413373665,
+      "learning_rate": 8.771354801821372e-06,
+      "loss": 0.9896,
+      "step": 23150
+    },
+    {
+      "epoch": 0.9159761909470229,
+      "grad_norm": 1.148176143444153,
+      "learning_rate": 8.76984365461987e-06,
+      "loss": 0.9872,
+      "step": 23160
+    },
+    {
+      "epoch": 0.9163716901659119,
+      "grad_norm": 1.012599377789182,
+      "learning_rate": 8.768331709030067e-06,
+      "loss": 0.9883,
+      "step": 23170
+    },
+    {
+      "epoch": 0.916767189384801,
+      "grad_norm": 1.1342241805843325,
+      "learning_rate": 8.766818965372167e-06,
+      "loss": 0.9872,
+      "step": 23180
+    },
+    {
+      "epoch": 0.91716268860369,
+      "grad_norm": 1.1393767369318468,
+      "learning_rate": 8.765305423966544e-06,
+      "loss": 0.9763,
+      "step": 23190
+    },
+    {
+      "epoch": 0.917558187822579,
+      "grad_norm": 1.1539912560718792,
+      "learning_rate": 8.763791085133741e-06,
+      "loss": 0.9845,
+      "step": 23200
+    },
+    {
+      "epoch": 0.9179536870414681,
+      "grad_norm": 1.0008074199913217,
+      "learning_rate": 8.762275949194468e-06,
+      "loss": 0.9932,
+      "step": 23210
+    },
+    {
+      "epoch": 0.9183491862603571,
+      "grad_norm": 1.1259556514957458,
+      "learning_rate": 8.760760016469605e-06,
+      "loss": 1.0121,
+      "step": 23220
+    },
+    {
+      "epoch": 0.9187446854792461,
+      "grad_norm": 1.1576577910927688,
+      "learning_rate": 8.759243287280201e-06,
+      "loss": 0.9682,
+      "step": 23230
+    },
+    {
+      "epoch": 0.9191401846981352,
+      "grad_norm": 1.1919388309296537,
+      "learning_rate": 8.75772576194747e-06,
+      "loss": 0.9736,
+      "step": 23240
+    },
+    {
+      "epoch": 0.9195356839170242,
+      "grad_norm": 1.0147416187741292,
+      "learning_rate": 8.756207440792805e-06,
+      "loss": 0.9872,
+      "step": 23250
+    },
+    {
+      "epoch": 0.9199311831359133,
+      "grad_norm": 1.2393222594099442,
+      "learning_rate": 8.754688324137754e-06,
+      "loss": 0.9885,
+      "step": 23260
+    },
+    {
+      "epoch": 0.9203266823548023,
+      "grad_norm": 1.2370378621450326,
+      "learning_rate": 8.753168412304041e-06,
+      "loss": 0.9827,
+      "step": 23270
+    },
+    {
+      "epoch": 0.9207221815736913,
+      "grad_norm": 1.020160393878756,
+      "learning_rate": 8.75164770561356e-06,
+      "loss": 0.9807,
+      "step": 23280
+    },
+    {
+      "epoch": 0.9211176807925804,
+      "grad_norm": 1.1519746009622465,
+      "learning_rate": 8.750126204388368e-06,
+      "loss": 0.9715,
+      "step": 23290
+    },
+    {
+      "epoch": 0.9215131800114694,
+      "grad_norm": 1.0942413536629974,
+      "learning_rate": 8.748603908950694e-06,
+      "loss": 1.004,
+      "step": 23300
+    },
+    {
+      "epoch": 0.9219086792303586,
+      "grad_norm": 0.9655570124129816,
+      "learning_rate": 8.747080819622932e-06,
+      "loss": 0.9854,
+      "step": 23310
+    },
+    {
+      "epoch": 0.9223041784492476,
+      "grad_norm": 1.0299210507179013,
+      "learning_rate": 8.74555693672765e-06,
+      "loss": 0.9746,
+      "step": 23320
+    },
+    {
+      "epoch": 0.9226996776681367,
+      "grad_norm": 0.9807506906193965,
+      "learning_rate": 8.744032260587581e-06,
+      "loss": 1.002,
+      "step": 23330
+    },
+    {
+      "epoch": 0.9230951768870257,
+      "grad_norm": 1.0181924212006077,
+      "learning_rate": 8.742506791525617e-06,
+      "loss": 0.989,
+      "step": 23340
+    },
+    {
+      "epoch": 0.9234906761059147,
+      "grad_norm": 1.1182118898424565,
+      "learning_rate": 8.740980529864837e-06,
+      "loss": 0.9671,
+      "step": 23350
+    },
+    {
+      "epoch": 0.9238861753248038,
+      "grad_norm": 0.9842730408912936,
+      "learning_rate": 8.73945347592847e-06,
+      "loss": 0.9998,
+      "step": 23360
+    },
+    {
+      "epoch": 0.9242816745436928,
+      "grad_norm": 1.0289373437877,
+      "learning_rate": 8.737925630039921e-06,
+      "loss": 0.9718,
+      "step": 23370
+    },
+    {
+      "epoch": 0.9246771737625818,
+      "grad_norm": 1.0897506857407533,
+      "learning_rate": 8.736396992522762e-06,
+      "loss": 0.9625,
+      "step": 23380
+    },
+    {
+      "epoch": 0.9250726729814709,
+      "grad_norm": 0.9733091800848298,
+      "learning_rate": 8.734867563700734e-06,
+      "loss": 0.9711,
+      "step": 23390
+    },
+    {
+      "epoch": 0.9254681722003599,
+      "grad_norm": 1.1750819878640861,
+      "learning_rate": 8.733337343897743e-06,
+      "loss": 0.9839,
+      "step": 23400
+    },
+    {
+      "epoch": 0.925863671419249,
+      "grad_norm": 0.9919572977820524,
+      "learning_rate": 8.731806333437862e-06,
+      "loss": 1.0007,
+      "step": 23410
+    },
+    {
+      "epoch": 0.926259170638138,
+      "grad_norm": 1.0891715728498546,
+      "learning_rate": 8.730274532645333e-06,
+      "loss": 0.986,
+      "step": 23420
+    },
+    {
+      "epoch": 0.926654669857027,
+      "grad_norm": 1.0431658941727613,
+      "learning_rate": 8.728741941844568e-06,
+      "loss": 0.9692,
+      "step": 23430
+    },
+    {
+      "epoch": 0.9270501690759161,
+      "grad_norm": 1.0720953935857238,
+      "learning_rate": 8.727208561360139e-06,
+      "loss": 1.0115,
+      "step": 23440
+    },
+    {
+      "epoch": 0.9274456682948051,
+      "grad_norm": 1.0509206784225917,
+      "learning_rate": 8.725674391516791e-06,
+      "loss": 0.9812,
+      "step": 23450
+    },
+    {
+      "epoch": 0.9278411675136942,
+      "grad_norm": 1.1325001148401703,
+      "learning_rate": 8.724139432639439e-06,
+      "loss": 0.9902,
+      "step": 23460
+    },
+    {
+      "epoch": 0.9282366667325832,
+      "grad_norm": 0.9422616361715284,
+      "learning_rate": 8.722603685053155e-06,
+      "loss": 0.9776,
+      "step": 23470
+    },
+    {
+      "epoch": 0.9286321659514722,
+      "grad_norm": 1.06866448296265,
+      "learning_rate": 8.72106714908319e-06,
+      "loss": 0.9776,
+      "step": 23480
+    },
+    {
+      "epoch": 0.9290276651703613,
+      "grad_norm": 0.9826613241611012,
+      "learning_rate": 8.719529825054949e-06,
+      "loss": 0.9803,
+      "step": 23490
+    },
+    {
+      "epoch": 0.9294231643892503,
+      "grad_norm": 1.0254324719318577,
+      "learning_rate": 8.717991713294016e-06,
+      "loss": 1.015,
+      "step": 23500
+    },
+    {
+      "epoch": 0.9298186636081394,
+      "grad_norm": 1.184924481312643,
+      "learning_rate": 8.716452814126138e-06,
+      "loss": 0.978,
+      "step": 23510
+    },
+    {
+      "epoch": 0.9302141628270284,
+      "grad_norm": 1.0873957863450652,
+      "learning_rate": 8.714913127877222e-06,
+      "loss": 0.9772,
+      "step": 23520
+    },
+    {
+      "epoch": 0.9306096620459174,
+      "grad_norm": 0.9526286358172754,
+      "learning_rate": 8.71337265487335e-06,
+      "loss": 0.9753,
+      "step": 23530
+    },
+    {
+      "epoch": 0.9310051612648065,
+      "grad_norm": 1.0317168449837404,
+      "learning_rate": 8.71183139544077e-06,
+      "loss": 0.9674,
+      "step": 23540
+    },
+    {
+      "epoch": 0.9314006604836955,
+      "grad_norm": 1.0917615948441148,
+      "learning_rate": 8.710289349905892e-06,
+      "loss": 0.9989,
+      "step": 23550
+    },
+    {
+      "epoch": 0.9317961597025846,
+      "grad_norm": 1.0454900947985744,
+      "learning_rate": 8.708746518595293e-06,
+      "loss": 0.9726,
+      "step": 23560
+    },
+    {
+      "epoch": 0.9321916589214736,
+      "grad_norm": 1.0730227821194152,
+      "learning_rate": 8.707202901835722e-06,
+      "loss": 0.9848,
+      "step": 23570
+    },
+    {
+      "epoch": 0.9325871581403626,
+      "grad_norm": 1.0243397941306367,
+      "learning_rate": 8.705658499954088e-06,
+      "loss": 0.9718,
+      "step": 23580
+    },
+    {
+      "epoch": 0.9329826573592517,
+      "grad_norm": 1.0710727387135093,
+      "learning_rate": 8.70411331327747e-06,
+      "loss": 0.9802,
+      "step": 23590
+    },
+    {
+      "epoch": 0.9333781565781407,
+      "grad_norm": 1.0059987512029682,
+      "learning_rate": 8.702567342133114e-06,
+      "loss": 0.9802,
+      "step": 23600
+    },
+    {
+      "epoch": 0.9337736557970298,
+      "grad_norm": 1.076635607057341,
+      "learning_rate": 8.701020586848426e-06,
+      "loss": 1.0014,
+      "step": 23610
+    },
+    {
+      "epoch": 0.9341691550159188,
+      "grad_norm": 1.1075009135277165,
+      "learning_rate": 8.699473047750984e-06,
+      "loss": 0.9748,
+      "step": 23620
+    },
+    {
+      "epoch": 0.9345646542348078,
+      "grad_norm": 1.0445746467086114,
+      "learning_rate": 8.69792472516853e-06,
+      "loss": 0.9829,
+      "step": 23630
+    },
+    {
+      "epoch": 0.9349601534536969,
+      "grad_norm": 1.2571801568986372,
+      "learning_rate": 8.696375619428976e-06,
+      "loss": 0.9951,
+      "step": 23640
+    },
+    {
+      "epoch": 0.935355652672586,
+      "grad_norm": 1.0088603359783315,
+      "learning_rate": 8.69482573086039e-06,
+      "loss": 0.9786,
+      "step": 23650
+    },
+    {
+      "epoch": 0.9357511518914751,
+      "grad_norm": 1.0623255392530664,
+      "learning_rate": 8.693275059791016e-06,
+      "loss": 0.9814,
+      "step": 23660
+    },
+    {
+      "epoch": 0.9361466511103641,
+      "grad_norm": 0.959154552157258,
+      "learning_rate": 8.691723606549256e-06,
+      "loss": 0.9791,
+      "step": 23670
+    },
+    {
+      "epoch": 0.9365421503292531,
+      "grad_norm": 1.0706723029268512,
+      "learning_rate": 8.690171371463684e-06,
+      "loss": 0.9888,
+      "step": 23680
+    },
+    {
+      "epoch": 0.9369376495481422,
+      "grad_norm": 1.0728027680501875,
+      "learning_rate": 8.688618354863038e-06,
+      "loss": 0.9848,
+      "step": 23690
+    },
+    {
+      "epoch": 0.9373331487670312,
+      "grad_norm": 1.0981777666780366,
+      "learning_rate": 8.687064557076217e-06,
+      "loss": 0.9624,
+      "step": 23700
+    },
+    {
+      "epoch": 0.9377286479859203,
+      "grad_norm": 1.1610255849448627,
+      "learning_rate": 8.685509978432292e-06,
+      "loss": 0.9919,
+      "step": 23710
+    },
+    {
+      "epoch": 0.9381241472048093,
+      "grad_norm": 1.1192051968153376,
+      "learning_rate": 8.683954619260493e-06,
+      "loss": 1.0051,
+      "step": 23720
+    },
+    {
+      "epoch": 0.9385196464236983,
+      "grad_norm": 1.0934737964644337,
+      "learning_rate": 8.682398479890219e-06,
+      "loss": 0.9745,
+      "step": 23730
+    },
+    {
+      "epoch": 0.9389151456425874,
+      "grad_norm": 1.0796311371704077,
+      "learning_rate": 8.680841560651037e-06,
+      "loss": 0.9927,
+      "step": 23740
+    },
+    {
+      "epoch": 0.9393106448614764,
+      "grad_norm": 1.030331097033002,
+      "learning_rate": 8.679283861872672e-06,
+      "loss": 0.9906,
+      "step": 23750
+    },
+    {
+      "epoch": 0.9397061440803655,
+      "grad_norm": 1.070847869700182,
+      "learning_rate": 8.67772538388502e-06,
+      "loss": 0.9945,
+      "step": 23760
+    },
+    {
+      "epoch": 0.9401016432992545,
+      "grad_norm": 1.1692486255761738,
+      "learning_rate": 8.676166127018137e-06,
+      "loss": 0.9874,
+      "step": 23770
+    },
+    {
+      "epoch": 0.9404971425181435,
+      "grad_norm": 1.172331981822452,
+      "learning_rate": 8.67460609160225e-06,
+      "loss": 0.9917,
+      "step": 23780
+    },
+    {
+      "epoch": 0.9408926417370326,
+      "grad_norm": 1.0936880809301424,
+      "learning_rate": 8.67304527796775e-06,
+      "loss": 0.9775,
+      "step": 23790
+    },
+    {
+      "epoch": 0.9412881409559216,
+      "grad_norm": 1.043218347758308,
+      "learning_rate": 8.671483686445184e-06,
+      "loss": 0.9932,
+      "step": 23800
+    },
+    {
+      "epoch": 0.9416836401748107,
+      "grad_norm": 1.0317258621196275,
+      "learning_rate": 8.669921317365274e-06,
+      "loss": 0.9591,
+      "step": 23810
+    },
+    {
+      "epoch": 0.9420791393936997,
+      "grad_norm": 1.1741181931341496,
+      "learning_rate": 8.668358171058903e-06,
+      "loss": 0.9728,
+      "step": 23820
+    },
+    {
+      "epoch": 0.9424746386125887,
+      "grad_norm": 1.1974144409006249,
+      "learning_rate": 8.66679424785712e-06,
+      "loss": 0.9783,
+      "step": 23830
+    },
+    {
+      "epoch": 0.9428701378314778,
+      "grad_norm": 1.0861471357077015,
+      "learning_rate": 8.665229548091134e-06,
+      "loss": 0.9693,
+      "step": 23840
+    },
+    {
+      "epoch": 0.9432656370503668,
+      "grad_norm": 0.9479709508096992,
+      "learning_rate": 8.663664072092324e-06,
+      "loss": 0.99,
+      "step": 23850
+    },
+    {
+      "epoch": 0.9436611362692559,
+      "grad_norm": 1.1996719805140812,
+      "learning_rate": 8.66209782019223e-06,
+      "loss": 0.9642,
+      "step": 23860
+    },
+    {
+      "epoch": 0.9440566354881449,
+      "grad_norm": 1.1358990074467126,
+      "learning_rate": 8.660530792722555e-06,
+      "loss": 0.9717,
+      "step": 23870
+    },
+    {
+      "epoch": 0.9444521347070339,
+      "grad_norm": 1.0318011536159686,
+      "learning_rate": 8.658962990015174e-06,
+      "loss": 0.9931,
+      "step": 23880
+    },
+    {
+      "epoch": 0.944847633925923,
+      "grad_norm": 0.9843461295627615,
+      "learning_rate": 8.657394412402115e-06,
+      "loss": 0.9715,
+      "step": 23890
+    },
+    {
+      "epoch": 0.945243133144812,
+      "grad_norm": 1.0198154769378152,
+      "learning_rate": 8.655825060215582e-06,
+      "loss": 0.9822,
+      "step": 23900
+    },
+    {
+      "epoch": 0.945638632363701,
+      "grad_norm": 1.1072586596436313,
+      "learning_rate": 8.65425493378793e-06,
+      "loss": 0.9825,
+      "step": 23910
+    },
+    {
+      "epoch": 0.9460341315825901,
+      "grad_norm": 1.0291072786851772,
+      "learning_rate": 8.652684033451693e-06,
+      "loss": 0.9724,
+      "step": 23920
+    },
+    {
+      "epoch": 0.9464296308014791,
+      "grad_norm": 1.0030405398765163,
+      "learning_rate": 8.651112359539554e-06,
+      "loss": 0.9692,
+      "step": 23930
+    },
+    {
+      "epoch": 0.9468251300203682,
+      "grad_norm": 0.9796792892649463,
+      "learning_rate": 8.649539912384367e-06,
+      "loss": 0.9793,
+      "step": 23940
+    },
+    {
+      "epoch": 0.9472206292392572,
+      "grad_norm": 1.0989814736494155,
+      "learning_rate": 8.647966692319152e-06,
+      "loss": 0.9675,
+      "step": 23950
+    },
+    {
+      "epoch": 0.9476161284581462,
+      "grad_norm": 1.043570875984775,
+      "learning_rate": 8.646392699677089e-06,
+      "loss": 0.9921,
+      "step": 23960
+    },
+    {
+      "epoch": 0.9480116276770353,
+      "grad_norm": 0.9667944501064308,
+      "learning_rate": 8.644817934791526e-06,
+      "loss": 0.9578,
+      "step": 23970
+    },
+    {
+      "epoch": 0.9484071268959244,
+      "grad_norm": 1.2399687314982235,
+      "learning_rate": 8.643242397995964e-06,
+      "loss": 0.9816,
+      "step": 23980
+    },
+    {
+      "epoch": 0.9488026261148135,
+      "grad_norm": 1.0944353925235228,
+      "learning_rate": 8.641666089624081e-06,
+      "loss": 0.9823,
+      "step": 23990
+    },
+    {
+      "epoch": 0.9491981253337025,
+      "grad_norm": 1.0536797595473253,
+      "learning_rate": 8.640089010009709e-06,
+      "loss": 0.9935,
+      "step": 24000
+    },
+    {
+      "epoch": 0.9495936245525916,
+      "grad_norm": 1.0801568145664902,
+      "learning_rate": 8.638511159486848e-06,
+      "loss": 0.9915,
+      "step": 24010
+    },
+    {
+      "epoch": 0.9499891237714806,
+      "grad_norm": 1.0780547137127836,
+      "learning_rate": 8.63693253838966e-06,
+      "loss": 0.9813,
+      "step": 24020
+    },
+    {
+      "epoch": 0.9503846229903696,
+      "grad_norm": 1.1720263913753735,
+      "learning_rate": 8.635353147052467e-06,
+      "loss": 0.9906,
+      "step": 24030
+    },
+    {
+      "epoch": 0.9507801222092587,
+      "grad_norm": 0.9704723456162168,
+      "learning_rate": 8.63377298580976e-06,
+      "loss": 0.9928,
+      "step": 24040
+    },
+    {
+      "epoch": 0.9511756214281477,
+      "grad_norm": 1.0667577133791284,
+      "learning_rate": 8.632192054996189e-06,
+      "loss": 0.9732,
+      "step": 24050
+    },
+    {
+      "epoch": 0.9515711206470368,
+      "grad_norm": 1.0962143236501385,
+      "learning_rate": 8.630610354946569e-06,
+      "loss": 0.9815,
+      "step": 24060
+    },
+    {
+      "epoch": 0.9519666198659258,
+      "grad_norm": 1.1299046191477977,
+      "learning_rate": 8.629027885995874e-06,
+      "loss": 0.9832,
+      "step": 24070
+    },
+    {
+      "epoch": 0.9523621190848148,
+      "grad_norm": 1.176366568083045,
+      "learning_rate": 8.627444648479248e-06,
+      "loss": 0.9848,
+      "step": 24080
+    },
+    {
+      "epoch": 0.9527576183037039,
+      "grad_norm": 1.063162096802908,
+      "learning_rate": 8.62586064273199e-06,
+      "loss": 0.9888,
+      "step": 24090
+    },
+    {
+      "epoch": 0.9531531175225929,
+      "grad_norm": 0.9984923004865964,
+      "learning_rate": 8.624275869089568e-06,
+      "loss": 0.994,
+      "step": 24100
+    },
+    {
+      "epoch": 0.953548616741482,
+      "grad_norm": 1.0155433138591725,
+      "learning_rate": 8.622690327887608e-06,
+      "loss": 0.9878,
+      "step": 24110
+    },
+    {
+      "epoch": 0.953944115960371,
+      "grad_norm": 1.083293640402409,
+      "learning_rate": 8.6211040194619e-06,
+      "loss": 0.9767,
+      "step": 24120
+    },
+    {
+      "epoch": 0.95433961517926,
+      "grad_norm": 1.0392009909154236,
+      "learning_rate": 8.6195169441484e-06,
+      "loss": 0.9729,
+      "step": 24130
+    },
+    {
+      "epoch": 0.9547351143981491,
+      "grad_norm": 1.2102955927932995,
+      "learning_rate": 8.617929102283222e-06,
+      "loss": 1.0018,
+      "step": 24140
+    },
+    {
+      "epoch": 0.9551306136170381,
+      "grad_norm": 0.9991388656786047,
+      "learning_rate": 8.616340494202642e-06,
+      "loss": 0.9835,
+      "step": 24150
+    },
+    {
+      "epoch": 0.9555261128359271,
+      "grad_norm": 1.0697285322514845,
+      "learning_rate": 8.614751120243102e-06,
+      "loss": 0.9757,
+      "step": 24160
+    },
+    {
+      "epoch": 0.9559216120548162,
+      "grad_norm": 0.9300497076805125,
+      "learning_rate": 8.613160980741202e-06,
+      "loss": 0.9572,
+      "step": 24170
+    },
+    {
+      "epoch": 0.9563171112737052,
+      "grad_norm": 1.153186051041131,
+      "learning_rate": 8.611570076033708e-06,
+      "loss": 0.9805,
+      "step": 24180
+    },
+    {
+      "epoch": 0.9567126104925943,
+      "grad_norm": 1.0006879664570016,
+      "learning_rate": 8.609978406457547e-06,
+      "loss": 0.9948,
+      "step": 24190
+    },
+    {
+      "epoch": 0.9571081097114833,
+      "grad_norm": 1.0861062914435131,
+      "learning_rate": 8.608385972349806e-06,
+      "loss": 0.9758,
+      "step": 24200
+    },
+    {
+      "epoch": 0.9575036089303723,
+      "grad_norm": 1.167037693779475,
+      "learning_rate": 8.606792774047735e-06,
+      "loss": 0.9654,
+      "step": 24210
+    },
+    {
+      "epoch": 0.9578991081492614,
+      "grad_norm": 1.149283586498159,
+      "learning_rate": 8.605198811888747e-06,
+      "loss": 0.9926,
+      "step": 24220
+    },
+    {
+      "epoch": 0.9582946073681504,
+      "grad_norm": 0.9599030301682894,
+      "learning_rate": 8.603604086210415e-06,
+      "loss": 0.9914,
+      "step": 24230
+    },
+    {
+      "epoch": 0.9586901065870395,
+      "grad_norm": 1.0951540139118099,
+      "learning_rate": 8.602008597350477e-06,
+      "loss": 0.9833,
+      "step": 24240
+    },
+    {
+      "epoch": 0.9590856058059285,
+      "grad_norm": 1.1108657195436034,
+      "learning_rate": 8.600412345646827e-06,
+      "loss": 0.9634,
+      "step": 24250
+    },
+    {
+      "epoch": 0.9594811050248175,
+      "grad_norm": 1.0971766303144515,
+      "learning_rate": 8.598815331437525e-06,
+      "loss": 0.9856,
+      "step": 24260
+    },
+    {
+      "epoch": 0.9598766042437066,
+      "grad_norm": 1.124539805228262,
+      "learning_rate": 8.597217555060791e-06,
+      "loss": 0.9611,
+      "step": 24270
+    },
+    {
+      "epoch": 0.9602721034625956,
+      "grad_norm": 1.0741751479171762,
+      "learning_rate": 8.595619016855008e-06,
+      "loss": 0.986,
+      "step": 24280
+    },
+    {
+      "epoch": 0.9606676026814847,
+      "grad_norm": 1.001879046646722,
+      "learning_rate": 8.594019717158718e-06,
+      "loss": 0.9743,
+      "step": 24290
+    },
+    {
+      "epoch": 0.9610631019003737,
+      "grad_norm": 1.1423274618036203,
+      "learning_rate": 8.592419656310626e-06,
+      "loss": 0.9738,
+      "step": 24300
+    },
+    {
+      "epoch": 0.9614586011192627,
+      "grad_norm": 1.0571012396736137,
+      "learning_rate": 8.590818834649595e-06,
+      "loss": 0.9812,
+      "step": 24310
+    },
+    {
+      "epoch": 0.9618541003381519,
+      "grad_norm": 1.183819711378045,
+      "learning_rate": 8.589217252514654e-06,
+      "loss": 0.9813,
+      "step": 24320
+    },
+    {
+      "epoch": 0.9622495995570409,
+      "grad_norm": 1.1489117380124345,
+      "learning_rate": 8.58761491024499e-06,
+      "loss": 0.9657,
+      "step": 24330
+    },
+    {
+      "epoch": 0.96264509877593,
+      "grad_norm": 1.0566611151640706,
+      "learning_rate": 8.586011808179953e-06,
+      "loss": 0.9893,
+      "step": 24340
+    },
+    {
+      "epoch": 0.963040597994819,
+      "grad_norm": 1.14223900394507,
+      "learning_rate": 8.58440794665905e-06,
+      "loss": 0.9692,
+      "step": 24350
+    },
+    {
+      "epoch": 0.963436097213708,
+      "grad_norm": 1.0985227241314044,
+      "learning_rate": 8.582803326021953e-06,
+      "loss": 0.9698,
+      "step": 24360
+    },
+    {
+      "epoch": 0.9638315964325971,
+      "grad_norm": 1.0039282774950415,
+      "learning_rate": 8.581197946608492e-06,
+      "loss": 0.9597,
+      "step": 24370
+    },
+    {
+      "epoch": 0.9642270956514861,
+      "grad_norm": 1.1665420747024093,
+      "learning_rate": 8.579591808758661e-06,
+      "loss": 0.9658,
+      "step": 24380
+    },
+    {
+      "epoch": 0.9646225948703752,
+      "grad_norm": 1.119336610269564,
+      "learning_rate": 8.57798491281261e-06,
+      "loss": 0.9712,
+      "step": 24390
+    },
+    {
+      "epoch": 0.9650180940892642,
+      "grad_norm": 1.0942144775036178,
+      "learning_rate": 8.576377259110655e-06,
+      "loss": 0.9634,
+      "step": 24400
+    },
+    {
+      "epoch": 0.9654135933081532,
+      "grad_norm": 1.2251357057638173,
+      "learning_rate": 8.574768847993266e-06,
+      "loss": 0.9811,
+      "step": 24410
+    },
+    {
+      "epoch": 0.9658090925270423,
+      "grad_norm": 1.0900044430802518,
+      "learning_rate": 8.57315967980108e-06,
+      "loss": 0.9531,
+      "step": 24420
+    },
+    {
+      "epoch": 0.9662045917459313,
+      "grad_norm": 1.0441010310095726,
+      "learning_rate": 8.571549754874888e-06,
+      "loss": 0.9768,
+      "step": 24430
+    },
+    {
+      "epoch": 0.9666000909648204,
+      "grad_norm": 1.1353781094471103,
+      "learning_rate": 8.569939073555649e-06,
+      "loss": 0.9868,
+      "step": 24440
+    },
+    {
+      "epoch": 0.9669955901837094,
+      "grad_norm": 1.1748225953794131,
+      "learning_rate": 8.568327636184472e-06,
+      "loss": 0.961,
+      "step": 24450
+    },
+    {
+      "epoch": 0.9673910894025984,
+      "grad_norm": 1.0340001923572182,
+      "learning_rate": 8.566715443102638e-06,
+      "loss": 0.9923,
+      "step": 24460
+    },
+    {
+      "epoch": 0.9677865886214875,
+      "grad_norm": 1.15050439192543,
+      "learning_rate": 8.565102494651575e-06,
+      "loss": 0.9606,
+      "step": 24470
+    },
+    {
+      "epoch": 0.9681820878403765,
+      "grad_norm": 1.0292956727015647,
+      "learning_rate": 8.563488791172885e-06,
+      "loss": 0.9568,
+      "step": 24480
+    },
+    {
+      "epoch": 0.9685775870592656,
+      "grad_norm": 1.0981091313247309,
+      "learning_rate": 8.561874333008317e-06,
+      "loss": 0.9501,
+      "step": 24490
+    },
+    {
+      "epoch": 0.9689730862781546,
+      "grad_norm": 1.0658403921725377,
+      "learning_rate": 8.56025912049979e-06,
+      "loss": 0.9939,
+      "step": 24500
+    },
+    {
+      "epoch": 0.9693685854970436,
+      "grad_norm": 1.0464554939391117,
+      "learning_rate": 8.558643153989376e-06,
+      "loss": 0.9719,
+      "step": 24510
+    },
+    {
+      "epoch": 0.9697640847159327,
+      "grad_norm": 0.9584950091854825,
+      "learning_rate": 8.557026433819309e-06,
+      "loss": 0.9841,
+      "step": 24520
+    },
+    {
+      "epoch": 0.9701595839348217,
+      "grad_norm": 1.0516021723383688,
+      "learning_rate": 8.555408960331984e-06,
+      "loss": 0.9923,
+      "step": 24530
+    },
+    {
+      "epoch": 0.9705550831537108,
+      "grad_norm": 0.9971277263320242,
+      "learning_rate": 8.55379073386995e-06,
+      "loss": 0.9893,
+      "step": 24540
+    },
+    {
+      "epoch": 0.9709505823725998,
+      "grad_norm": 0.9255117927107546,
+      "learning_rate": 8.552171754775926e-06,
+      "loss": 0.9746,
+      "step": 24550
+    },
+    {
+      "epoch": 0.9713460815914888,
+      "grad_norm": 1.2434192573745426,
+      "learning_rate": 8.55055202339278e-06,
+      "loss": 0.973,
+      "step": 24560
+    },
+    {
+      "epoch": 0.9717415808103779,
+      "grad_norm": 1.0887798133883928,
+      "learning_rate": 8.548931540063544e-06,
+      "loss": 0.9661,
+      "step": 24570
+    },
+    {
+      "epoch": 0.9721370800292669,
+      "grad_norm": 1.0779137810621702,
+      "learning_rate": 8.547310305131408e-06,
+      "loss": 0.9641,
+      "step": 24580
+    },
+    {
+      "epoch": 0.972532579248156,
+      "grad_norm": 1.172646876455584,
+      "learning_rate": 8.545688318939722e-06,
+      "loss": 0.9662,
+      "step": 24590
+    },
+    {
+      "epoch": 0.972928078467045,
+      "grad_norm": 0.9646992412708512,
+      "learning_rate": 8.544065581831998e-06,
+      "loss": 0.9756,
+      "step": 24600
+    },
+    {
+      "epoch": 0.973323577685934,
+      "grad_norm": 1.003010231878538,
+      "learning_rate": 8.5424420941519e-06,
+      "loss": 0.9759,
+      "step": 24610
+    },
+    {
+      "epoch": 0.9737190769048231,
+      "grad_norm": 1.073681049884641,
+      "learning_rate": 8.540817856243256e-06,
+      "loss": 0.9859,
+      "step": 24620
+    },
+    {
+      "epoch": 0.9741145761237121,
+      "grad_norm": 1.0940896551155155,
+      "learning_rate": 8.539192868450051e-06,
+      "loss": 0.9715,
+      "step": 24630
+    },
+    {
+      "epoch": 0.9745100753426011,
+      "grad_norm": 1.1119498644474028,
+      "learning_rate": 8.537567131116432e-06,
+      "loss": 0.9851,
+      "step": 24640
+    },
+    {
+      "epoch": 0.9749055745614903,
+      "grad_norm": 1.2037715625381082,
+      "learning_rate": 8.5359406445867e-06,
+      "loss": 0.9765,
+      "step": 24650
+    },
+    {
+      "epoch": 0.9753010737803793,
+      "grad_norm": 1.1158633096753356,
+      "learning_rate": 8.53431340920532e-06,
+      "loss": 0.9851,
+      "step": 24660
+    },
+    {
+      "epoch": 0.9756965729992684,
+      "grad_norm": 0.9851811030875438,
+      "learning_rate": 8.53268542531691e-06,
+      "loss": 0.9832,
+      "step": 24670
+    },
+    {
+      "epoch": 0.9760920722181574,
+      "grad_norm": 1.0082758887117738,
+      "learning_rate": 8.531056693266247e-06,
+      "loss": 0.9912,
+      "step": 24680
+    },
+    {
+      "epoch": 0.9764875714370465,
+      "grad_norm": 1.0469525366784656,
+      "learning_rate": 8.529427213398273e-06,
+      "loss": 0.9874,
+      "step": 24690
+    },
+    {
+      "epoch": 0.9768830706559355,
+      "grad_norm": 1.0626592652464433,
+      "learning_rate": 8.527796986058082e-06,
+      "loss": 0.9743,
+      "step": 24700
+    },
+    {
+      "epoch": 0.9772785698748245,
+      "grad_norm": 0.9594083953456605,
+      "learning_rate": 8.526166011590926e-06,
+      "loss": 0.9688,
+      "step": 24710
+    },
+    {
+      "epoch": 0.9776740690937136,
+      "grad_norm": 1.0130924977436224,
+      "learning_rate": 8.524534290342224e-06,
+      "loss": 0.9663,
+      "step": 24720
+    },
+    {
+      "epoch": 0.9780695683126026,
+      "grad_norm": 1.256564726703701,
+      "learning_rate": 8.522901822657538e-06,
+      "loss": 0.9711,
+      "step": 24730
+    },
+    {
+      "epoch": 0.9784650675314917,
+      "grad_norm": 0.9845687923933368,
+      "learning_rate": 8.521268608882602e-06,
+      "loss": 0.9935,
+      "step": 24740
+    },
+    {
+      "epoch": 0.9788605667503807,
+      "grad_norm": 1.0044882677272104,
+      "learning_rate": 8.519634649363305e-06,
+      "loss": 0.9772,
+      "step": 24750
+    },
+    {
+      "epoch": 0.9792560659692697,
+      "grad_norm": 0.9990586224870897,
+      "learning_rate": 8.517999944445684e-06,
+      "loss": 0.9783,
+      "step": 24760
+    },
+    {
+      "epoch": 0.9796515651881588,
+      "grad_norm": 1.0824122734800394,
+      "learning_rate": 8.516364494475947e-06,
+      "loss": 0.9742,
+      "step": 24770
+    },
+    {
+      "epoch": 0.9800470644070478,
+      "grad_norm": 1.0743659396923315,
+      "learning_rate": 8.514728299800456e-06,
+      "loss": 0.9874,
+      "step": 24780
+    },
+    {
+      "epoch": 0.9804425636259368,
+      "grad_norm": 1.1588839163241136,
+      "learning_rate": 8.513091360765724e-06,
+      "loss": 0.9568,
+      "step": 24790
+    },
+    {
+      "epoch": 0.9808380628448259,
+      "grad_norm": 1.0525018969474194,
+      "learning_rate": 8.511453677718428e-06,
+      "loss": 0.9588,
+      "step": 24800
+    },
+    {
+      "epoch": 0.9812335620637149,
+      "grad_norm": 1.122657093279453,
+      "learning_rate": 8.509815251005402e-06,
+      "loss": 0.9544,
+      "step": 24810
+    },
+    {
+      "epoch": 0.981629061282604,
+      "grad_norm": 1.0003572985318947,
+      "learning_rate": 8.508176080973636e-06,
+      "loss": 0.9693,
+      "step": 24820
+    },
+    {
+      "epoch": 0.982024560501493,
+      "grad_norm": 1.0647895252488284,
+      "learning_rate": 8.506536167970282e-06,
+      "loss": 0.9867,
+      "step": 24830
+    },
+    {
+      "epoch": 0.982420059720382,
+      "grad_norm": 1.0339369710015192,
+      "learning_rate": 8.504895512342639e-06,
+      "loss": 0.9844,
+      "step": 24840
+    },
+    {
+      "epoch": 0.9828155589392711,
+      "grad_norm": 1.0970746775487668,
+      "learning_rate": 8.503254114438176e-06,
+      "loss": 0.9542,
+      "step": 24850
+    },
+    {
+      "epoch": 0.9832110581581601,
+      "grad_norm": 1.166215584521932,
+      "learning_rate": 8.501611974604507e-06,
+      "loss": 0.9581,
+      "step": 24860
+    },
+    {
+      "epoch": 0.9836065573770492,
+      "grad_norm": 0.9409443143951971,
+      "learning_rate": 8.499969093189413e-06,
+      "loss": 0.9698,
+      "step": 24870
+    },
+    {
+      "epoch": 0.9840020565959382,
+      "grad_norm": 0.9956033294425854,
+      "learning_rate": 8.498325470540829e-06,
+      "loss": 0.9549,
+      "step": 24880
+    },
+    {
+      "epoch": 0.9843975558148272,
+      "grad_norm": 1.1131349280853753,
+      "learning_rate": 8.49668110700684e-06,
+      "loss": 0.9797,
+      "step": 24890
+    },
+    {
+      "epoch": 0.9847930550337163,
+      "grad_norm": 0.9820467687229171,
+      "learning_rate": 8.495036002935704e-06,
+      "loss": 0.9807,
+      "step": 24900
+    },
+    {
+      "epoch": 0.9851885542526053,
+      "grad_norm": 1.0392446418209116,
+      "learning_rate": 8.493390158675815e-06,
+      "loss": 0.9869,
+      "step": 24910
+    },
+    {
+      "epoch": 0.9855840534714944,
+      "grad_norm": 1.0887242468295657,
+      "learning_rate": 8.491743574575743e-06,
+      "loss": 0.9764,
+      "step": 24920
+    },
+    {
+      "epoch": 0.9859795526903834,
+      "grad_norm": 0.9735300480010712,
+      "learning_rate": 8.490096250984203e-06,
+      "loss": 0.9906,
+      "step": 24930
+    },
+    {
+      "epoch": 0.9863750519092724,
+      "grad_norm": 1.1269026560005961,
+      "learning_rate": 8.488448188250068e-06,
+      "loss": 0.9725,
+      "step": 24940
+    },
+    {
+      "epoch": 0.9867705511281615,
+      "grad_norm": 1.2244448423161338,
+      "learning_rate": 8.486799386722372e-06,
+      "loss": 0.9834,
+      "step": 24950
+    },
+    {
+      "epoch": 0.9871660503470505,
+      "grad_norm": 1.1517443881734524,
+      "learning_rate": 8.485149846750304e-06,
+      "loss": 0.995,
+      "step": 24960
+    },
+    {
+      "epoch": 0.9875615495659396,
+      "grad_norm": 0.9742346488245709,
+      "learning_rate": 8.483499568683206e-06,
+      "loss": 0.9619,
+      "step": 24970
+    },
+    {
+      "epoch": 0.9879570487848286,
+      "grad_norm": 1.116800309731723,
+      "learning_rate": 8.48184855287058e-06,
+      "loss": 0.9678,
+      "step": 24980
+    },
+    {
+      "epoch": 0.9883525480037177,
+      "grad_norm": 1.2221451131700947,
+      "learning_rate": 8.480196799662082e-06,
+      "loss": 0.9789,
+      "step": 24990
+    },
+    {
+      "epoch": 0.9887480472226068,
+      "grad_norm": 1.1721500740274573,
+      "learning_rate": 8.478544309407524e-06,
+      "loss": 0.9934,
+      "step": 25000
+    },
+    {
+      "epoch": 0.9891435464414958,
+      "grad_norm": 1.0531231798318392,
+      "learning_rate": 8.476891082456877e-06,
+      "loss": 0.9677,
+      "step": 25010
+    },
+    {
+      "epoch": 0.9895390456603849,
+      "grad_norm": 1.0300403205195796,
+      "learning_rate": 8.475237119160267e-06,
+      "loss": 0.9681,
+      "step": 25020
+    },
+    {
+      "epoch": 0.9899345448792739,
+      "grad_norm": 1.083815401921792,
+      "learning_rate": 8.473582419867971e-06,
+      "loss": 0.9817,
+      "step": 25030
+    },
+    {
+      "epoch": 0.9903300440981629,
+      "grad_norm": 1.098402138660783,
+      "learning_rate": 8.47192698493043e-06,
+      "loss": 0.9798,
+      "step": 25040
+    },
+    {
+      "epoch": 0.990725543317052,
+      "grad_norm": 1.2358762454533718,
+      "learning_rate": 8.470270814698234e-06,
+      "loss": 0.9798,
+      "step": 25050
+    },
+    {
+      "epoch": 0.991121042535941,
+      "grad_norm": 1.0047991855222802,
+      "learning_rate": 8.468613909522135e-06,
+      "loss": 0.9715,
+      "step": 25060
+    },
+    {
+      "epoch": 0.9915165417548301,
+      "grad_norm": 1.1436589937445054,
+      "learning_rate": 8.466956269753033e-06,
+      "loss": 0.9625,
+      "step": 25070
+    },
+    {
+      "epoch": 0.9919120409737191,
+      "grad_norm": 1.135032816869507,
+      "learning_rate": 8.465297895741989e-06,
+      "loss": 0.9725,
+      "step": 25080
+    },
+    {
+      "epoch": 0.9923075401926081,
+      "grad_norm": 1.2465170306999038,
+      "learning_rate": 8.46363878784022e-06,
+      "loss": 0.9682,
+      "step": 25090
+    },
+    {
+      "epoch": 0.9927030394114972,
+      "grad_norm": 0.9622293138397663,
+      "learning_rate": 8.461978946399097e-06,
+      "loss": 0.9597,
+      "step": 25100
+    },
+    {
+      "epoch": 0.9930985386303862,
+      "grad_norm": 0.9983130861800154,
+      "learning_rate": 8.460318371770142e-06,
+      "loss": 0.9671,
+      "step": 25110
+    },
+    {
+      "epoch": 0.9934940378492753,
+      "grad_norm": 1.100857497232352,
+      "learning_rate": 8.45865706430504e-06,
+      "loss": 0.9736,
+      "step": 25120
+    },
+    {
+      "epoch": 0.9938895370681643,
+      "grad_norm": 1.0431628846721765,
+      "learning_rate": 8.456995024355626e-06,
+      "loss": 0.9754,
+      "step": 25130
+    },
+    {
+      "epoch": 0.9942850362870533,
+      "grad_norm": 1.1679608148087055,
+      "learning_rate": 8.45533225227389e-06,
+      "loss": 0.9899,
+      "step": 25140
+    },
+    {
+      "epoch": 0.9946805355059424,
+      "grad_norm": 1.2493484344888548,
+      "learning_rate": 8.453668748411982e-06,
+      "loss": 0.9602,
+      "step": 25150
+    },
+    {
+      "epoch": 0.9950760347248314,
+      "grad_norm": 1.0460490783985266,
+      "learning_rate": 8.452004513122203e-06,
+      "loss": 0.9846,
+      "step": 25160
+    },
+    {
+      "epoch": 0.9954715339437205,
+      "grad_norm": 1.1157475160160253,
+      "learning_rate": 8.450339546757007e-06,
+      "loss": 0.978,
+      "step": 25170
+    },
+    {
+      "epoch": 0.9958670331626095,
+      "grad_norm": 1.047670583168386,
+      "learning_rate": 8.448673849669007e-06,
+      "loss": 0.9714,
+      "step": 25180
+    },
+    {
+      "epoch": 0.9962625323814985,
+      "grad_norm": 1.0795638452773237,
+      "learning_rate": 8.447007422210969e-06,
+      "loss": 0.9727,
+      "step": 25190
+    },
+    {
+      "epoch": 0.9966580316003876,
+      "grad_norm": 1.182375761433743,
+      "learning_rate": 8.445340264735816e-06,
+      "loss": 0.9726,
+      "step": 25200
+    },
+    {
+      "epoch": 0.9970535308192766,
+      "grad_norm": 1.1092415996793215,
+      "learning_rate": 8.443672377596619e-06,
+      "loss": 0.9676,
+      "step": 25210
+    },
+    {
+      "epoch": 0.9974490300381657,
+      "grad_norm": 1.0483620430525118,
+      "learning_rate": 8.442003761146608e-06,
+      "loss": 0.9462,
+      "step": 25220
+    },
+    {
+      "epoch": 0.9978445292570547,
+      "grad_norm": 1.0521489228971246,
+      "learning_rate": 8.440334415739174e-06,
+      "loss": 0.9664,
+      "step": 25230
+    },
+    {
+      "epoch": 0.9982400284759437,
+      "grad_norm": 1.0559306373608404,
+      "learning_rate": 8.438664341727847e-06,
+      "loss": 0.9725,
+      "step": 25240
+    },
+    {
+      "epoch": 0.9986355276948328,
+      "grad_norm": 1.294081154449968,
+      "learning_rate": 8.436993539466327e-06,
+      "loss": 0.9673,
+      "step": 25250
+    },
+    {
+      "epoch": 0.9990310269137218,
+      "grad_norm": 1.285149072661124,
+      "learning_rate": 8.435322009308457e-06,
+      "loss": 0.9432,
+      "step": 25260
+    },
+    {
+      "epoch": 0.9994265261326108,
+      "grad_norm": 1.0958739312464687,
+      "learning_rate": 8.433649751608242e-06,
+      "loss": 0.9786,
+      "step": 25270
+    },
+    {
+      "epoch": 0.9998220253514999,
+      "grad_norm": 1.0560425402901619,
+      "learning_rate": 8.431976766719834e-06,
+      "loss": 0.9726,
+      "step": 25280
+    },
+    {
+      "epoch": 1.000217524570389,
+      "grad_norm": 1.1122063131120121,
+      "learning_rate": 8.430303054997544e-06,
+      "loss": 0.9292,
+      "step": 25290
+    },
+    {
+      "epoch": 1.000613023789278,
+      "grad_norm": 1.0965049679869467,
+      "learning_rate": 8.428628616795835e-06,
+      "loss": 0.906,
+      "step": 25300
+    },
+    {
+      "epoch": 1.0010085230081671,
+      "grad_norm": 1.1176026427279113,
+      "learning_rate": 8.426953452469326e-06,
+      "loss": 0.9058,
+      "step": 25310
+    },
+    {
+      "epoch": 1.0014040222270562,
+      "grad_norm": 1.2102590984394073,
+      "learning_rate": 8.425277562372786e-06,
+      "loss": 0.9061,
+      "step": 25320
+    },
+    {
+      "epoch": 1.0017995214459452,
+      "grad_norm": 1.044705965380849,
+      "learning_rate": 8.423600946861144e-06,
+      "loss": 0.892,
+      "step": 25330
+    },
+    {
+      "epoch": 1.0021950206648342,
+      "grad_norm": 1.1302544296221577,
+      "learning_rate": 8.421923606289473e-06,
+      "loss": 0.8991,
+      "step": 25340
+    },
+    {
+      "epoch": 1.0025905198837233,
+      "grad_norm": 1.0912614317007048,
+      "learning_rate": 8.420245541013006e-06,
+      "loss": 0.9054,
+      "step": 25350
+    },
+    {
+      "epoch": 1.0029860191026123,
+      "grad_norm": 1.0532253533864584,
+      "learning_rate": 8.41856675138713e-06,
+      "loss": 0.915,
+      "step": 25360
+    },
+    {
+      "epoch": 1.0033815183215014,
+      "grad_norm": 1.1055684310333207,
+      "learning_rate": 8.416887237767385e-06,
+      "loss": 0.8978,
+      "step": 25370
+    },
+    {
+      "epoch": 1.0037770175403904,
+      "grad_norm": 1.108742767866052,
+      "learning_rate": 8.415207000509461e-06,
+      "loss": 0.9115,
+      "step": 25380
+    },
+    {
+      "epoch": 1.0041725167592794,
+      "grad_norm": 1.1769804228345317,
+      "learning_rate": 8.413526039969204e-06,
+      "loss": 0.8913,
+      "step": 25390
+    },
+    {
+      "epoch": 1.0045680159781685,
+      "grad_norm": 1.0650812806565177,
+      "learning_rate": 8.411844356502615e-06,
+      "loss": 0.8955,
+      "step": 25400
+    },
+    {
+      "epoch": 1.0049635151970575,
+      "grad_norm": 1.2571698043878294,
+      "learning_rate": 8.41016195046584e-06,
+      "loss": 0.8945,
+      "step": 25410
+    },
+    {
+      "epoch": 1.0053590144159466,
+      "grad_norm": 1.1928729441937398,
+      "learning_rate": 8.408478822215191e-06,
+      "loss": 0.8842,
+      "step": 25420
+    },
+    {
+      "epoch": 1.0057545136348356,
+      "grad_norm": 1.1196771377639678,
+      "learning_rate": 8.406794972107119e-06,
+      "loss": 0.9332,
+      "step": 25430
+    },
+    {
+      "epoch": 1.0061500128537246,
+      "grad_norm": 1.0632532577603537,
+      "learning_rate": 8.405110400498239e-06,
+      "loss": 0.8966,
+      "step": 25440
+    },
+    {
+      "epoch": 1.0065455120726137,
+      "grad_norm": 0.9924201140796625,
+      "learning_rate": 8.403425107745315e-06,
+      "loss": 0.9148,
+      "step": 25450
+    },
+    {
+      "epoch": 1.0069410112915027,
+      "grad_norm": 1.124075974689088,
+      "learning_rate": 8.401739094205259e-06,
+      "loss": 0.9151,
+      "step": 25460
+    },
+    {
+      "epoch": 1.0073365105103917,
+      "grad_norm": 1.2155242175758245,
+      "learning_rate": 8.400052360235143e-06,
+      "loss": 0.9092,
+      "step": 25470
+    },
+    {
+      "epoch": 1.0077320097292808,
+      "grad_norm": 1.130226587808288,
+      "learning_rate": 8.398364906192189e-06,
+      "loss": 0.9222,
+      "step": 25480
+    },
+    {
+      "epoch": 1.0081275089481698,
+      "grad_norm": 1.029056255979358,
+      "learning_rate": 8.396676732433767e-06,
+      "loss": 0.9224,
+      "step": 25490
+    },
+    {
+      "epoch": 1.0085230081670589,
+      "grad_norm": 1.142710985123221,
+      "learning_rate": 8.394987839317405e-06,
+      "loss": 0.8994,
+      "step": 25500
+    },
+    {
+      "epoch": 1.008918507385948,
+      "grad_norm": 1.2459966870815842,
+      "learning_rate": 8.393298227200783e-06,
+      "loss": 0.9037,
+      "step": 25510
+    },
+    {
+      "epoch": 1.009314006604837,
+      "grad_norm": 1.1217583580700818,
+      "learning_rate": 8.391607896441733e-06,
+      "loss": 0.9044,
+      "step": 25520
+    },
+    {
+      "epoch": 1.009709505823726,
+      "grad_norm": 1.1583672071361588,
+      "learning_rate": 8.389916847398235e-06,
+      "loss": 0.8974,
+      "step": 25530
+    },
+    {
+      "epoch": 1.010105005042615,
+      "grad_norm": 1.0629845412090806,
+      "learning_rate": 8.388225080428425e-06,
+      "loss": 0.9019,
+      "step": 25540
+    },
+    {
+      "epoch": 1.010500504261504,
+      "grad_norm": 1.1274331366613304,
+      "learning_rate": 8.38653259589059e-06,
+      "loss": 0.9138,
+      "step": 25550
+    },
+    {
+      "epoch": 1.010896003480393,
+      "grad_norm": 1.1008028318047414,
+      "learning_rate": 8.38483939414317e-06,
+      "loss": 0.8925,
+      "step": 25560
+    },
+    {
+      "epoch": 1.0112915026992821,
+      "grad_norm": 1.1759268056993244,
+      "learning_rate": 8.383145475544757e-06,
+      "loss": 0.9015,
+      "step": 25570
+    },
+    {
+      "epoch": 1.0116870019181712,
+      "grad_norm": 1.1414444035725138,
+      "learning_rate": 8.381450840454092e-06,
+      "loss": 0.9018,
+      "step": 25580
+    },
+    {
+      "epoch": 1.0120825011370602,
+      "grad_norm": 1.0825001927420355,
+      "learning_rate": 8.379755489230073e-06,
+      "loss": 0.9242,
+      "step": 25590
+    },
+    {
+      "epoch": 1.0124780003559493,
+      "grad_norm": 1.2755566730808183,
+      "learning_rate": 8.378059422231741e-06,
+      "loss": 0.8869,
+      "step": 25600
+    },
+    {
+      "epoch": 1.0128734995748383,
+      "grad_norm": 1.0901785165508477,
+      "learning_rate": 8.376362639818298e-06,
+      "loss": 0.9001,
+      "step": 25610
+    },
+    {
+      "epoch": 1.0132689987937273,
+      "grad_norm": 1.1192391210756936,
+      "learning_rate": 8.374665142349095e-06,
+      "loss": 0.9208,
+      "step": 25620
+    },
+    {
+      "epoch": 1.0136644980126164,
+      "grad_norm": 1.1452051245208987,
+      "learning_rate": 8.372966930183625e-06,
+      "loss": 0.8938,
+      "step": 25630
+    },
+    {
+      "epoch": 1.0140599972315054,
+      "grad_norm": 0.9900050914642096,
+      "learning_rate": 8.371268003681549e-06,
+      "loss": 0.8953,
+      "step": 25640
+    },
+    {
+      "epoch": 1.0144554964503945,
+      "grad_norm": 1.0762038225120603,
+      "learning_rate": 8.369568363202667e-06,
+      "loss": 0.8986,
+      "step": 25650
+    },
+    {
+      "epoch": 1.0148509956692835,
+      "grad_norm": 1.1200986578948064,
+      "learning_rate": 8.367868009106935e-06,
+      "loss": 0.9106,
+      "step": 25660
+    },
+    {
+      "epoch": 1.0152464948881725,
+      "grad_norm": 1.0539551484285923,
+      "learning_rate": 8.366166941754455e-06,
+      "loss": 0.9012,
+      "step": 25670
+    },
+    {
+      "epoch": 1.0156419941070616,
+      "grad_norm": 1.1051340982271542,
+      "learning_rate": 8.364465161505487e-06,
+      "loss": 0.8996,
+      "step": 25680
+    },
+    {
+      "epoch": 1.0160374933259506,
+      "grad_norm": 1.2476102193040963,
+      "learning_rate": 8.362762668720438e-06,
+      "loss": 0.9111,
+      "step": 25690
+    },
+    {
+      "epoch": 1.0164329925448397,
+      "grad_norm": 1.127198128985044,
+      "learning_rate": 8.36105946375987e-06,
+      "loss": 0.9054,
+      "step": 25700
+    },
+    {
+      "epoch": 1.0168284917637287,
+      "grad_norm": 1.1633383076226893,
+      "learning_rate": 8.359355546984487e-06,
+      "loss": 0.9122,
+      "step": 25710
+    },
+    {
+      "epoch": 1.0172239909826177,
+      "grad_norm": 1.0110877777537237,
+      "learning_rate": 8.357650918755153e-06,
+      "loss": 0.8986,
+      "step": 25720
+    },
+    {
+      "epoch": 1.0176194902015068,
+      "grad_norm": 1.0071923202531416,
+      "learning_rate": 8.355945579432878e-06,
+      "loss": 0.8927,
+      "step": 25730
+    },
+    {
+      "epoch": 1.0180149894203958,
+      "grad_norm": 1.163110831712327,
+      "learning_rate": 8.354239529378825e-06,
+      "loss": 0.8991,
+      "step": 25740
+    },
+    {
+      "epoch": 1.0184104886392848,
+      "grad_norm": 1.1454397337178799,
+      "learning_rate": 8.352532768954305e-06,
+      "loss": 0.9082,
+      "step": 25750
+    },
+    {
+      "epoch": 1.018805987858174,
+      "grad_norm": 1.0282578570312622,
+      "learning_rate": 8.35082529852078e-06,
+      "loss": 0.9113,
+      "step": 25760
+    },
+    {
+      "epoch": 1.019201487077063,
+      "grad_norm": 1.1644705857999138,
+      "learning_rate": 8.349117118439864e-06,
+      "loss": 0.9056,
+      "step": 25770
+    },
+    {
+      "epoch": 1.019596986295952,
+      "grad_norm": 1.0553504386081243,
+      "learning_rate": 8.347408229073321e-06,
+      "loss": 0.9207,
+      "step": 25780
+    },
+    {
+      "epoch": 1.019992485514841,
+      "grad_norm": 1.049931458978996,
+      "learning_rate": 8.345698630783062e-06,
+      "loss": 0.9034,
+      "step": 25790
+    },
+    {
+      "epoch": 1.02038798473373,
+      "grad_norm": 1.0646536273269438,
+      "learning_rate": 8.343988323931155e-06,
+      "loss": 0.8817,
+      "step": 25800
+    },
+    {
+      "epoch": 1.020783483952619,
+      "grad_norm": 1.1382528685128661,
+      "learning_rate": 8.342277308879807e-06,
+      "loss": 0.8863,
+      "step": 25810
+    },
+    {
+      "epoch": 1.0211789831715081,
+      "grad_norm": 1.082035748051446,
+      "learning_rate": 8.340565585991386e-06,
+      "loss": 0.9063,
+      "step": 25820
+    },
+    {
+      "epoch": 1.0215744823903974,
+      "grad_norm": 1.0543652850417833,
+      "learning_rate": 8.338853155628408e-06,
+      "loss": 0.8878,
+      "step": 25830
+    },
+    {
+      "epoch": 1.0219699816092864,
+      "grad_norm": 1.2583980088512,
+      "learning_rate": 8.337140018153532e-06,
+      "loss": 0.9034,
+      "step": 25840
+    },
+    {
+      "epoch": 1.0223654808281755,
+      "grad_norm": 1.1363987916154439,
+      "learning_rate": 8.33542617392957e-06,
+      "loss": 0.9007,
+      "step": 25850
+    },
+    {
+      "epoch": 1.0227609800470645,
+      "grad_norm": 1.1322430291594914,
+      "learning_rate": 8.333711623319492e-06,
+      "loss": 0.9015,
+      "step": 25860
+    },
+    {
+      "epoch": 1.0231564792659535,
+      "grad_norm": 1.2583851440179512,
+      "learning_rate": 8.331996366686404e-06,
+      "loss": 0.9121,
+      "step": 25870
+    },
+    {
+      "epoch": 1.0235519784848426,
+      "grad_norm": 1.1672767543728206,
+      "learning_rate": 8.330280404393569e-06,
+      "loss": 0.9036,
+      "step": 25880
+    },
+    {
+      "epoch": 1.0239474777037316,
+      "grad_norm": 1.0233772454658783,
+      "learning_rate": 8.3285637368044e-06,
+      "loss": 0.9091,
+      "step": 25890
+    },
+    {
+      "epoch": 1.0243429769226207,
+      "grad_norm": 1.11875252850936,
+      "learning_rate": 8.326846364282457e-06,
+      "loss": 0.8915,
+      "step": 25900
+    },
+    {
+      "epoch": 1.0247384761415097,
+      "grad_norm": 1.2000842182902896,
+      "learning_rate": 8.325128287191451e-06,
+      "loss": 0.902,
+      "step": 25910
+    },
+    {
+      "epoch": 1.0251339753603987,
+      "grad_norm": 1.1814098772590507,
+      "learning_rate": 8.32340950589524e-06,
+      "loss": 0.9104,
+      "step": 25920
+    },
+    {
+      "epoch": 1.0255294745792878,
+      "grad_norm": 1.1477897706659193,
+      "learning_rate": 8.321690020757833e-06,
+      "loss": 0.924,
+      "step": 25930
+    },
+    {
+      "epoch": 1.0259249737981768,
+      "grad_norm": 1.0810906223097685,
+      "learning_rate": 8.319969832143389e-06,
+      "loss": 0.8895,
+      "step": 25940
+    },
+    {
+      "epoch": 1.0263204730170659,
+      "grad_norm": 1.038721590468152,
+      "learning_rate": 8.31824894041621e-06,
+      "loss": 0.9234,
+      "step": 25950
+    },
+    {
+      "epoch": 1.026715972235955,
+      "grad_norm": 1.1338856682034621,
+      "learning_rate": 8.316527345940754e-06,
+      "loss": 0.9097,
+      "step": 25960
+    },
+    {
+      "epoch": 1.027111471454844,
+      "grad_norm": 1.0908569789747333,
+      "learning_rate": 8.31480504908163e-06,
+      "loss": 0.9036,
+      "step": 25970
+    },
+    {
+      "epoch": 1.027506970673733,
+      "grad_norm": 1.1858118165224072,
+      "learning_rate": 8.313082050203581e-06,
+      "loss": 0.888,
+      "step": 25980
+    },
+    {
+      "epoch": 1.027902469892622,
+      "grad_norm": 1.0937115220729168,
+      "learning_rate": 8.311358349671516e-06,
+      "loss": 0.8987,
+      "step": 25990
+    },
+    {
+      "epoch": 1.028297969111511,
+      "grad_norm": 1.1735515640484673,
+      "learning_rate": 8.309633947850486e-06,
+      "loss": 0.9019,
+      "step": 26000
+    },
+    {
+      "epoch": 1.0286934683304,
+      "grad_norm": 1.0710227994306132,
+      "learning_rate": 8.307908845105685e-06,
+      "loss": 0.9147,
+      "step": 26010
+    },
+    {
+      "epoch": 1.0290889675492891,
+      "grad_norm": 1.1215359325055878,
+      "learning_rate": 8.306183041802462e-06,
+      "loss": 0.9011,
+      "step": 26020
+    },
+    {
+      "epoch": 1.0294844667681782,
+      "grad_norm": 1.0669939726035618,
+      "learning_rate": 8.304456538306314e-06,
+      "loss": 0.8788,
+      "step": 26030
+    },
+    {
+      "epoch": 1.0298799659870672,
+      "grad_norm": 1.2722986858375807,
+      "learning_rate": 8.302729334982883e-06,
+      "loss": 0.9052,
+      "step": 26040
+    },
+    {
+      "epoch": 1.0302754652059563,
+      "grad_norm": 1.0800191926325546,
+      "learning_rate": 8.301001432197962e-06,
+      "loss": 0.919,
+      "step": 26050
+    },
+    {
+      "epoch": 1.0306709644248453,
+      "grad_norm": 1.042226298377939,
+      "learning_rate": 8.299272830317491e-06,
+      "loss": 0.9213,
+      "step": 26060
+    },
+    {
+      "epoch": 1.0310664636437343,
+      "grad_norm": 1.1974991756960707,
+      "learning_rate": 8.297543529707558e-06,
+      "loss": 0.9123,
+      "step": 26070
+    },
+    {
+      "epoch": 1.0314619628626234,
+      "grad_norm": 1.2329459284704791,
+      "learning_rate": 8.2958135307344e-06,
+      "loss": 0.914,
+      "step": 26080
+    },
+    {
+      "epoch": 1.0318574620815124,
+      "grad_norm": 1.103551851670795,
+      "learning_rate": 8.294082833764401e-06,
+      "loss": 0.9046,
+      "step": 26090
+    },
+    {
+      "epoch": 1.0322529613004015,
+      "grad_norm": 1.142944376839717,
+      "learning_rate": 8.29235143916409e-06,
+      "loss": 0.9165,
+      "step": 26100
+    },
+    {
+      "epoch": 1.0326484605192905,
+      "grad_norm": 1.0294926939216598,
+      "learning_rate": 8.290619347300153e-06,
+      "loss": 0.9108,
+      "step": 26110
+    },
+    {
+      "epoch": 1.0330439597381795,
+      "grad_norm": 1.0870483219904943,
+      "learning_rate": 8.288886558539414e-06,
+      "loss": 0.9048,
+      "step": 26120
+    },
+    {
+      "epoch": 1.0334394589570686,
+      "grad_norm": 1.1548781079038755,
+      "learning_rate": 8.287153073248845e-06,
+      "loss": 0.9127,
+      "step": 26130
+    },
+    {
+      "epoch": 1.0338349581759576,
+      "grad_norm": 1.0016720999598157,
+      "learning_rate": 8.285418891795572e-06,
+      "loss": 0.9009,
+      "step": 26140
+    },
+    {
+      "epoch": 1.0342304573948466,
+      "grad_norm": 1.1700391722248944,
+      "learning_rate": 8.283684014546864e-06,
+      "loss": 0.8815,
+      "step": 26150
+    },
+    {
+      "epoch": 1.0346259566137357,
+      "grad_norm": 1.1628013271247277,
+      "learning_rate": 8.281948441870138e-06,
+      "loss": 0.8992,
+      "step": 26160
+    },
+    {
+      "epoch": 1.0350214558326247,
+      "grad_norm": 1.1572643485272085,
+      "learning_rate": 8.28021217413296e-06,
+      "loss": 0.9151,
+      "step": 26170
+    },
+    {
+      "epoch": 1.0354169550515138,
+      "grad_norm": 1.2056700413286618,
+      "learning_rate": 8.278475211703041e-06,
+      "loss": 0.8934,
+      "step": 26180
+    },
+    {
+      "epoch": 1.0358124542704028,
+      "grad_norm": 1.0951288087858853,
+      "learning_rate": 8.27673755494824e-06,
+      "loss": 0.9179,
+      "step": 26190
+    },
+    {
+      "epoch": 1.0362079534892918,
+      "grad_norm": 1.1243189476912894,
+      "learning_rate": 8.274999204236562e-06,
+      "loss": 0.924,
+      "step": 26200
+    },
+    {
+      "epoch": 1.0366034527081809,
+      "grad_norm": 1.2070529113877762,
+      "learning_rate": 8.27326015993616e-06,
+      "loss": 0.9066,
+      "step": 26210
+    },
+    {
+      "epoch": 1.03699895192707,
+      "grad_norm": 1.1319268547196213,
+      "learning_rate": 8.271520422415333e-06,
+      "loss": 0.9044,
+      "step": 26220
+    },
+    {
+      "epoch": 1.037394451145959,
+      "grad_norm": 1.226502204292547,
+      "learning_rate": 8.26977999204253e-06,
+      "loss": 0.9174,
+      "step": 26230
+    },
+    {
+      "epoch": 1.037789950364848,
+      "grad_norm": 1.1405096597113105,
+      "learning_rate": 8.268038869186345e-06,
+      "loss": 0.9003,
+      "step": 26240
+    },
+    {
+      "epoch": 1.038185449583737,
+      "grad_norm": 1.316048883514623,
+      "learning_rate": 8.266297054215515e-06,
+      "loss": 0.9029,
+      "step": 26250
+    },
+    {
+      "epoch": 1.038580948802626,
+      "grad_norm": 1.2984908387966145,
+      "learning_rate": 8.264554547498927e-06,
+      "loss": 0.8923,
+      "step": 26260
+    },
+    {
+      "epoch": 1.0389764480215151,
+      "grad_norm": 1.1693070516574628,
+      "learning_rate": 8.262811349405616e-06,
+      "loss": 0.888,
+      "step": 26270
+    },
+    {
+      "epoch": 1.0393719472404042,
+      "grad_norm": 1.1580902130950566,
+      "learning_rate": 8.261067460304759e-06,
+      "loss": 0.905,
+      "step": 26280
+    },
+    {
+      "epoch": 1.0397674464592932,
+      "grad_norm": 1.2112393609040144,
+      "learning_rate": 8.259322880565683e-06,
+      "loss": 0.9152,
+      "step": 26290
+    },
+    {
+      "epoch": 1.0401629456781822,
+      "grad_norm": 1.0918316136989545,
+      "learning_rate": 8.257577610557861e-06,
+      "loss": 0.9063,
+      "step": 26300
+    },
+    {
+      "epoch": 1.0405584448970713,
+      "grad_norm": 1.188777233244895,
+      "learning_rate": 8.25583165065091e-06,
+      "loss": 0.9069,
+      "step": 26310
+    },
+    {
+      "epoch": 1.0409539441159603,
+      "grad_norm": 1.2420338199548104,
+      "learning_rate": 8.254085001214596e-06,
+      "loss": 0.9133,
+      "step": 26320
+    },
+    {
+      "epoch": 1.0413494433348494,
+      "grad_norm": 1.3712209946739753,
+      "learning_rate": 8.252337662618826e-06,
+      "loss": 0.9187,
+      "step": 26330
+    },
+    {
+      "epoch": 1.0417449425537384,
+      "grad_norm": 1.1484534682367094,
+      "learning_rate": 8.250589635233662e-06,
+      "loss": 0.9072,
+      "step": 26340
+    },
+    {
+      "epoch": 1.0421404417726274,
+      "grad_norm": 1.1937789648256916,
+      "learning_rate": 8.248840919429301e-06,
+      "loss": 0.8974,
+      "step": 26350
+    },
+    {
+      "epoch": 1.0425359409915165,
+      "grad_norm": 1.0002180706825603,
+      "learning_rate": 8.247091515576093e-06,
+      "loss": 0.9027,
+      "step": 26360
+    },
+    {
+      "epoch": 1.0429314402104055,
+      "grad_norm": 1.2674330472529245,
+      "learning_rate": 8.245341424044532e-06,
+      "loss": 0.8807,
+      "step": 26370
+    },
+    {
+      "epoch": 1.0433269394292946,
+      "grad_norm": 1.154459992349424,
+      "learning_rate": 8.243590645205256e-06,
+      "loss": 0.8949,
+      "step": 26380
+    },
+    {
+      "epoch": 1.0437224386481836,
+      "grad_norm": 1.0375522349338302,
+      "learning_rate": 8.241839179429054e-06,
+      "loss": 0.8975,
+      "step": 26390
+    },
+    {
+      "epoch": 1.0441179378670726,
+      "grad_norm": 1.0463465357097628,
+      "learning_rate": 8.240087027086852e-06,
+      "loss": 0.914,
+      "step": 26400
+    },
+    {
+      "epoch": 1.0445134370859617,
+      "grad_norm": 1.3768072345518934,
+      "learning_rate": 8.238334188549727e-06,
+      "loss": 0.9035,
+      "step": 26410
+    },
+    {
+      "epoch": 1.0449089363048507,
+      "grad_norm": 1.1738597367096681,
+      "learning_rate": 8.2365806641889e-06,
+      "loss": 0.8897,
+      "step": 26420
+    },
+    {
+      "epoch": 1.0453044355237398,
+      "grad_norm": 1.0828780802961187,
+      "learning_rate": 8.234826454375741e-06,
+      "loss": 0.9035,
+      "step": 26430
+    },
+    {
+      "epoch": 1.0456999347426288,
+      "grad_norm": 1.0407314262519847,
+      "learning_rate": 8.233071559481755e-06,
+      "loss": 0.9062,
+      "step": 26440
+    },
+    {
+      "epoch": 1.0460954339615178,
+      "grad_norm": 1.1602763104533675,
+      "learning_rate": 8.231315979878604e-06,
+      "loss": 0.8907,
+      "step": 26450
+    },
+    {
+      "epoch": 1.0464909331804069,
+      "grad_norm": 1.0824471121944337,
+      "learning_rate": 8.22955971593809e-06,
+      "loss": 0.8885,
+      "step": 26460
+    },
+    {
+      "epoch": 1.046886432399296,
+      "grad_norm": 1.3661926987365036,
+      "learning_rate": 8.227802768032154e-06,
+      "loss": 0.906,
+      "step": 26470
+    },
+    {
+      "epoch": 1.047281931618185,
+      "grad_norm": 1.0936388123559422,
+      "learning_rate": 8.226045136532894e-06,
+      "loss": 0.9039,
+      "step": 26480
+    },
+    {
+      "epoch": 1.0476774308370742,
+      "grad_norm": 1.1445401938325779,
+      "learning_rate": 8.224286821812541e-06,
+      "loss": 0.8909,
+      "step": 26490
+    },
+    {
+      "epoch": 1.0480729300559632,
+      "grad_norm": 1.3011384987255774,
+      "learning_rate": 8.22252782424348e-06,
+      "loss": 0.9089,
+      "step": 26500
+    },
+    {
+      "epoch": 1.0484684292748523,
+      "grad_norm": 1.1488955314782079,
+      "learning_rate": 8.220768144198235e-06,
+      "loss": 0.9083,
+      "step": 26510
+    },
+    {
+      "epoch": 1.0488639284937413,
+      "grad_norm": 1.1573743425888623,
+      "learning_rate": 8.219007782049474e-06,
+      "loss": 0.9198,
+      "step": 26520
+    },
+    {
+      "epoch": 1.0492594277126304,
+      "grad_norm": 1.1255650197311504,
+      "learning_rate": 8.217246738170014e-06,
+      "loss": 0.8934,
+      "step": 26530
+    },
+    {
+      "epoch": 1.0496549269315194,
+      "grad_norm": 1.0875887225361511,
+      "learning_rate": 8.215485012932813e-06,
+      "loss": 0.8849,
+      "step": 26540
+    },
+    {
+      "epoch": 1.0500504261504084,
+      "grad_norm": 1.0776725283215887,
+      "learning_rate": 8.213722606710975e-06,
+      "loss": 0.9023,
+      "step": 26550
+    },
+    {
+      "epoch": 1.0504459253692975,
+      "grad_norm": 1.094791239471887,
+      "learning_rate": 8.211959519877749e-06,
+      "loss": 0.8915,
+      "step": 26560
+    },
+    {
+      "epoch": 1.0508414245881865,
+      "grad_norm": 1.2152553885248114,
+      "learning_rate": 8.210195752806523e-06,
+      "loss": 0.8836,
+      "step": 26570
+    },
+    {
+      "epoch": 1.0512369238070756,
+      "grad_norm": 1.251656420660928,
+      "learning_rate": 8.208431305870832e-06,
+      "loss": 0.9095,
+      "step": 26580
+    },
+    {
+      "epoch": 1.0516324230259646,
+      "grad_norm": 1.1012829413081515,
+      "learning_rate": 8.206666179444361e-06,
+      "loss": 0.9029,
+      "step": 26590
+    },
+    {
+      "epoch": 1.0520279222448536,
+      "grad_norm": 1.1199261024941962,
+      "learning_rate": 8.204900373900928e-06,
+      "loss": 0.8948,
+      "step": 26600
+    },
+    {
+      "epoch": 1.0524234214637427,
+      "grad_norm": 1.161610127231663,
+      "learning_rate": 8.203133889614504e-06,
+      "loss": 0.8763,
+      "step": 26610
+    },
+    {
+      "epoch": 1.0528189206826317,
+      "grad_norm": 1.0866075528266157,
+      "learning_rate": 8.2013667269592e-06,
+      "loss": 0.9017,
+      "step": 26620
+    },
+    {
+      "epoch": 1.0532144199015208,
+      "grad_norm": 1.0359004203083304,
+      "learning_rate": 8.199598886309268e-06,
+      "loss": 0.8823,
+      "step": 26630
+    },
+    {
+      "epoch": 1.0536099191204098,
+      "grad_norm": 1.333290592930516,
+      "learning_rate": 8.197830368039109e-06,
+      "loss": 0.9019,
+      "step": 26640
+    },
+    {
+      "epoch": 1.0540054183392988,
+      "grad_norm": 1.2852055663424773,
+      "learning_rate": 8.196061172523263e-06,
+      "loss": 0.8986,
+      "step": 26650
+    },
+    {
+      "epoch": 1.0544009175581879,
+      "grad_norm": 1.2210669835170829,
+      "learning_rate": 8.194291300136417e-06,
+      "loss": 0.9148,
+      "step": 26660
+    },
+    {
+      "epoch": 1.054796416777077,
+      "grad_norm": 1.1571294959315643,
+      "learning_rate": 8.192520751253399e-06,
+      "loss": 0.8965,
+      "step": 26670
+    },
+    {
+      "epoch": 1.055191915995966,
+      "grad_norm": 1.190394692001253,
+      "learning_rate": 8.19074952624918e-06,
+      "loss": 0.8978,
+      "step": 26680
+    },
+    {
+      "epoch": 1.055587415214855,
+      "grad_norm": 1.038681535401097,
+      "learning_rate": 8.188977625498876e-06,
+      "loss": 0.9082,
+      "step": 26690
+    },
+    {
+      "epoch": 1.055982914433744,
+      "grad_norm": 1.026335290544012,
+      "learning_rate": 8.187205049377746e-06,
+      "loss": 0.9192,
+      "step": 26700
+    },
+    {
+      "epoch": 1.056378413652633,
+      "grad_norm": 1.1822187498552572,
+      "learning_rate": 8.18543179826119e-06,
+      "loss": 0.8938,
+      "step": 26710
+    },
+    {
+      "epoch": 1.0567739128715221,
+      "grad_norm": 1.1552711851772264,
+      "learning_rate": 8.183657872524751e-06,
+      "loss": 0.9199,
+      "step": 26720
+    },
+    {
+      "epoch": 1.0571694120904112,
+      "grad_norm": 1.2539855798373403,
+      "learning_rate": 8.181883272544119e-06,
+      "loss": 0.8907,
+      "step": 26730
+    },
+    {
+      "epoch": 1.0575649113093002,
+      "grad_norm": 1.1759232523817262,
+      "learning_rate": 8.180107998695122e-06,
+      "loss": 0.9169,
+      "step": 26740
+    },
+    {
+      "epoch": 1.0579604105281892,
+      "grad_norm": 1.2925226259078264,
+      "learning_rate": 8.178332051353734e-06,
+      "loss": 0.9003,
+      "step": 26750
+    },
+    {
+      "epoch": 1.0583559097470783,
+      "grad_norm": 1.058633064738671,
+      "learning_rate": 8.176555430896068e-06,
+      "loss": 0.895,
+      "step": 26760
+    },
+    {
+      "epoch": 1.0587514089659673,
+      "grad_norm": 1.2178054428790546,
+      "learning_rate": 8.174778137698384e-06,
+      "loss": 0.9016,
+      "step": 26770
+    },
+    {
+      "epoch": 1.0591469081848564,
+      "grad_norm": 1.0291043090941907,
+      "learning_rate": 8.17300017213708e-06,
+      "loss": 0.8949,
+      "step": 26780
+    },
+    {
+      "epoch": 1.0595424074037454,
+      "grad_norm": 1.149950623218104,
+      "learning_rate": 8.171221534588702e-06,
+      "loss": 0.896,
+      "step": 26790
+    },
+    {
+      "epoch": 1.0599379066226344,
+      "grad_norm": 1.214800080595436,
+      "learning_rate": 8.169442225429931e-06,
+      "loss": 0.8848,
+      "step": 26800
+    },
+    {
+      "epoch": 1.0603334058415235,
+      "grad_norm": 1.0834996282959277,
+      "learning_rate": 8.167662245037598e-06,
+      "loss": 0.927,
+      "step": 26810
+    },
+    {
+      "epoch": 1.0607289050604125,
+      "grad_norm": 1.084241228228272,
+      "learning_rate": 8.165881593788669e-06,
+      "loss": 0.9023,
+      "step": 26820
+    },
+    {
+      "epoch": 1.0611244042793015,
+      "grad_norm": 1.184474320344194,
+      "learning_rate": 8.164100272060258e-06,
+      "loss": 0.9039,
+      "step": 26830
+    },
+    {
+      "epoch": 1.0615199034981906,
+      "grad_norm": 1.2346490645276604,
+      "learning_rate": 8.162318280229618e-06,
+      "loss": 0.9239,
+      "step": 26840
+    },
+    {
+      "epoch": 1.0619154027170796,
+      "grad_norm": 1.1511896245603048,
+      "learning_rate": 8.160535618674142e-06,
+      "loss": 0.896,
+      "step": 26850
+    },
+    {
+      "epoch": 1.0623109019359687,
+      "grad_norm": 1.519220099755732,
+      "learning_rate": 8.158752287771369e-06,
+      "loss": 0.8957,
+      "step": 26860
+    },
+    {
+      "epoch": 1.0627064011548577,
+      "grad_norm": 1.0775623234113063,
+      "learning_rate": 8.156968287898978e-06,
+      "loss": 0.8811,
+      "step": 26870
+    },
+    {
+      "epoch": 1.0631019003737467,
+      "grad_norm": 1.163800431730651,
+      "learning_rate": 8.15518361943479e-06,
+      "loss": 0.909,
+      "step": 26880
+    },
+    {
+      "epoch": 1.0634973995926358,
+      "grad_norm": 1.062163936008161,
+      "learning_rate": 8.153398282756766e-06,
+      "loss": 0.9041,
+      "step": 26890
+    },
+    {
+      "epoch": 1.0638928988115248,
+      "grad_norm": 1.3120477412719362,
+      "learning_rate": 8.151612278243011e-06,
+      "loss": 0.9005,
+      "step": 26900
+    },
+    {
+      "epoch": 1.0642883980304139,
+      "grad_norm": 1.1731405590931228,
+      "learning_rate": 8.149825606271768e-06,
+      "loss": 0.9022,
+      "step": 26910
+    },
+    {
+      "epoch": 1.064683897249303,
+      "grad_norm": 1.2209189179323687,
+      "learning_rate": 8.148038267221427e-06,
+      "loss": 0.889,
+      "step": 26920
+    },
+    {
+      "epoch": 1.065079396468192,
+      "grad_norm": 1.036859905224877,
+      "learning_rate": 8.146250261470513e-06,
+      "loss": 0.9275,
+      "step": 26930
+    },
+    {
+      "epoch": 1.065474895687081,
+      "grad_norm": 1.0441229722824374,
+      "learning_rate": 8.144461589397695e-06,
+      "loss": 0.8953,
+      "step": 26940
+    },
+    {
+      "epoch": 1.06587039490597,
+      "grad_norm": 1.0483457702917496,
+      "learning_rate": 8.142672251381785e-06,
+      "loss": 0.8969,
+      "step": 26950
+    },
+    {
+      "epoch": 1.066265894124859,
+      "grad_norm": 1.1660382719042404,
+      "learning_rate": 8.140882247801731e-06,
+      "loss": 0.8956,
+      "step": 26960
+    },
+    {
+      "epoch": 1.066661393343748,
+      "grad_norm": 1.1728388099187352,
+      "learning_rate": 8.139091579036629e-06,
+      "loss": 0.9167,
+      "step": 26970
+    },
+    {
+      "epoch": 1.0670568925626371,
+      "grad_norm": 1.0583233252609623,
+      "learning_rate": 8.13730024546571e-06,
+      "loss": 0.9089,
+      "step": 26980
+    },
+    {
+      "epoch": 1.0674523917815262,
+      "grad_norm": 1.112016547589213,
+      "learning_rate": 8.135508247468348e-06,
+      "loss": 0.902,
+      "step": 26990
+    },
+    {
+      "epoch": 1.0678478910004152,
+      "grad_norm": 1.1472180772779197,
+      "learning_rate": 8.133715585424058e-06,
+      "loss": 0.9044,
+      "step": 27000
+    },
+    {
+      "epoch": 1.0682433902193043,
+      "grad_norm": 1.1128413884288322,
+      "learning_rate": 8.131922259712493e-06,
+      "loss": 0.8854,
+      "step": 27010
+    },
+    {
+      "epoch": 1.0686388894381933,
+      "grad_norm": 1.1240627783948445,
+      "learning_rate": 8.130128270713448e-06,
+      "loss": 0.915,
+      "step": 27020
+    },
+    {
+      "epoch": 1.0690343886570823,
+      "grad_norm": 1.1794780876630457,
+      "learning_rate": 8.128333618806865e-06,
+      "loss": 0.8876,
+      "step": 27030
+    },
+    {
+      "epoch": 1.0694298878759714,
+      "grad_norm": 1.1786219108311415,
+      "learning_rate": 8.126538304372816e-06,
+      "loss": 0.8907,
+      "step": 27040
+    },
+    {
+      "epoch": 1.0698253870948604,
+      "grad_norm": 1.0892197450126473,
+      "learning_rate": 8.124742327791517e-06,
+      "loss": 0.903,
+      "step": 27050
+    },
+    {
+      "epoch": 1.0702208863137495,
+      "grad_norm": 1.0006132720474732,
+      "learning_rate": 8.122945689443328e-06,
+      "loss": 0.8997,
+      "step": 27060
+    },
+    {
+      "epoch": 1.0706163855326385,
+      "grad_norm": 1.1778965773414205,
+      "learning_rate": 8.121148389708745e-06,
+      "loss": 0.9006,
+      "step": 27070
+    },
+    {
+      "epoch": 1.0710118847515275,
+      "grad_norm": 1.1244966274064,
+      "learning_rate": 8.119350428968403e-06,
+      "loss": 0.8954,
+      "step": 27080
+    },
+    {
+      "epoch": 1.0714073839704166,
+      "grad_norm": 1.0197631932308509,
+      "learning_rate": 8.117551807603083e-06,
+      "loss": 0.905,
+      "step": 27090
+    },
+    {
+      "epoch": 1.0718028831893056,
+      "grad_norm": 1.115615567866468,
+      "learning_rate": 8.115752525993701e-06,
+      "loss": 0.9203,
+      "step": 27100
+    },
+    {
+      "epoch": 1.0721983824081947,
+      "grad_norm": 1.0765239213424813,
+      "learning_rate": 8.113952584521314e-06,
+      "loss": 0.8965,
+      "step": 27110
+    },
+    {
+      "epoch": 1.0725938816270837,
+      "grad_norm": 1.0198095409900247,
+      "learning_rate": 8.112151983567117e-06,
+      "loss": 0.9106,
+      "step": 27120
+    },
+    {
+      "epoch": 1.0729893808459727,
+      "grad_norm": 1.0933626872102498,
+      "learning_rate": 8.110350723512448e-06,
+      "loss": 0.8972,
+      "step": 27130
+    },
+    {
+      "epoch": 1.0733848800648618,
+      "grad_norm": 1.0905159498447563,
+      "learning_rate": 8.108548804738783e-06,
+      "loss": 0.8983,
+      "step": 27140
+    },
+    {
+      "epoch": 1.0737803792837508,
+      "grad_norm": 1.0356050178244327,
+      "learning_rate": 8.106746227627739e-06,
+      "loss": 0.8876,
+      "step": 27150
+    },
+    {
+      "epoch": 1.0741758785026398,
+      "grad_norm": 1.1396098722692873,
+      "learning_rate": 8.104942992561067e-06,
+      "loss": 0.9057,
+      "step": 27160
+    },
+    {
+      "epoch": 1.0745713777215289,
+      "grad_norm": 1.2743302032628894,
+      "learning_rate": 8.103139099920666e-06,
+      "loss": 0.9179,
+      "step": 27170
+    },
+    {
+      "epoch": 1.074966876940418,
+      "grad_norm": 1.2623677737547447,
+      "learning_rate": 8.101334550088566e-06,
+      "loss": 0.9033,
+      "step": 27180
+    },
+    {
+      "epoch": 1.0753623761593072,
+      "grad_norm": 1.1729772587339842,
+      "learning_rate": 8.09952934344694e-06,
+      "loss": 0.892,
+      "step": 27190
+    },
+    {
+      "epoch": 1.0757578753781962,
+      "grad_norm": 1.1321572329731517,
+      "learning_rate": 8.097723480378102e-06,
+      "loss": 0.9014,
+      "step": 27200
+    },
+    {
+      "epoch": 1.0761533745970853,
+      "grad_norm": 1.0344941903869767,
+      "learning_rate": 8.095916961264502e-06,
+      "loss": 0.8866,
+      "step": 27210
+    },
+    {
+      "epoch": 1.0765488738159743,
+      "grad_norm": 0.9920099445325548,
+      "learning_rate": 8.094109786488729e-06,
+      "loss": 0.8986,
+      "step": 27220
+    },
+    {
+      "epoch": 1.0769443730348633,
+      "grad_norm": 1.0791226397179134,
+      "learning_rate": 8.092301956433512e-06,
+      "loss": 0.883,
+      "step": 27230
+    },
+    {
+      "epoch": 1.0773398722537524,
+      "grad_norm": 1.3703968053746718,
+      "learning_rate": 8.090493471481717e-06,
+      "loss": 0.8921,
+      "step": 27240
+    },
+    {
+      "epoch": 1.0777353714726414,
+      "grad_norm": 1.1420306684436488,
+      "learning_rate": 8.088684332016355e-06,
+      "loss": 0.9122,
+      "step": 27250
+    },
+    {
+      "epoch": 1.0781308706915305,
+      "grad_norm": 1.020377065531343,
+      "learning_rate": 8.086874538420563e-06,
+      "loss": 0.9131,
+      "step": 27260
+    },
+    {
+      "epoch": 1.0785263699104195,
+      "grad_norm": 1.175087832980096,
+      "learning_rate": 8.085064091077632e-06,
+      "loss": 0.8809,
+      "step": 27270
+    },
+    {
+      "epoch": 1.0789218691293085,
+      "grad_norm": 1.2445735045852733,
+      "learning_rate": 8.08325299037098e-06,
+      "loss": 0.885,
+      "step": 27280
+    },
+    {
+      "epoch": 1.0793173683481976,
+      "grad_norm": 1.2369625712158248,
+      "learning_rate": 8.08144123668417e-06,
+      "loss": 0.8906,
+      "step": 27290
+    },
+    {
+      "epoch": 1.0797128675670866,
+      "grad_norm": 1.0882454123715677,
+      "learning_rate": 8.079628830400893e-06,
+      "loss": 0.8837,
+      "step": 27300
+    },
+    {
+      "epoch": 1.0801083667859757,
+      "grad_norm": 1.1970886907228255,
+      "learning_rate": 8.077815771904993e-06,
+      "loss": 0.9011,
+      "step": 27310
+    },
+    {
+      "epoch": 1.0805038660048647,
+      "grad_norm": 1.1355522024792915,
+      "learning_rate": 8.076002061580441e-06,
+      "loss": 0.8942,
+      "step": 27320
+    },
+    {
+      "epoch": 1.0808993652237537,
+      "grad_norm": 1.0345932697649929,
+      "learning_rate": 8.074187699811351e-06,
+      "loss": 0.9065,
+      "step": 27330
+    },
+    {
+      "epoch": 1.0812948644426428,
+      "grad_norm": 1.2290030983193927,
+      "learning_rate": 8.072372686981975e-06,
+      "loss": 0.8976,
+      "step": 27340
+    },
+    {
+      "epoch": 1.0816903636615318,
+      "grad_norm": 1.1470371833594348,
+      "learning_rate": 8.0705570234767e-06,
+      "loss": 0.8984,
+      "step": 27350
+    },
+    {
+      "epoch": 1.0820858628804209,
+      "grad_norm": 1.1687524580356325,
+      "learning_rate": 8.068740709680054e-06,
+      "loss": 0.8858,
+      "step": 27360
+    },
+    {
+      "epoch": 1.08248136209931,
+      "grad_norm": 1.1309495748275602,
+      "learning_rate": 8.066923745976697e-06,
+      "loss": 0.8902,
+      "step": 27370
+    },
+    {
+      "epoch": 1.082876861318199,
+      "grad_norm": 1.167742351543067,
+      "learning_rate": 8.065106132751437e-06,
+      "loss": 0.8929,
+      "step": 27380
+    },
+    {
+      "epoch": 1.083272360537088,
+      "grad_norm": 1.2127522748180066,
+      "learning_rate": 8.063287870389207e-06,
+      "loss": 0.8898,
+      "step": 27390
+    },
+    {
+      "epoch": 1.083667859755977,
+      "grad_norm": 1.0811128616461532,
+      "learning_rate": 8.061468959275089e-06,
+      "loss": 0.9061,
+      "step": 27400
+    },
+    {
+      "epoch": 1.084063358974866,
+      "grad_norm": 1.1253260949092212,
+      "learning_rate": 8.059649399794295e-06,
+      "loss": 0.889,
+      "step": 27410
+    },
+    {
+      "epoch": 1.084458858193755,
+      "grad_norm": 1.3547996478343813,
+      "learning_rate": 8.057829192332177e-06,
+      "loss": 0.8925,
+      "step": 27420
+    },
+    {
+      "epoch": 1.0848543574126441,
+      "grad_norm": 1.102503719772114,
+      "learning_rate": 8.05600833727422e-06,
+      "loss": 0.9062,
+      "step": 27430
+    },
+    {
+      "epoch": 1.0852498566315332,
+      "grad_norm": 1.0900852983570823,
+      "learning_rate": 8.054186835006057e-06,
+      "loss": 0.8957,
+      "step": 27440
+    },
+    {
+      "epoch": 1.0856453558504222,
+      "grad_norm": 1.1085798877125128,
+      "learning_rate": 8.052364685913444e-06,
+      "loss": 0.8727,
+      "step": 27450
+    },
+    {
+      "epoch": 1.0860408550693113,
+      "grad_norm": 1.150757146133895,
+      "learning_rate": 8.050541890382287e-06,
+      "loss": 0.9008,
+      "step": 27460
+    },
+    {
+      "epoch": 1.0864363542882003,
+      "grad_norm": 1.244177478165845,
+      "learning_rate": 8.04871844879862e-06,
+      "loss": 0.8903,
+      "step": 27470
+    },
+    {
+      "epoch": 1.0868318535070893,
+      "grad_norm": 1.136418139174831,
+      "learning_rate": 8.046894361548617e-06,
+      "loss": 0.8917,
+      "step": 27480
+    },
+    {
+      "epoch": 1.0872273527259784,
+      "grad_norm": 1.0458905330421155,
+      "learning_rate": 8.045069629018585e-06,
+      "loss": 0.8867,
+      "step": 27490
+    },
+    {
+      "epoch": 1.0876228519448674,
+      "grad_norm": 1.287540629833461,
+      "learning_rate": 8.043244251594977e-06,
+      "loss": 0.8868,
+      "step": 27500
+    },
+    {
+      "epoch": 1.0880183511637564,
+      "grad_norm": 1.0983028229890337,
+      "learning_rate": 8.041418229664373e-06,
+      "loss": 0.8849,
+      "step": 27510
+    },
+    {
+      "epoch": 1.0884138503826455,
+      "grad_norm": 1.0820605708080853,
+      "learning_rate": 8.039591563613494e-06,
+      "loss": 0.8976,
+      "step": 27520
+    },
+    {
+      "epoch": 1.0888093496015345,
+      "grad_norm": 1.1308754257809712,
+      "learning_rate": 8.037764253829197e-06,
+      "loss": 0.8987,
+      "step": 27530
+    },
+    {
+      "epoch": 1.0892048488204236,
+      "grad_norm": 1.0928748560251753,
+      "learning_rate": 8.035936300698477e-06,
+      "loss": 0.885,
+      "step": 27540
+    },
+    {
+      "epoch": 1.0896003480393126,
+      "grad_norm": 1.2211412651393077,
+      "learning_rate": 8.034107704608458e-06,
+      "loss": 0.8886,
+      "step": 27550
+    },
+    {
+      "epoch": 1.0899958472582016,
+      "grad_norm": 1.07612843278846,
+      "learning_rate": 8.032278465946408e-06,
+      "loss": 0.9108,
+      "step": 27560
+    },
+    {
+      "epoch": 1.0903913464770907,
+      "grad_norm": 1.2401455742095315,
+      "learning_rate": 8.03044858509973e-06,
+      "loss": 0.8917,
+      "step": 27570
+    },
+    {
+      "epoch": 1.0907868456959797,
+      "grad_norm": 1.1072522064369923,
+      "learning_rate": 8.028618062455958e-06,
+      "loss": 0.8908,
+      "step": 27580
+    },
+    {
+      "epoch": 1.0911823449148688,
+      "grad_norm": 1.088237639846867,
+      "learning_rate": 8.026786898402769e-06,
+      "loss": 0.8972,
+      "step": 27590
+    },
+    {
+      "epoch": 1.0915778441337578,
+      "grad_norm": 1.1707833118213482,
+      "learning_rate": 8.02495509332797e-06,
+      "loss": 0.902,
+      "step": 27600
+    },
+    {
+      "epoch": 1.0919733433526468,
+      "grad_norm": 1.1923207004273388,
+      "learning_rate": 8.023122647619505e-06,
+      "loss": 0.8709,
+      "step": 27610
+    },
+    {
+      "epoch": 1.0923688425715359,
+      "grad_norm": 1.0218484845622284,
+      "learning_rate": 8.021289561665457e-06,
+      "loss": 0.8982,
+      "step": 27620
+    },
+    {
+      "epoch": 1.092764341790425,
+      "grad_norm": 1.1324848871719058,
+      "learning_rate": 8.01945583585404e-06,
+      "loss": 0.89,
+      "step": 27630
+    },
+    {
+      "epoch": 1.093159841009314,
+      "grad_norm": 1.2474788688766536,
+      "learning_rate": 8.01762147057361e-06,
+      "loss": 0.8642,
+      "step": 27640
+    },
+    {
+      "epoch": 1.093555340228203,
+      "grad_norm": 1.0842033907627324,
+      "learning_rate": 8.015786466212647e-06,
+      "loss": 0.8964,
+      "step": 27650
+    },
+    {
+      "epoch": 1.093950839447092,
+      "grad_norm": 1.3133258445448526,
+      "learning_rate": 8.01395082315978e-06,
+      "loss": 0.8841,
+      "step": 27660
+    },
+    {
+      "epoch": 1.094346338665981,
+      "grad_norm": 1.0946303898282324,
+      "learning_rate": 8.012114541803763e-06,
+      "loss": 0.9099,
+      "step": 27670
+    },
+    {
+      "epoch": 1.0947418378848701,
+      "grad_norm": 1.0855375575273427,
+      "learning_rate": 8.01027762253349e-06,
+      "loss": 0.9095,
+      "step": 27680
+    },
+    {
+      "epoch": 1.0951373371037592,
+      "grad_norm": 1.24721771882688,
+      "learning_rate": 8.008440065737992e-06,
+      "loss": 0.8805,
+      "step": 27690
+    },
+    {
+      "epoch": 1.0955328363226482,
+      "grad_norm": 1.2598897594091798,
+      "learning_rate": 8.006601871806426e-06,
+      "loss": 0.8962,
+      "step": 27700
+    },
+    {
+      "epoch": 1.0959283355415372,
+      "grad_norm": 1.0726932236345916,
+      "learning_rate": 8.004763041128095e-06,
+      "loss": 0.9019,
+      "step": 27710
+    },
+    {
+      "epoch": 1.0963238347604263,
+      "grad_norm": 1.110410541762521,
+      "learning_rate": 8.00292357409243e-06,
+      "loss": 0.9011,
+      "step": 27720
+    },
+    {
+      "epoch": 1.0967193339793153,
+      "grad_norm": 1.0898460633379807,
+      "learning_rate": 8.001083471088997e-06,
+      "loss": 0.8902,
+      "step": 27730
+    },
+    {
+      "epoch": 1.0971148331982044,
+      "grad_norm": 1.1139950496030946,
+      "learning_rate": 7.999242732507502e-06,
+      "loss": 0.8864,
+      "step": 27740
+    },
+    {
+      "epoch": 1.0975103324170934,
+      "grad_norm": 1.1467439016036507,
+      "learning_rate": 7.99740135873778e-06,
+      "loss": 0.9145,
+      "step": 27750
+    },
+    {
+      "epoch": 1.0979058316359824,
+      "grad_norm": 1.053929566157083,
+      "learning_rate": 7.9955593501698e-06,
+      "loss": 0.8905,
+      "step": 27760
+    },
+    {
+      "epoch": 1.0983013308548715,
+      "grad_norm": 1.082939633714128,
+      "learning_rate": 7.99371670719367e-06,
+      "loss": 0.8977,
+      "step": 27770
+    },
+    {
+      "epoch": 1.0986968300737605,
+      "grad_norm": 1.215191991670252,
+      "learning_rate": 7.991873430199633e-06,
+      "loss": 0.896,
+      "step": 27780
+    },
+    {
+      "epoch": 1.0990923292926496,
+      "grad_norm": 1.2287437959550631,
+      "learning_rate": 7.990029519578059e-06,
+      "loss": 0.9071,
+      "step": 27790
+    },
+    {
+      "epoch": 1.0994878285115386,
+      "grad_norm": 1.2068989822249285,
+      "learning_rate": 7.98818497571946e-06,
+      "loss": 0.88,
+      "step": 27800
+    },
+    {
+      "epoch": 1.0998833277304276,
+      "grad_norm": 1.184431580757121,
+      "learning_rate": 7.986339799014475e-06,
+      "loss": 0.8965,
+      "step": 27810
+    },
+    {
+      "epoch": 1.100278826949317,
+      "grad_norm": 1.136249035505473,
+      "learning_rate": 7.984493989853885e-06,
+      "loss": 0.8928,
+      "step": 27820
+    },
+    {
+      "epoch": 1.100674326168206,
+      "grad_norm": 1.1327519715150345,
+      "learning_rate": 7.982647548628599e-06,
+      "loss": 0.9078,
+      "step": 27830
+    },
+    {
+      "epoch": 1.101069825387095,
+      "grad_norm": 1.0809721166048105,
+      "learning_rate": 7.980800475729661e-06,
+      "loss": 0.8936,
+      "step": 27840
+    },
+    {
+      "epoch": 1.101465324605984,
+      "grad_norm": 1.2635711312312279,
+      "learning_rate": 7.978952771548249e-06,
+      "loss": 0.8951,
+      "step": 27850
+    },
+    {
+      "epoch": 1.101860823824873,
+      "grad_norm": 1.2618975031616506,
+      "learning_rate": 7.977104436475677e-06,
+      "loss": 0.9035,
+      "step": 27860
+    },
+    {
+      "epoch": 1.102256323043762,
+      "grad_norm": 1.1993118624109151,
+      "learning_rate": 7.97525547090339e-06,
+      "loss": 0.9063,
+      "step": 27870
+    },
+    {
+      "epoch": 1.1026518222626511,
+      "grad_norm": 1.0339672543401843,
+      "learning_rate": 7.973405875222965e-06,
+      "loss": 0.907,
+      "step": 27880
+    },
+    {
+      "epoch": 1.1030473214815402,
+      "grad_norm": 1.106009705517437,
+      "learning_rate": 7.971555649826117e-06,
+      "loss": 0.8772,
+      "step": 27890
+    },
+    {
+      "epoch": 1.1034428207004292,
+      "grad_norm": 0.9632336538697404,
+      "learning_rate": 7.969704795104693e-06,
+      "loss": 0.9139,
+      "step": 27900
+    },
+    {
+      "epoch": 1.1038383199193182,
+      "grad_norm": 1.3731543656345955,
+      "learning_rate": 7.96785331145067e-06,
+      "loss": 0.8678,
+      "step": 27910
+    },
+    {
+      "epoch": 1.1042338191382073,
+      "grad_norm": 1.18598360057372,
+      "learning_rate": 7.966001199256163e-06,
+      "loss": 0.8879,
+      "step": 27920
+    },
+    {
+      "epoch": 1.1046293183570963,
+      "grad_norm": 1.1972679924889749,
+      "learning_rate": 7.964148458913415e-06,
+      "loss": 0.8845,
+      "step": 27930
+    },
+    {
+      "epoch": 1.1050248175759854,
+      "grad_norm": 1.107406935089363,
+      "learning_rate": 7.962295090814805e-06,
+      "loss": 0.8932,
+      "step": 27940
+    },
+    {
+      "epoch": 1.1054203167948744,
+      "grad_norm": 1.371837033995282,
+      "learning_rate": 7.960441095352847e-06,
+      "loss": 0.8671,
+      "step": 27950
+    },
+    {
+      "epoch": 1.1058158160137634,
+      "grad_norm": 1.1529148339030442,
+      "learning_rate": 7.958586472920182e-06,
+      "loss": 0.875,
+      "step": 27960
+    },
+    {
+      "epoch": 1.1062113152326525,
+      "grad_norm": 1.0878175808190371,
+      "learning_rate": 7.956731223909591e-06,
+      "loss": 0.8979,
+      "step": 27970
+    },
+    {
+      "epoch": 1.1066068144515415,
+      "grad_norm": 1.0477759942991176,
+      "learning_rate": 7.95487534871398e-06,
+      "loss": 0.9099,
+      "step": 27980
+    },
+    {
+      "epoch": 1.1070023136704306,
+      "grad_norm": 1.1033663742505546,
+      "learning_rate": 7.953018847726395e-06,
+      "loss": 0.8816,
+      "step": 27990
+    },
+    {
+      "epoch": 1.1073978128893196,
+      "grad_norm": 1.319971764574536,
+      "learning_rate": 7.951161721340008e-06,
+      "loss": 0.8945,
+      "step": 28000
+    },
+    {
+      "epoch": 1.1077933121082086,
+      "grad_norm": 1.055882496634333,
+      "learning_rate": 7.94930396994813e-06,
+      "loss": 0.8976,
+      "step": 28010
+    },
+    {
+      "epoch": 1.1081888113270977,
+      "grad_norm": 1.0542913295818592,
+      "learning_rate": 7.947445593944198e-06,
+      "loss": 0.8872,
+      "step": 28020
+    },
+    {
+      "epoch": 1.1085843105459867,
+      "grad_norm": 1.2368089090227496,
+      "learning_rate": 7.945586593721789e-06,
+      "loss": 0.8888,
+      "step": 28030
+    },
+    {
+      "epoch": 1.1089798097648758,
+      "grad_norm": 1.20012700679628,
+      "learning_rate": 7.9437269696746e-06,
+      "loss": 0.8849,
+      "step": 28040
+    },
+    {
+      "epoch": 1.1093753089837648,
+      "grad_norm": 1.2653919292169655,
+      "learning_rate": 7.941866722196472e-06,
+      "loss": 0.8938,
+      "step": 28050
+    },
+    {
+      "epoch": 1.1097708082026538,
+      "grad_norm": 1.1033828876989404,
+      "learning_rate": 7.940005851681373e-06,
+      "loss": 0.9086,
+      "step": 28060
+    },
+    {
+      "epoch": 1.1101663074215429,
+      "grad_norm": 1.3354989216914552,
+      "learning_rate": 7.938144358523403e-06,
+      "loss": 0.8937,
+      "step": 28070
+    },
+    {
+      "epoch": 1.110561806640432,
+      "grad_norm": 1.1861421270295682,
+      "learning_rate": 7.936282243116795e-06,
+      "loss": 0.8811,
+      "step": 28080
+    },
+    {
+      "epoch": 1.110957305859321,
+      "grad_norm": 1.242405241883624,
+      "learning_rate": 7.934419505855915e-06,
+      "loss": 0.8817,
+      "step": 28090
+    },
+    {
+      "epoch": 1.11135280507821,
+      "grad_norm": 1.2286916772434766,
+      "learning_rate": 7.932556147135255e-06,
+      "loss": 0.8692,
+      "step": 28100
+    },
+    {
+      "epoch": 1.111748304297099,
+      "grad_norm": 1.1713755802561983,
+      "learning_rate": 7.930692167349443e-06,
+      "loss": 0.9094,
+      "step": 28110
+    },
+    {
+      "epoch": 1.112143803515988,
+      "grad_norm": 1.1797864180729225,
+      "learning_rate": 7.92882756689324e-06,
+      "loss": 0.9074,
+      "step": 28120
+    },
+    {
+      "epoch": 1.1125393027348771,
+      "grad_norm": 1.11371342424478,
+      "learning_rate": 7.926962346161535e-06,
+      "loss": 0.8806,
+      "step": 28130
+    },
+    {
+      "epoch": 1.1129348019537662,
+      "grad_norm": 1.0435891701912137,
+      "learning_rate": 7.925096505549352e-06,
+      "loss": 0.8938,
+      "step": 28140
+    },
+    {
+      "epoch": 1.1133303011726552,
+      "grad_norm": 1.1939501224670819,
+      "learning_rate": 7.923230045451842e-06,
+      "loss": 0.8754,
+      "step": 28150
+    },
+    {
+      "epoch": 1.1137258003915442,
+      "grad_norm": 1.169466698231923,
+      "learning_rate": 7.921362966264288e-06,
+      "loss": 0.8884,
+      "step": 28160
+    },
+    {
+      "epoch": 1.1141212996104333,
+      "grad_norm": 1.2350179638145247,
+      "learning_rate": 7.919495268382109e-06,
+      "loss": 0.898,
+      "step": 28170
+    },
+    {
+      "epoch": 1.1145167988293223,
+      "grad_norm": 1.2020950165796456,
+      "learning_rate": 7.917626952200849e-06,
+      "loss": 0.8985,
+      "step": 28180
+    },
+    {
+      "epoch": 1.1149122980482113,
+      "grad_norm": 1.2931599298173728,
+      "learning_rate": 7.915758018116185e-06,
+      "loss": 0.891,
+      "step": 28190
+    },
+    {
+      "epoch": 1.1153077972671004,
+      "grad_norm": 1.1819632414691104,
+      "learning_rate": 7.913888466523927e-06,
+      "loss": 0.8911,
+      "step": 28200
+    },
+    {
+      "epoch": 1.1157032964859894,
+      "grad_norm": 1.2165968019246507,
+      "learning_rate": 7.912018297820012e-06,
+      "loss": 0.8753,
+      "step": 28210
+    },
+    {
+      "epoch": 1.1160987957048785,
+      "grad_norm": 1.2093269983020627,
+      "learning_rate": 7.910147512400512e-06,
+      "loss": 0.9058,
+      "step": 28220
+    },
+    {
+      "epoch": 1.1164942949237675,
+      "grad_norm": 1.0302717910667079,
+      "learning_rate": 7.908276110661625e-06,
+      "loss": 0.9122,
+      "step": 28230
+    },
+    {
+      "epoch": 1.1168897941426565,
+      "grad_norm": 1.1168190634051882,
+      "learning_rate": 7.906404092999685e-06,
+      "loss": 0.8766,
+      "step": 28240
+    },
+    {
+      "epoch": 1.1172852933615456,
+      "grad_norm": 1.0163801403206851,
+      "learning_rate": 7.90453145981115e-06,
+      "loss": 0.9016,
+      "step": 28250
+    },
+    {
+      "epoch": 1.1176807925804346,
+      "grad_norm": 1.3533553884499652,
+      "learning_rate": 7.90265821149261e-06,
+      "loss": 0.8635,
+      "step": 28260
+    },
+    {
+      "epoch": 1.1180762917993237,
+      "grad_norm": 1.0967128254769198,
+      "learning_rate": 7.900784348440793e-06,
+      "loss": 0.9015,
+      "step": 28270
+    },
+    {
+      "epoch": 1.1184717910182127,
+      "grad_norm": 1.0520290096044187,
+      "learning_rate": 7.898909871052546e-06,
+      "loss": 0.8688,
+      "step": 28280
+    },
+    {
+      "epoch": 1.1188672902371017,
+      "grad_norm": 1.1827580731163367,
+      "learning_rate": 7.897034779724855e-06,
+      "loss": 0.8922,
+      "step": 28290
+    },
+    {
+      "epoch": 1.1192627894559908,
+      "grad_norm": 1.1140478824864055,
+      "learning_rate": 7.89515907485483e-06,
+      "loss": 0.8718,
+      "step": 28300
+    },
+    {
+      "epoch": 1.1196582886748798,
+      "grad_norm": 1.1980447825166638,
+      "learning_rate": 7.893282756839712e-06,
+      "loss": 0.869,
+      "step": 28310
+    },
+    {
+      "epoch": 1.1200537878937689,
+      "grad_norm": 1.1222536065585487,
+      "learning_rate": 7.891405826076875e-06,
+      "loss": 0.8894,
+      "step": 28320
+    },
+    {
+      "epoch": 1.120449287112658,
+      "grad_norm": 1.1394294249921872,
+      "learning_rate": 7.88952828296382e-06,
+      "loss": 0.9016,
+      "step": 28330
+    },
+    {
+      "epoch": 1.120844786331547,
+      "grad_norm": 1.1259107628540133,
+      "learning_rate": 7.88765012789818e-06,
+      "loss": 0.9094,
+      "step": 28340
+    },
+    {
+      "epoch": 1.121240285550436,
+      "grad_norm": 1.1498681421065784,
+      "learning_rate": 7.885771361277711e-06,
+      "loss": 0.8941,
+      "step": 28350
+    },
+    {
+      "epoch": 1.121635784769325,
+      "grad_norm": 1.1342837845882838,
+      "learning_rate": 7.883891983500309e-06,
+      "loss": 0.889,
+      "step": 28360
+    },
+    {
+      "epoch": 1.122031283988214,
+      "grad_norm": 1.1788708909394887,
+      "learning_rate": 7.882011994963994e-06,
+      "loss": 0.8766,
+      "step": 28370
+    },
+    {
+      "epoch": 1.122426783207103,
+      "grad_norm": 1.167966682234611,
+      "learning_rate": 7.880131396066913e-06,
+      "loss": 0.901,
+      "step": 28380
+    },
+    {
+      "epoch": 1.1228222824259921,
+      "grad_norm": 1.0482050306868889,
+      "learning_rate": 7.878250187207343e-06,
+      "loss": 0.8904,
+      "step": 28390
+    },
+    {
+      "epoch": 1.1232177816448812,
+      "grad_norm": 1.1262802081477237,
+      "learning_rate": 7.876368368783697e-06,
+      "loss": 0.8637,
+      "step": 28400
+    },
+    {
+      "epoch": 1.1236132808637702,
+      "grad_norm": 1.129571065163481,
+      "learning_rate": 7.874485941194508e-06,
+      "loss": 0.8939,
+      "step": 28410
+    },
+    {
+      "epoch": 1.1240087800826593,
+      "grad_norm": 1.1523677586553804,
+      "learning_rate": 7.872602904838442e-06,
+      "loss": 0.8835,
+      "step": 28420
+    },
+    {
+      "epoch": 1.1244042793015483,
+      "grad_norm": 1.1657147671994192,
+      "learning_rate": 7.870719260114295e-06,
+      "loss": 0.9079,
+      "step": 28430
+    },
+    {
+      "epoch": 1.1247997785204373,
+      "grad_norm": 1.2634129221404926,
+      "learning_rate": 7.868835007420992e-06,
+      "loss": 0.8806,
+      "step": 28440
+    },
+    {
+      "epoch": 1.1251952777393264,
+      "grad_norm": 1.1446032633226912,
+      "learning_rate": 7.866950147157584e-06,
+      "loss": 0.8812,
+      "step": 28450
+    },
+    {
+      "epoch": 1.1255907769582154,
+      "grad_norm": 1.24644506001461,
+      "learning_rate": 7.865064679723249e-06,
+      "loss": 0.8667,
+      "step": 28460
+    },
+    {
+      "epoch": 1.1259862761771045,
+      "grad_norm": 1.1820806072019319,
+      "learning_rate": 7.8631786055173e-06,
+      "loss": 0.8878,
+      "step": 28470
+    },
+    {
+      "epoch": 1.1263817753959935,
+      "grad_norm": 1.2198342469076937,
+      "learning_rate": 7.861291924939178e-06,
+      "loss": 0.8784,
+      "step": 28480
+    },
+    {
+      "epoch": 1.1267772746148825,
+      "grad_norm": 1.217815972872604,
+      "learning_rate": 7.859404638388443e-06,
+      "loss": 0.8882,
+      "step": 28490
+    },
+    {
+      "epoch": 1.1271727738337716,
+      "grad_norm": 1.2234378854603238,
+      "learning_rate": 7.857516746264798e-06,
+      "loss": 0.8568,
+      "step": 28500
+    },
+    {
+      "epoch": 1.1275682730526606,
+      "grad_norm": 1.0343819781590669,
+      "learning_rate": 7.855628248968057e-06,
+      "loss": 0.8823,
+      "step": 28510
+    },
+    {
+      "epoch": 1.1279637722715496,
+      "grad_norm": 1.1324461551860587,
+      "learning_rate": 7.853739146898179e-06,
+      "loss": 0.9039,
+      "step": 28520
+    },
+    {
+      "epoch": 1.1283592714904387,
+      "grad_norm": 1.1757742157096973,
+      "learning_rate": 7.85184944045524e-06,
+      "loss": 0.8713,
+      "step": 28530
+    },
+    {
+      "epoch": 1.1287547707093277,
+      "grad_norm": 1.0611857464814267,
+      "learning_rate": 7.849959130039446e-06,
+      "loss": 0.8885,
+      "step": 28540
+    },
+    {
+      "epoch": 1.129150269928217,
+      "grad_norm": 1.121404948439143,
+      "learning_rate": 7.848068216051135e-06,
+      "loss": 0.9004,
+      "step": 28550
+    },
+    {
+      "epoch": 1.129545769147106,
+      "grad_norm": 1.1360113325190624,
+      "learning_rate": 7.84617669889077e-06,
+      "loss": 0.8821,
+      "step": 28560
+    },
+    {
+      "epoch": 1.129941268365995,
+      "grad_norm": 1.161957591203711,
+      "learning_rate": 7.844284578958942e-06,
+      "loss": 0.8713,
+      "step": 28570
+    },
+    {
+      "epoch": 1.130336767584884,
+      "grad_norm": 1.1955714388665608,
+      "learning_rate": 7.842391856656368e-06,
+      "loss": 0.8754,
+      "step": 28580
+    },
+    {
+      "epoch": 1.1307322668037731,
+      "grad_norm": 1.237353010753038,
+      "learning_rate": 7.840498532383896e-06,
+      "loss": 0.8821,
+      "step": 28590
+    },
+    {
+      "epoch": 1.1311277660226622,
+      "grad_norm": 1.326735377756867,
+      "learning_rate": 7.838604606542498e-06,
+      "loss": 0.8754,
+      "step": 28600
+    },
+    {
+      "epoch": 1.1315232652415512,
+      "grad_norm": 1.1523039103824624,
+      "learning_rate": 7.836710079533276e-06,
+      "loss": 0.8901,
+      "step": 28610
+    },
+    {
+      "epoch": 1.1319187644604403,
+      "grad_norm": 1.2357558713593966,
+      "learning_rate": 7.83481495175746e-06,
+      "loss": 0.8719,
+      "step": 28620
+    },
+    {
+      "epoch": 1.1323142636793293,
+      "grad_norm": 1.078923022820184,
+      "learning_rate": 7.832919223616403e-06,
+      "loss": 0.8866,
+      "step": 28630
+    },
+    {
+      "epoch": 1.1327097628982183,
+      "grad_norm": 1.0768958958249693,
+      "learning_rate": 7.831022895511586e-06,
+      "loss": 0.8934,
+      "step": 28640
+    },
+    {
+      "epoch": 1.1331052621171074,
+      "grad_norm": 1.1672828453130446,
+      "learning_rate": 7.829125967844624e-06,
+      "loss": 0.8814,
+      "step": 28650
+    },
+    {
+      "epoch": 1.1335007613359964,
+      "grad_norm": 1.3018651463727677,
+      "learning_rate": 7.82722844101725e-06,
+      "loss": 0.8864,
+      "step": 28660
+    },
+    {
+      "epoch": 1.1338962605548855,
+      "grad_norm": 1.2506297134258788,
+      "learning_rate": 7.825330315431329e-06,
+      "loss": 0.8837,
+      "step": 28670
+    },
+    {
+      "epoch": 1.1342917597737745,
+      "grad_norm": 1.2187640150041792,
+      "learning_rate": 7.82343159148885e-06,
+      "loss": 0.8995,
+      "step": 28680
+    },
+    {
+      "epoch": 1.1346872589926635,
+      "grad_norm": 1.211016132170073,
+      "learning_rate": 7.821532269591934e-06,
+      "loss": 0.8844,
+      "step": 28690
+    },
+    {
+      "epoch": 1.1350827582115526,
+      "grad_norm": 1.2530868365689445,
+      "learning_rate": 7.819632350142822e-06,
+      "loss": 0.8807,
+      "step": 28700
+    },
+    {
+      "epoch": 1.1354782574304416,
+      "grad_norm": 1.193921393475841,
+      "learning_rate": 7.817731833543883e-06,
+      "loss": 0.8785,
+      "step": 28710
+    },
+    {
+      "epoch": 1.1358737566493307,
+      "grad_norm": 0.9978430926114077,
+      "learning_rate": 7.815830720197616e-06,
+      "loss": 0.9022,
+      "step": 28720
+    },
+    {
+      "epoch": 1.1362692558682197,
+      "grad_norm": 1.1946625840984992,
+      "learning_rate": 7.813929010506645e-06,
+      "loss": 0.8762,
+      "step": 28730
+    },
+    {
+      "epoch": 1.1366647550871087,
+      "grad_norm": 1.2591354732718374,
+      "learning_rate": 7.812026704873717e-06,
+      "loss": 0.8984,
+      "step": 28740
+    },
+    {
+      "epoch": 1.1370602543059978,
+      "grad_norm": 1.0347969077844694,
+      "learning_rate": 7.810123803701711e-06,
+      "loss": 0.8868,
+      "step": 28750
+    },
+    {
+      "epoch": 1.1374557535248868,
+      "grad_norm": 1.2015888244758948,
+      "learning_rate": 7.808220307393626e-06,
+      "loss": 0.8701,
+      "step": 28760
+    },
+    {
+      "epoch": 1.1378512527437759,
+      "grad_norm": 1.041837969689692,
+      "learning_rate": 7.80631621635259e-06,
+      "loss": 0.9248,
+      "step": 28770
+    },
+    {
+      "epoch": 1.138246751962665,
+      "grad_norm": 1.1292194797397903,
+      "learning_rate": 7.804411530981857e-06,
+      "loss": 0.8856,
+      "step": 28780
+    },
+    {
+      "epoch": 1.138642251181554,
+      "grad_norm": 1.0841712144383036,
+      "learning_rate": 7.802506251684809e-06,
+      "loss": 0.8935,
+      "step": 28790
+    },
+    {
+      "epoch": 1.139037750400443,
+      "grad_norm": 1.1216971304097392,
+      "learning_rate": 7.80060037886495e-06,
+      "loss": 0.8865,
+      "step": 28800
+    },
+    {
+      "epoch": 1.139433249619332,
+      "grad_norm": 1.0757648210359316,
+      "learning_rate": 7.79869391292591e-06,
+      "loss": 0.8871,
+      "step": 28810
+    },
+    {
+      "epoch": 1.139828748838221,
+      "grad_norm": 1.2629266635777194,
+      "learning_rate": 7.79678685427145e-06,
+      "loss": 0.8857,
+      "step": 28820
+    },
+    {
+      "epoch": 1.14022424805711,
+      "grad_norm": 1.137588931108426,
+      "learning_rate": 7.794879203305446e-06,
+      "loss": 0.9159,
+      "step": 28830
+    },
+    {
+      "epoch": 1.1406197472759991,
+      "grad_norm": 1.1231830208980025,
+      "learning_rate": 7.79297096043191e-06,
+      "loss": 0.8845,
+      "step": 28840
+    },
+    {
+      "epoch": 1.1410152464948882,
+      "grad_norm": 1.1633748835851536,
+      "learning_rate": 7.791062126054974e-06,
+      "loss": 0.8981,
+      "step": 28850
+    },
+    {
+      "epoch": 1.1414107457137772,
+      "grad_norm": 1.2635872037555984,
+      "learning_rate": 7.789152700578898e-06,
+      "loss": 0.8712,
+      "step": 28860
+    },
+    {
+      "epoch": 1.1418062449326662,
+      "grad_norm": 1.2331340283482086,
+      "learning_rate": 7.787242684408063e-06,
+      "loss": 0.8787,
+      "step": 28870
+    },
+    {
+      "epoch": 1.1422017441515553,
+      "grad_norm": 1.0370422569322812,
+      "learning_rate": 7.785332077946981e-06,
+      "loss": 0.8963,
+      "step": 28880
+    },
+    {
+      "epoch": 1.1425972433704443,
+      "grad_norm": 1.0289576023061546,
+      "learning_rate": 7.783420881600283e-06,
+      "loss": 0.8992,
+      "step": 28890
+    },
+    {
+      "epoch": 1.1429927425893334,
+      "grad_norm": 1.246364477870542,
+      "learning_rate": 7.781509095772727e-06,
+      "loss": 0.873,
+      "step": 28900
+    },
+    {
+      "epoch": 1.1433882418082224,
+      "grad_norm": 1.0686637257279392,
+      "learning_rate": 7.779596720869197e-06,
+      "loss": 0.8894,
+      "step": 28910
+    },
+    {
+      "epoch": 1.1437837410271114,
+      "grad_norm": 1.200909050117641,
+      "learning_rate": 7.777683757294704e-06,
+      "loss": 0.8828,
+      "step": 28920
+    },
+    {
+      "epoch": 1.1441792402460005,
+      "grad_norm": 1.1426696375182521,
+      "learning_rate": 7.77577020545438e-06,
+      "loss": 0.8781,
+      "step": 28930
+    },
+    {
+      "epoch": 1.1445747394648895,
+      "grad_norm": 1.0435177796640491,
+      "learning_rate": 7.77385606575348e-06,
+      "loss": 0.8898,
+      "step": 28940
+    },
+    {
+      "epoch": 1.1449702386837786,
+      "grad_norm": 1.155909040323496,
+      "learning_rate": 7.771941338597387e-06,
+      "loss": 0.8848,
+      "step": 28950
+    },
+    {
+      "epoch": 1.1453657379026676,
+      "grad_norm": 1.0991162110656718,
+      "learning_rate": 7.77002602439161e-06,
+      "loss": 0.8809,
+      "step": 28960
+    },
+    {
+      "epoch": 1.1457612371215566,
+      "grad_norm": 1.3654436389159246,
+      "learning_rate": 7.768110123541775e-06,
+      "loss": 0.8796,
+      "step": 28970
+    },
+    {
+      "epoch": 1.1461567363404457,
+      "grad_norm": 1.1537587675900443,
+      "learning_rate": 7.76619363645364e-06,
+      "loss": 0.909,
+      "step": 28980
+    },
+    {
+      "epoch": 1.1465522355593347,
+      "grad_norm": 1.0697272033235352,
+      "learning_rate": 7.764276563533087e-06,
+      "loss": 0.8892,
+      "step": 28990
+    },
+    {
+      "epoch": 1.1469477347782238,
+      "grad_norm": 1.3159768122619557,
+      "learning_rate": 7.762358905186112e-06,
+      "loss": 0.8651,
+      "step": 29000
+    },
+    {
+      "epoch": 1.1473432339971128,
+      "grad_norm": 1.3905139079102626,
+      "learning_rate": 7.760440661818848e-06,
+      "loss": 0.8786,
+      "step": 29010
+    },
+    {
+      "epoch": 1.1477387332160018,
+      "grad_norm": 1.0774056655973534,
+      "learning_rate": 7.758521833837544e-06,
+      "loss": 0.9042,
+      "step": 29020
+    },
+    {
+      "epoch": 1.1481342324348909,
+      "grad_norm": 1.1019975024925495,
+      "learning_rate": 7.756602421648576e-06,
+      "loss": 0.887,
+      "step": 29030
+    },
+    {
+      "epoch": 1.14852973165378,
+      "grad_norm": 1.147271902231054,
+      "learning_rate": 7.75468242565844e-06,
+      "loss": 0.8909,
+      "step": 29040
+    },
+    {
+      "epoch": 1.148925230872669,
+      "grad_norm": 1.0757767265055174,
+      "learning_rate": 7.75276184627376e-06,
+      "loss": 0.8947,
+      "step": 29050
+    },
+    {
+      "epoch": 1.149320730091558,
+      "grad_norm": 1.3263647051483423,
+      "learning_rate": 7.750840683901284e-06,
+      "loss": 0.8726,
+      "step": 29060
+    },
+    {
+      "epoch": 1.149716229310447,
+      "grad_norm": 1.208365905342983,
+      "learning_rate": 7.748918938947878e-06,
+      "loss": 0.8914,
+      "step": 29070
+    },
+    {
+      "epoch": 1.150111728529336,
+      "grad_norm": 1.1085705403269703,
+      "learning_rate": 7.746996611820534e-06,
+      "loss": 0.8851,
+      "step": 29080
+    },
+    {
+      "epoch": 1.1505072277482251,
+      "grad_norm": 1.1023262182528024,
+      "learning_rate": 7.74507370292637e-06,
+      "loss": 0.8884,
+      "step": 29090
+    },
+    {
+      "epoch": 1.1509027269671142,
+      "grad_norm": 1.2404080016517403,
+      "learning_rate": 7.743150212672628e-06,
+      "loss": 0.8761,
+      "step": 29100
+    },
+    {
+      "epoch": 1.1512982261860032,
+      "grad_norm": 1.2438165887873858,
+      "learning_rate": 7.741226141466665e-06,
+      "loss": 0.9016,
+      "step": 29110
+    },
+    {
+      "epoch": 1.1516937254048922,
+      "grad_norm": 1.1189188385441424,
+      "learning_rate": 7.739301489715968e-06,
+      "loss": 0.8896,
+      "step": 29120
+    },
+    {
+      "epoch": 1.1520892246237813,
+      "grad_norm": 1.1027902423933102,
+      "learning_rate": 7.737376257828146e-06,
+      "loss": 0.8788,
+      "step": 29130
+    },
+    {
+      "epoch": 1.1524847238426705,
+      "grad_norm": 1.3113043837539398,
+      "learning_rate": 7.73545044621093e-06,
+      "loss": 0.8927,
+      "step": 29140
+    },
+    {
+      "epoch": 1.1528802230615596,
+      "grad_norm": 1.211483238559461,
+      "learning_rate": 7.733524055272173e-06,
+      "loss": 0.8883,
+      "step": 29150
+    },
+    {
+      "epoch": 1.1532757222804486,
+      "grad_norm": 1.1649328856987111,
+      "learning_rate": 7.731597085419853e-06,
+      "loss": 0.8893,
+      "step": 29160
+    },
+    {
+      "epoch": 1.1536712214993377,
+      "grad_norm": 1.157832606910769,
+      "learning_rate": 7.729669537062069e-06,
+      "loss": 0.8873,
+      "step": 29170
+    },
+    {
+      "epoch": 1.1540667207182267,
+      "grad_norm": 1.1741372092507605,
+      "learning_rate": 7.727741410607042e-06,
+      "loss": 0.8723,
+      "step": 29180
+    },
+    {
+      "epoch": 1.1544622199371157,
+      "grad_norm": 1.1977631063198806,
+      "learning_rate": 7.725812706463116e-06,
+      "loss": 0.9027,
+      "step": 29190
+    },
+    {
+      "epoch": 1.1548577191560048,
+      "grad_norm": 1.1654335010269374,
+      "learning_rate": 7.723883425038759e-06,
+      "loss": 0.8615,
+      "step": 29200
+    },
+    {
+      "epoch": 1.1552532183748938,
+      "grad_norm": 1.0905358408224222,
+      "learning_rate": 7.721953566742558e-06,
+      "loss": 0.8721,
+      "step": 29210
+    },
+    {
+      "epoch": 1.1556487175937828,
+      "grad_norm": 1.0874304542981303,
+      "learning_rate": 7.720023131983224e-06,
+      "loss": 0.8811,
+      "step": 29220
+    },
+    {
+      "epoch": 1.1560442168126719,
+      "grad_norm": 1.3797745965288783,
+      "learning_rate": 7.71809212116959e-06,
+      "loss": 0.8823,
+      "step": 29230
+    },
+    {
+      "epoch": 1.156439716031561,
+      "grad_norm": 1.4178610596746921,
+      "learning_rate": 7.716160534710613e-06,
+      "loss": 0.8949,
+      "step": 29240
+    },
+    {
+      "epoch": 1.15683521525045,
+      "grad_norm": 1.0866428931632974,
+      "learning_rate": 7.71422837301537e-06,
+      "loss": 0.8733,
+      "step": 29250
+    },
+    {
+      "epoch": 1.157230714469339,
+      "grad_norm": 1.0956032993001137,
+      "learning_rate": 7.712295636493058e-06,
+      "loss": 0.8892,
+      "step": 29260
+    },
+    {
+      "epoch": 1.157626213688228,
+      "grad_norm": 1.334593375690845,
+      "learning_rate": 7.710362325552994e-06,
+      "loss": 0.8722,
+      "step": 29270
+    },
+    {
+      "epoch": 1.158021712907117,
+      "grad_norm": 1.3022906615402798,
+      "learning_rate": 7.708428440604627e-06,
+      "loss": 0.8712,
+      "step": 29280
+    },
+    {
+      "epoch": 1.1584172121260061,
+      "grad_norm": 1.3478174949321362,
+      "learning_rate": 7.706493982057516e-06,
+      "loss": 0.8932,
+      "step": 29290
+    },
+    {
+      "epoch": 1.1588127113448952,
+      "grad_norm": 1.324967160585542,
+      "learning_rate": 7.704558950321348e-06,
+      "loss": 0.8824,
+      "step": 29300
+    },
+    {
+      "epoch": 1.1592082105637842,
+      "grad_norm": 1.1943042609215033,
+      "learning_rate": 7.702623345805932e-06,
+      "loss": 0.8937,
+      "step": 29310
+    },
+    {
+      "epoch": 1.1596037097826732,
+      "grad_norm": 1.1677868388561077,
+      "learning_rate": 7.700687168921189e-06,
+      "loss": 0.8812,
+      "step": 29320
+    },
+    {
+      "epoch": 1.1599992090015623,
+      "grad_norm": 1.1426773226082723,
+      "learning_rate": 7.698750420077174e-06,
+      "loss": 0.892,
+      "step": 29330
+    },
+    {
+      "epoch": 1.1603947082204513,
+      "grad_norm": 1.1001815850326977,
+      "learning_rate": 7.696813099684056e-06,
+      "loss": 0.8754,
+      "step": 29340
+    },
+    {
+      "epoch": 1.1607902074393404,
+      "grad_norm": 1.256356849971641,
+      "learning_rate": 7.694875208152126e-06,
+      "loss": 0.8961,
+      "step": 29350
+    },
+    {
+      "epoch": 1.1611857066582294,
+      "grad_norm": 1.1715949183296233,
+      "learning_rate": 7.692936745891796e-06,
+      "loss": 0.8982,
+      "step": 29360
+    },
+    {
+      "epoch": 1.1615812058771184,
+      "grad_norm": 1.1536921404387677,
+      "learning_rate": 7.690997713313599e-06,
+      "loss": 0.8625,
+      "step": 29370
+    },
+    {
+      "epoch": 1.1619767050960075,
+      "grad_norm": 1.0931891575222479,
+      "learning_rate": 7.689058110828189e-06,
+      "loss": 0.8882,
+      "step": 29380
+    },
+    {
+      "epoch": 1.1623722043148965,
+      "grad_norm": 1.0880714299836853,
+      "learning_rate": 7.68711793884634e-06,
+      "loss": 0.8923,
+      "step": 29390
+    },
+    {
+      "epoch": 1.1627677035337856,
+      "grad_norm": 1.0809193901746335,
+      "learning_rate": 7.685177197778948e-06,
+      "loss": 0.8988,
+      "step": 29400
+    },
+    {
+      "epoch": 1.1631632027526746,
+      "grad_norm": 1.2882994424366716,
+      "learning_rate": 7.683235888037028e-06,
+      "loss": 0.9088,
+      "step": 29410
+    },
+    {
+      "epoch": 1.1635587019715636,
+      "grad_norm": 1.261155261186555,
+      "learning_rate": 7.681294010031719e-06,
+      "loss": 0.8872,
+      "step": 29420
+    },
+    {
+      "epoch": 1.1639542011904527,
+      "grad_norm": 1.104916235432575,
+      "learning_rate": 7.679351564174273e-06,
+      "loss": 0.8725,
+      "step": 29430
+    },
+    {
+      "epoch": 1.1643497004093417,
+      "grad_norm": 1.2184303141927162,
+      "learning_rate": 7.677408550876069e-06,
+      "loss": 0.9017,
+      "step": 29440
+    },
+    {
+      "epoch": 1.1647451996282308,
+      "grad_norm": 1.3714527644520074,
+      "learning_rate": 7.675464970548604e-06,
+      "loss": 0.895,
+      "step": 29450
+    },
+    {
+      "epoch": 1.1651406988471198,
+      "grad_norm": 1.057714246125476,
+      "learning_rate": 7.673520823603496e-06,
+      "loss": 0.8847,
+      "step": 29460
+    },
+    {
+      "epoch": 1.1655361980660088,
+      "grad_norm": 1.1613973437659404,
+      "learning_rate": 7.671576110452479e-06,
+      "loss": 0.8913,
+      "step": 29470
+    },
+    {
+      "epoch": 1.1659316972848979,
+      "grad_norm": 1.1202199557851293,
+      "learning_rate": 7.669630831507412e-06,
+      "loss": 0.8834,
+      "step": 29480
+    },
+    {
+      "epoch": 1.166327196503787,
+      "grad_norm": 1.181775828898434,
+      "learning_rate": 7.66768498718027e-06,
+      "loss": 0.8645,
+      "step": 29490
+    },
+    {
+      "epoch": 1.166722695722676,
+      "grad_norm": 1.2438773133442504,
+      "learning_rate": 7.665738577883155e-06,
+      "loss": 0.8844,
+      "step": 29500
+    },
+    {
+      "epoch": 1.167118194941565,
+      "grad_norm": 1.1461310396287698,
+      "learning_rate": 7.663791604028276e-06,
+      "loss": 0.8891,
+      "step": 29510
+    },
+    {
+      "epoch": 1.167513694160454,
+      "grad_norm": 1.11276964052036,
+      "learning_rate": 7.661844066027974e-06,
+      "loss": 0.9014,
+      "step": 29520
+    },
+    {
+      "epoch": 1.167909193379343,
+      "grad_norm": 1.187928335957745,
+      "learning_rate": 7.6598959642947e-06,
+      "loss": 0.8891,
+      "step": 29530
+    },
+    {
+      "epoch": 1.168304692598232,
+      "grad_norm": 1.1000482762167085,
+      "learning_rate": 7.657947299241031e-06,
+      "loss": 0.8972,
+      "step": 29540
+    },
+    {
+      "epoch": 1.1687001918171211,
+      "grad_norm": 1.3865771008026182,
+      "learning_rate": 7.655998071279663e-06,
+      "loss": 0.8526,
+      "step": 29550
+    },
+    {
+      "epoch": 1.1690956910360102,
+      "grad_norm": 1.2635972530918929,
+      "learning_rate": 7.654048280823404e-06,
+      "loss": 0.8805,
+      "step": 29560
+    },
+    {
+      "epoch": 1.1694911902548992,
+      "grad_norm": 1.1103108456563064,
+      "learning_rate": 7.652097928285188e-06,
+      "loss": 0.8936,
+      "step": 29570
+    },
+    {
+      "epoch": 1.1698866894737883,
+      "grad_norm": 1.2117154659725748,
+      "learning_rate": 7.650147014078069e-06,
+      "loss": 0.8735,
+      "step": 29580
+    },
+    {
+      "epoch": 1.1702821886926773,
+      "grad_norm": 1.257496495192249,
+      "learning_rate": 7.648195538615216e-06,
+      "loss": 0.8857,
+      "step": 29590
+    },
+    {
+      "epoch": 1.1706776879115663,
+      "grad_norm": 1.0930912157977482,
+      "learning_rate": 7.646243502309915e-06,
+      "loss": 0.8593,
+      "step": 29600
+    },
+    {
+      "epoch": 1.1710731871304554,
+      "grad_norm": 1.1981145172298138,
+      "learning_rate": 7.644290905575577e-06,
+      "loss": 0.8628,
+      "step": 29610
+    },
+    {
+      "epoch": 1.1714686863493444,
+      "grad_norm": 1.1603747325966454,
+      "learning_rate": 7.642337748825729e-06,
+      "loss": 0.8786,
+      "step": 29620
+    },
+    {
+      "epoch": 1.1718641855682335,
+      "grad_norm": 1.188747653562277,
+      "learning_rate": 7.640384032474013e-06,
+      "loss": 0.8858,
+      "step": 29630
+    },
+    {
+      "epoch": 1.1722596847871225,
+      "grad_norm": 1.2975011549925548,
+      "learning_rate": 7.638429756934196e-06,
+      "loss": 0.8735,
+      "step": 29640
+    },
+    {
+      "epoch": 1.1726551840060115,
+      "grad_norm": 1.0971934138813961,
+      "learning_rate": 7.636474922620156e-06,
+      "loss": 0.8812,
+      "step": 29650
+    },
+    {
+      "epoch": 1.1730506832249006,
+      "grad_norm": 1.1882480484422593,
+      "learning_rate": 7.634519529945899e-06,
+      "loss": 0.8619,
+      "step": 29660
+    },
+    {
+      "epoch": 1.1734461824437896,
+      "grad_norm": 1.216072478683306,
+      "learning_rate": 7.632563579325537e-06,
+      "loss": 0.895,
+      "step": 29670
+    },
+    {
+      "epoch": 1.1738416816626787,
+      "grad_norm": 1.174018572091714,
+      "learning_rate": 7.630607071173314e-06,
+      "loss": 0.8705,
+      "step": 29680
+    },
+    {
+      "epoch": 1.1742371808815677,
+      "grad_norm": 1.0417398323565927,
+      "learning_rate": 7.628650005903582e-06,
+      "loss": 0.8782,
+      "step": 29690
+    },
+    {
+      "epoch": 1.1746326801004567,
+      "grad_norm": 1.0115777617178112,
+      "learning_rate": 7.626692383930811e-06,
+      "loss": 0.8937,
+      "step": 29700
+    },
+    {
+      "epoch": 1.1750281793193458,
+      "grad_norm": 1.0298194108286844,
+      "learning_rate": 7.624734205669594e-06,
+      "loss": 0.8763,
+      "step": 29710
+    },
+    {
+      "epoch": 1.1754236785382348,
+      "grad_norm": 1.2451098686417932,
+      "learning_rate": 7.62277547153464e-06,
+      "loss": 0.8565,
+      "step": 29720
+    },
+    {
+      "epoch": 1.1758191777571239,
+      "grad_norm": 1.2874579115129148,
+      "learning_rate": 7.620816181940776e-06,
+      "loss": 0.8745,
+      "step": 29730
+    },
+    {
+      "epoch": 1.176214676976013,
+      "grad_norm": 1.4712033002626335,
+      "learning_rate": 7.618856337302944e-06,
+      "loss": 0.8836,
+      "step": 29740
+    },
+    {
+      "epoch": 1.176610176194902,
+      "grad_norm": 1.388935780961097,
+      "learning_rate": 7.616895938036207e-06,
+      "loss": 0.8719,
+      "step": 29750
+    },
+    {
+      "epoch": 1.177005675413791,
+      "grad_norm": 1.1775684299699603,
+      "learning_rate": 7.614934984555742e-06,
+      "loss": 0.8866,
+      "step": 29760
+    },
+    {
+      "epoch": 1.17740117463268,
+      "grad_norm": 1.1639626874243392,
+      "learning_rate": 7.6129734772768485e-06,
+      "loss": 0.8914,
+      "step": 29770
+    },
+    {
+      "epoch": 1.177796673851569,
+      "grad_norm": 1.0948925950309525,
+      "learning_rate": 7.611011416614937e-06,
+      "loss": 0.8813,
+      "step": 29780
+    },
+    {
+      "epoch": 1.178192173070458,
+      "grad_norm": 1.1279932390244016,
+      "learning_rate": 7.609048802985542e-06,
+      "loss": 0.8908,
+      "step": 29790
+    },
+    {
+      "epoch": 1.1785876722893471,
+      "grad_norm": 1.379998269514362,
+      "learning_rate": 7.607085636804308e-06,
+      "loss": 0.8873,
+      "step": 29800
+    },
+    {
+      "epoch": 1.1789831715082362,
+      "grad_norm": 1.1807386421380142,
+      "learning_rate": 7.605121918487002e-06,
+      "loss": 0.8842,
+      "step": 29810
+    },
+    {
+      "epoch": 1.1793786707271252,
+      "grad_norm": 1.144539320674775,
+      "learning_rate": 7.603157648449503e-06,
+      "loss": 0.8798,
+      "step": 29820
+    },
+    {
+      "epoch": 1.1797741699460143,
+      "grad_norm": 1.4020064432673762,
+      "learning_rate": 7.601192827107814e-06,
+      "loss": 0.8571,
+      "step": 29830
+    },
+    {
+      "epoch": 1.1801696691649033,
+      "grad_norm": 1.0939681126918015,
+      "learning_rate": 7.599227454878048e-06,
+      "loss": 0.879,
+      "step": 29840
+    },
+    {
+      "epoch": 1.1805651683837923,
+      "grad_norm": 1.155615890244559,
+      "learning_rate": 7.597261532176437e-06,
+      "loss": 0.8907,
+      "step": 29850
+    },
+    {
+      "epoch": 1.1809606676026814,
+      "grad_norm": 1.1912816217492002,
+      "learning_rate": 7.5952950594193295e-06,
+      "loss": 0.8865,
+      "step": 29860
+    },
+    {
+      "epoch": 1.1813561668215704,
+      "grad_norm": 1.1103606289751944,
+      "learning_rate": 7.593328037023193e-06,
+      "loss": 0.8893,
+      "step": 29870
+    },
+    {
+      "epoch": 1.1817516660404594,
+      "grad_norm": 1.183606053747966,
+      "learning_rate": 7.591360465404607e-06,
+      "loss": 0.8831,
+      "step": 29880
+    },
+    {
+      "epoch": 1.1821471652593487,
+      "grad_norm": 1.2617969120463344,
+      "learning_rate": 7.589392344980269e-06,
+      "loss": 0.8826,
+      "step": 29890
+    },
+    {
+      "epoch": 1.1825426644782377,
+      "grad_norm": 1.2071476689982257,
+      "learning_rate": 7.587423676166996e-06,
+      "loss": 0.88,
+      "step": 29900
+    },
+    {
+      "epoch": 1.1829381636971268,
+      "grad_norm": 1.1883887689888764,
+      "learning_rate": 7.585454459381716e-06,
+      "loss": 0.8799,
+      "step": 29910
+    },
+    {
+      "epoch": 1.1833336629160158,
+      "grad_norm": 1.241758646047502,
+      "learning_rate": 7.583484695041476e-06,
+      "loss": 0.8843,
+      "step": 29920
+    },
+    {
+      "epoch": 1.1837291621349049,
+      "grad_norm": 1.052165823201099,
+      "learning_rate": 7.581514383563438e-06,
+      "loss": 0.8853,
+      "step": 29930
+    },
+    {
+      "epoch": 1.184124661353794,
+      "grad_norm": 1.3103002800711077,
+      "learning_rate": 7.579543525364881e-06,
+      "loss": 0.8687,
+      "step": 29940
+    },
+    {
+      "epoch": 1.184520160572683,
+      "grad_norm": 1.1200602792263559,
+      "learning_rate": 7.577572120863199e-06,
+      "loss": 0.8808,
+      "step": 29950
+    },
+    {
+      "epoch": 1.184915659791572,
+      "grad_norm": 1.2705716109868155,
+      "learning_rate": 7.575600170475901e-06,
+      "loss": 0.8754,
+      "step": 29960
+    },
+    {
+      "epoch": 1.185311159010461,
+      "grad_norm": 1.1949716115294393,
+      "learning_rate": 7.573627674620612e-06,
+      "loss": 0.874,
+      "step": 29970
+    },
+    {
+      "epoch": 1.18570665822935,
+      "grad_norm": 1.0711905403840896,
+      "learning_rate": 7.571654633715073e-06,
+      "loss": 0.8919,
+      "step": 29980
+    },
+    {
+      "epoch": 1.186102157448239,
+      "grad_norm": 1.2587671858435558,
+      "learning_rate": 7.569681048177142e-06,
+      "loss": 0.8738,
+      "step": 29990
+    },
+    {
+      "epoch": 1.1864976566671281,
+      "grad_norm": 1.2775112551314405,
+      "learning_rate": 7.567706918424789e-06,
+      "loss": 0.8725,
+      "step": 30000
+    },
+    {
+      "epoch": 1.1868931558860172,
+      "grad_norm": 1.1885472933999128,
+      "learning_rate": 7.5657322448761e-06,
+      "loss": 0.8788,
+      "step": 30010
+    },
+    {
+      "epoch": 1.1872886551049062,
+      "grad_norm": 1.1186515377138577,
+      "learning_rate": 7.563757027949279e-06,
+      "loss": 0.8818,
+      "step": 30020
+    },
+    {
+      "epoch": 1.1876841543237953,
+      "grad_norm": 1.2993810336876137,
+      "learning_rate": 7.561781268062641e-06,
+      "loss": 0.8709,
+      "step": 30030
+    },
+    {
+      "epoch": 1.1880796535426843,
+      "grad_norm": 1.153115171529422,
+      "learning_rate": 7.559804965634621e-06,
+      "loss": 0.8681,
+      "step": 30040
+    },
+    {
+      "epoch": 1.1884751527615733,
+      "grad_norm": 1.2102730861638553,
+      "learning_rate": 7.557828121083764e-06,
+      "loss": 0.8748,
+      "step": 30050
+    },
+    {
+      "epoch": 1.1888706519804624,
+      "grad_norm": 1.5041728367497254,
+      "learning_rate": 7.555850734828732e-06,
+      "loss": 0.8663,
+      "step": 30060
+    },
+    {
+      "epoch": 1.1892661511993514,
+      "grad_norm": 1.2767644170458703,
+      "learning_rate": 7.553872807288303e-06,
+      "loss": 0.8635,
+      "step": 30070
+    },
+    {
+      "epoch": 1.1896616504182405,
+      "grad_norm": 1.1683450132636235,
+      "learning_rate": 7.551894338881365e-06,
+      "loss": 0.8552,
+      "step": 30080
+    },
+    {
+      "epoch": 1.1900571496371295,
+      "grad_norm": 1.2379071855299737,
+      "learning_rate": 7.5499153300269245e-06,
+      "loss": 0.8873,
+      "step": 30090
+    },
+    {
+      "epoch": 1.1904526488560185,
+      "grad_norm": 1.1647630688321888,
+      "learning_rate": 7.547935781144104e-06,
+      "loss": 0.8505,
+      "step": 30100
+    },
+    {
+      "epoch": 1.1908481480749076,
+      "grad_norm": 1.2765442384447536,
+      "learning_rate": 7.545955692652138e-06,
+      "loss": 0.8724,
+      "step": 30110
+    },
+    {
+      "epoch": 1.1912436472937966,
+      "grad_norm": 1.297724651854536,
+      "learning_rate": 7.543975064970374e-06,
+      "loss": 0.8742,
+      "step": 30120
+    },
+    {
+      "epoch": 1.1916391465126857,
+      "grad_norm": 1.1240698859136409,
+      "learning_rate": 7.541993898518274e-06,
+      "loss": 0.8984,
+      "step": 30130
+    },
+    {
+      "epoch": 1.1920346457315747,
+      "grad_norm": 1.3470036819026818,
+      "learning_rate": 7.540012193715416e-06,
+      "loss": 0.8799,
+      "step": 30140
+    },
+    {
+      "epoch": 1.1924301449504637,
+      "grad_norm": 1.2314084608642535,
+      "learning_rate": 7.538029950981491e-06,
+      "loss": 0.8791,
+      "step": 30150
+    },
+    {
+      "epoch": 1.1928256441693528,
+      "grad_norm": 1.1784692893783266,
+      "learning_rate": 7.536047170736305e-06,
+      "loss": 0.8854,
+      "step": 30160
+    },
+    {
+      "epoch": 1.1932211433882418,
+      "grad_norm": 1.14420482272867,
+      "learning_rate": 7.534063853399778e-06,
+      "loss": 0.8971,
+      "step": 30170
+    },
+    {
+      "epoch": 1.1936166426071309,
+      "grad_norm": 1.4487798439281692,
+      "learning_rate": 7.532079999391939e-06,
+      "loss": 0.8622,
+      "step": 30180
+    },
+    {
+      "epoch": 1.19401214182602,
+      "grad_norm": 1.0877524904221965,
+      "learning_rate": 7.530095609132936e-06,
+      "loss": 0.88,
+      "step": 30190
+    },
+    {
+      "epoch": 1.194407641044909,
+      "grad_norm": 1.234983092565341,
+      "learning_rate": 7.528110683043029e-06,
+      "loss": 0.8485,
+      "step": 30200
+    },
+    {
+      "epoch": 1.194803140263798,
+      "grad_norm": 1.1251153792094335,
+      "learning_rate": 7.526125221542593e-06,
+      "loss": 0.8829,
+      "step": 30210
+    },
+    {
+      "epoch": 1.195198639482687,
+      "grad_norm": 1.2267855953598872,
+      "learning_rate": 7.524139225052112e-06,
+      "loss": 0.8819,
+      "step": 30220
+    },
+    {
+      "epoch": 1.195594138701576,
+      "grad_norm": 1.5108971884967024,
+      "learning_rate": 7.522152693992187e-06,
+      "loss": 0.8712,
+      "step": 30230
+    },
+    {
+      "epoch": 1.195989637920465,
+      "grad_norm": 1.2659442593945363,
+      "learning_rate": 7.520165628783532e-06,
+      "loss": 0.878,
+      "step": 30240
+    },
+    {
+      "epoch": 1.1963851371393541,
+      "grad_norm": 1.1497900474574254,
+      "learning_rate": 7.518178029846972e-06,
+      "loss": 0.9073,
+      "step": 30250
+    },
+    {
+      "epoch": 1.1967806363582432,
+      "grad_norm": 1.2919209547468042,
+      "learning_rate": 7.516189897603448e-06,
+      "loss": 0.8426,
+      "step": 30260
+    },
+    {
+      "epoch": 1.1971761355771322,
+      "grad_norm": 1.2807626231224034,
+      "learning_rate": 7.514201232474012e-06,
+      "loss": 0.896,
+      "step": 30270
+    },
+    {
+      "epoch": 1.1975716347960212,
+      "grad_norm": 1.1555291983064853,
+      "learning_rate": 7.512212034879827e-06,
+      "loss": 0.8928,
+      "step": 30280
+    },
+    {
+      "epoch": 1.1979671340149103,
+      "grad_norm": 1.2648593824664756,
+      "learning_rate": 7.510222305242174e-06,
+      "loss": 0.8781,
+      "step": 30290
+    },
+    {
+      "epoch": 1.1983626332337993,
+      "grad_norm": 0.990387571846563,
+      "learning_rate": 7.508232043982443e-06,
+      "loss": 0.8943,
+      "step": 30300
+    },
+    {
+      "epoch": 1.1987581324526884,
+      "grad_norm": 1.2381759611804162,
+      "learning_rate": 7.506241251522135e-06,
+      "loss": 0.8692,
+      "step": 30310
+    },
+    {
+      "epoch": 1.1991536316715774,
+      "grad_norm": 1.3334554431564103,
+      "learning_rate": 7.5042499282828674e-06,
+      "loss": 0.8772,
+      "step": 30320
+    },
+    {
+      "epoch": 1.1995491308904664,
+      "grad_norm": 1.3819279434705443,
+      "learning_rate": 7.50225807468637e-06,
+      "loss": 0.8744,
+      "step": 30330
+    },
+    {
+      "epoch": 1.1999446301093555,
+      "grad_norm": 1.0502563676895056,
+      "learning_rate": 7.5002656911544795e-06,
+      "loss": 0.8953,
+      "step": 30340
+    },
+    {
+      "epoch": 1.2003401293282445,
+      "grad_norm": 1.4035638473126562,
+      "learning_rate": 7.498272778109152e-06,
+      "loss": 0.8698,
+      "step": 30350
+    },
+    {
+      "epoch": 1.2007356285471336,
+      "grad_norm": 1.3338299481943479,
+      "learning_rate": 7.49627933597245e-06,
+      "loss": 0.8816,
+      "step": 30360
+    },
+    {
+      "epoch": 1.2011311277660226,
+      "grad_norm": 1.2140920358452105,
+      "learning_rate": 7.494285365166552e-06,
+      "loss": 0.8594,
+      "step": 30370
+    },
+    {
+      "epoch": 1.2015266269849116,
+      "grad_norm": 1.2231030904066684,
+      "learning_rate": 7.492290866113746e-06,
+      "loss": 0.8767,
+      "step": 30380
+    },
+    {
+      "epoch": 1.2019221262038007,
+      "grad_norm": 1.2316438852221772,
+      "learning_rate": 7.490295839236432e-06,
+      "loss": 0.8808,
+      "step": 30390
+    },
+    {
+      "epoch": 1.2023176254226897,
+      "grad_norm": 1.1362333437213394,
+      "learning_rate": 7.488300284957125e-06,
+      "loss": 0.867,
+      "step": 30400
+    },
+    {
+      "epoch": 1.2027131246415788,
+      "grad_norm": 1.1500453213525457,
+      "learning_rate": 7.486304203698448e-06,
+      "loss": 0.8866,
+      "step": 30410
+    },
+    {
+      "epoch": 1.2031086238604678,
+      "grad_norm": 1.1506962740643867,
+      "learning_rate": 7.484307595883135e-06,
+      "loss": 0.8886,
+      "step": 30420
+    },
+    {
+      "epoch": 1.2035041230793568,
+      "grad_norm": 1.201408204583174,
+      "learning_rate": 7.482310461934036e-06,
+      "loss": 0.8845,
+      "step": 30430
+    },
+    {
+      "epoch": 1.2038996222982459,
+      "grad_norm": 1.2820270144495614,
+      "learning_rate": 7.480312802274108e-06,
+      "loss": 0.8769,
+      "step": 30440
+    },
+    {
+      "epoch": 1.204295121517135,
+      "grad_norm": 1.2680078696978063,
+      "learning_rate": 7.478314617326421e-06,
+      "loss": 0.8921,
+      "step": 30450
+    },
+    {
+      "epoch": 1.204690620736024,
+      "grad_norm": 1.394901389157302,
+      "learning_rate": 7.4763159075141576e-06,
+      "loss": 0.8772,
+      "step": 30460
+    },
+    {
+      "epoch": 1.205086119954913,
+      "grad_norm": 1.2219993598695291,
+      "learning_rate": 7.474316673260611e-06,
+      "loss": 0.8799,
+      "step": 30470
+    },
+    {
+      "epoch": 1.2054816191738023,
+      "grad_norm": 1.2392409578102421,
+      "learning_rate": 7.472316914989182e-06,
+      "loss": 0.8882,
+      "step": 30480
+    },
+    {
+      "epoch": 1.2058771183926913,
+      "grad_norm": 1.2441364063396931,
+      "learning_rate": 7.470316633123386e-06,
+      "loss": 0.861,
+      "step": 30490
+    },
+    {
+      "epoch": 1.2062726176115803,
+      "grad_norm": 1.1590666017279956,
+      "learning_rate": 7.468315828086849e-06,
+      "loss": 0.8659,
+      "step": 30500
+    },
+    {
+      "epoch": 1.2066681168304694,
+      "grad_norm": 1.2702387855919597,
+      "learning_rate": 7.46631450030331e-06,
+      "loss": 0.8672,
+      "step": 30510
+    },
+    {
+      "epoch": 1.2070636160493584,
+      "grad_norm": 1.209849087076642,
+      "learning_rate": 7.464312650196611e-06,
+      "loss": 0.8761,
+      "step": 30520
+    },
+    {
+      "epoch": 1.2074591152682475,
+      "grad_norm": 1.1434322955661456,
+      "learning_rate": 7.462310278190712e-06,
+      "loss": 0.8924,
+      "step": 30530
+    },
+    {
+      "epoch": 1.2078546144871365,
+      "grad_norm": 1.2656374759466265,
+      "learning_rate": 7.4603073847096815e-06,
+      "loss": 0.8682,
+      "step": 30540
+    },
+    {
+      "epoch": 1.2082501137060255,
+      "grad_norm": 1.4873812869617682,
+      "learning_rate": 7.458303970177697e-06,
+      "loss": 0.8901,
+      "step": 30550
+    },
+    {
+      "epoch": 1.2086456129249146,
+      "grad_norm": 1.0741380642659981,
+      "learning_rate": 7.456300035019048e-06,
+      "loss": 0.8773,
+      "step": 30560
+    },
+    {
+      "epoch": 1.2090411121438036,
+      "grad_norm": 1.1483662240291839,
+      "learning_rate": 7.454295579658133e-06,
+      "loss": 0.8761,
+      "step": 30570
+    },
+    {
+      "epoch": 1.2094366113626926,
+      "grad_norm": 1.2671918665761617,
+      "learning_rate": 7.452290604519461e-06,
+      "loss": 0.8786,
+      "step": 30580
+    },
+    {
+      "epoch": 1.2098321105815817,
+      "grad_norm": 1.0809221742041761,
+      "learning_rate": 7.450285110027653e-06,
+      "loss": 0.8728,
+      "step": 30590
+    },
+    {
+      "epoch": 1.2102276098004707,
+      "grad_norm": 1.187613536189826,
+      "learning_rate": 7.448279096607438e-06,
+      "loss": 0.8752,
+      "step": 30600
+    },
+    {
+      "epoch": 1.2106231090193598,
+      "grad_norm": 1.2879164023944698,
+      "learning_rate": 7.446272564683653e-06,
+      "loss": 0.8831,
+      "step": 30610
+    },
+    {
+      "epoch": 1.2110186082382488,
+      "grad_norm": 1.1623723799959365,
+      "learning_rate": 7.44426551468125e-06,
+      "loss": 0.867,
+      "step": 30620
+    },
+    {
+      "epoch": 1.2114141074571378,
+      "grad_norm": 1.3425602614978056,
+      "learning_rate": 7.442257947025286e-06,
+      "loss": 0.8804,
+      "step": 30630
+    },
+    {
+      "epoch": 1.2118096066760269,
+      "grad_norm": 1.1601354914649742,
+      "learning_rate": 7.44024986214093e-06,
+      "loss": 0.857,
+      "step": 30640
+    },
+    {
+      "epoch": 1.212205105894916,
+      "grad_norm": 1.225937988692282,
+      "learning_rate": 7.43824126045346e-06,
+      "loss": 0.883,
+      "step": 30650
+    },
+    {
+      "epoch": 1.212600605113805,
+      "grad_norm": 1.3907266664149571,
+      "learning_rate": 7.4362321423882655e-06,
+      "loss": 0.8629,
+      "step": 30660
+    },
+    {
+      "epoch": 1.212996104332694,
+      "grad_norm": 1.2297971837030044,
+      "learning_rate": 7.4342225083708385e-06,
+      "loss": 0.863,
+      "step": 30670
+    },
+    {
+      "epoch": 1.213391603551583,
+      "grad_norm": 1.1037882143071152,
+      "learning_rate": 7.432212358826789e-06,
+      "loss": 0.849,
+      "step": 30680
+    },
+    {
+      "epoch": 1.213787102770472,
+      "grad_norm": 1.1672533533360685,
+      "learning_rate": 7.430201694181831e-06,
+      "loss": 0.8918,
+      "step": 30690
+    },
+    {
+      "epoch": 1.2141826019893611,
+      "grad_norm": 1.1882231387364985,
+      "learning_rate": 7.428190514861789e-06,
+      "loss": 0.8719,
+      "step": 30700
+    },
+    {
+      "epoch": 1.2145781012082502,
+      "grad_norm": 1.1269240213938998,
+      "learning_rate": 7.426178821292596e-06,
+      "loss": 0.8918,
+      "step": 30710
+    },
+    {
+      "epoch": 1.2149736004271392,
+      "grad_norm": 1.1447869970913642,
+      "learning_rate": 7.424166613900294e-06,
+      "loss": 0.8776,
+      "step": 30720
+    },
+    {
+      "epoch": 1.2153690996460282,
+      "grad_norm": 1.1551866874166905,
+      "learning_rate": 7.422153893111035e-06,
+      "loss": 0.8933,
+      "step": 30730
+    },
+    {
+      "epoch": 1.2157645988649173,
+      "grad_norm": 1.2770598590349849,
+      "learning_rate": 7.420140659351078e-06,
+      "loss": 0.8532,
+      "step": 30740
+    },
+    {
+      "epoch": 1.2161600980838063,
+      "grad_norm": 1.2679285696559792,
+      "learning_rate": 7.4181269130467925e-06,
+      "loss": 0.8682,
+      "step": 30750
+    },
+    {
+      "epoch": 1.2165555973026954,
+      "grad_norm": 1.3037050216813681,
+      "learning_rate": 7.416112654624653e-06,
+      "loss": 0.8673,
+      "step": 30760
+    },
+    {
+      "epoch": 1.2169510965215844,
+      "grad_norm": 1.1891946357108039,
+      "learning_rate": 7.414097884511247e-06,
+      "loss": 0.8855,
+      "step": 30770
+    },
+    {
+      "epoch": 1.2173465957404734,
+      "grad_norm": 1.1199983984461903,
+      "learning_rate": 7.412082603133269e-06,
+      "loss": 0.8768,
+      "step": 30780
+    },
+    {
+      "epoch": 1.2177420949593625,
+      "grad_norm": 1.2732492102802262,
+      "learning_rate": 7.41006681091752e-06,
+      "loss": 0.8824,
+      "step": 30790
+    },
+    {
+      "epoch": 1.2181375941782515,
+      "grad_norm": 1.284205533910894,
+      "learning_rate": 7.408050508290908e-06,
+      "loss": 0.8613,
+      "step": 30800
+    },
+    {
+      "epoch": 1.2185330933971406,
+      "grad_norm": 1.1873043450084546,
+      "learning_rate": 7.4060336956804544e-06,
+      "loss": 0.8675,
+      "step": 30810
+    },
+    {
+      "epoch": 1.2189285926160296,
+      "grad_norm": 1.10807913536078,
+      "learning_rate": 7.404016373513286e-06,
+      "loss": 0.8849,
+      "step": 30820
+    },
+    {
+      "epoch": 1.2193240918349186,
+      "grad_norm": 1.1444073467795979,
+      "learning_rate": 7.401998542216634e-06,
+      "loss": 0.883,
+      "step": 30830
+    },
+    {
+      "epoch": 1.2197195910538077,
+      "grad_norm": 1.2277546753098685,
+      "learning_rate": 7.3999802022178444e-06,
+      "loss": 0.884,
+      "step": 30840
+    },
+    {
+      "epoch": 1.2201150902726967,
+      "grad_norm": 1.3438082688363076,
+      "learning_rate": 7.397961353944363e-06,
+      "loss": 0.8595,
+      "step": 30850
+    },
+    {
+      "epoch": 1.2205105894915858,
+      "grad_norm": 1.2793345614982596,
+      "learning_rate": 7.39594199782375e-06,
+      "loss": 0.858,
+      "step": 30860
+    },
+    {
+      "epoch": 1.2209060887104748,
+      "grad_norm": 1.047414218746642,
+      "learning_rate": 7.3939221342836685e-06,
+      "loss": 0.8765,
+      "step": 30870
+    },
+    {
+      "epoch": 1.2213015879293638,
+      "grad_norm": 0.9868602981324371,
+      "learning_rate": 7.391901763751893e-06,
+      "loss": 0.8717,
+      "step": 30880
+    },
+    {
+      "epoch": 1.2216970871482529,
+      "grad_norm": 1.0432859996866466,
+      "learning_rate": 7.389880886656302e-06,
+      "loss": 0.8845,
+      "step": 30890
+    },
+    {
+      "epoch": 1.222092586367142,
+      "grad_norm": 1.3761920878144458,
+      "learning_rate": 7.387859503424885e-06,
+      "loss": 0.863,
+      "step": 30900
+    },
+    {
+      "epoch": 1.222488085586031,
+      "grad_norm": 1.3523825898214374,
+      "learning_rate": 7.385837614485733e-06,
+      "loss": 0.87,
+      "step": 30910
+    },
+    {
+      "epoch": 1.22288358480492,
+      "grad_norm": 1.1706124295931761,
+      "learning_rate": 7.3838152202670475e-06,
+      "loss": 0.8789,
+      "step": 30920
+    },
+    {
+      "epoch": 1.223279084023809,
+      "grad_norm": 1.112920698040595,
+      "learning_rate": 7.38179232119714e-06,
+      "loss": 0.8622,
+      "step": 30930
+    },
+    {
+      "epoch": 1.223674583242698,
+      "grad_norm": 1.139550207736678,
+      "learning_rate": 7.379768917704423e-06,
+      "loss": 0.8849,
+      "step": 30940
+    },
+    {
+      "epoch": 1.224070082461587,
+      "grad_norm": 1.2182267428090179,
+      "learning_rate": 7.377745010217422e-06,
+      "loss": 0.8751,
+      "step": 30950
+    },
+    {
+      "epoch": 1.2244655816804761,
+      "grad_norm": 1.112034364641804,
+      "learning_rate": 7.375720599164762e-06,
+      "loss": 0.8563,
+      "step": 30960
+    },
+    {
+      "epoch": 1.2248610808993652,
+      "grad_norm": 1.2264328357299235,
+      "learning_rate": 7.373695684975181e-06,
+      "loss": 0.8519,
+      "step": 30970
+    },
+    {
+      "epoch": 1.2252565801182542,
+      "grad_norm": 1.249742937772663,
+      "learning_rate": 7.371670268077521e-06,
+      "loss": 0.8842,
+      "step": 30980
+    },
+    {
+      "epoch": 1.2256520793371433,
+      "grad_norm": 1.0666822579524449,
+      "learning_rate": 7.369644348900728e-06,
+      "loss": 0.8781,
+      "step": 30990
+    },
+    {
+      "epoch": 1.2260475785560323,
+      "grad_norm": 1.349618199259138,
+      "learning_rate": 7.367617927873861e-06,
+      "loss": 0.8565,
+      "step": 31000
+    },
+    {
+      "epoch": 1.2264430777749213,
+      "grad_norm": 1.3947666751637184,
+      "learning_rate": 7.365591005426079e-06,
+      "loss": 0.882,
+      "step": 31010
+    },
+    {
+      "epoch": 1.2268385769938104,
+      "grad_norm": 1.1658629143014931,
+      "learning_rate": 7.36356358198665e-06,
+      "loss": 0.8768,
+      "step": 31020
+    },
+    {
+      "epoch": 1.2272340762126994,
+      "grad_norm": 1.1997589208803603,
+      "learning_rate": 7.361535657984948e-06,
+      "loss": 0.8972,
+      "step": 31030
+    },
+    {
+      "epoch": 1.2276295754315885,
+      "grad_norm": 1.1546975370856547,
+      "learning_rate": 7.3595072338504515e-06,
+      "loss": 0.8625,
+      "step": 31040
+    },
+    {
+      "epoch": 1.2280250746504775,
+      "grad_norm": 1.2293308281607909,
+      "learning_rate": 7.357478310012744e-06,
+      "loss": 0.8672,
+      "step": 31050
+    },
+    {
+      "epoch": 1.2284205738693665,
+      "grad_norm": 1.247945308541937,
+      "learning_rate": 7.355448886901521e-06,
+      "loss": 0.868,
+      "step": 31060
+    },
+    {
+      "epoch": 1.2288160730882556,
+      "grad_norm": 1.3806279092443983,
+      "learning_rate": 7.353418964946579e-06,
+      "loss": 0.8608,
+      "step": 31070
+    },
+    {
+      "epoch": 1.2292115723071446,
+      "grad_norm": 1.2590724085519411,
+      "learning_rate": 7.3513885445778175e-06,
+      "loss": 0.8616,
+      "step": 31080
+    },
+    {
+      "epoch": 1.2296070715260337,
+      "grad_norm": 1.1164248425888081,
+      "learning_rate": 7.349357626225249e-06,
+      "loss": 0.8874,
+      "step": 31090
+    },
+    {
+      "epoch": 1.2300025707449227,
+      "grad_norm": 1.2414045650432723,
+      "learning_rate": 7.347326210318983e-06,
+      "loss": 0.8669,
+      "step": 31100
+    },
+    {
+      "epoch": 1.2303980699638117,
+      "grad_norm": 1.1907321293095392,
+      "learning_rate": 7.34529429728924e-06,
+      "loss": 0.8629,
+      "step": 31110
+    },
+    {
+      "epoch": 1.2307935691827008,
+      "grad_norm": 1.3220774501477472,
+      "learning_rate": 7.3432618875663465e-06,
+      "loss": 0.8654,
+      "step": 31120
+    },
+    {
+      "epoch": 1.2311890684015898,
+      "grad_norm": 1.1760025782629897,
+      "learning_rate": 7.341228981580729e-06,
+      "loss": 0.8547,
+      "step": 31130
+    },
+    {
+      "epoch": 1.2315845676204789,
+      "grad_norm": 1.242813154183523,
+      "learning_rate": 7.339195579762924e-06,
+      "loss": 0.8643,
+      "step": 31140
+    },
+    {
+      "epoch": 1.231980066839368,
+      "grad_norm": 1.295488838746389,
+      "learning_rate": 7.337161682543572e-06,
+      "loss": 0.8717,
+      "step": 31150
+    },
+    {
+      "epoch": 1.232375566058257,
+      "grad_norm": 1.3594940339402584,
+      "learning_rate": 7.335127290353415e-06,
+      "loss": 0.873,
+      "step": 31160
+    },
+    {
+      "epoch": 1.232771065277146,
+      "grad_norm": 1.2253792758014268,
+      "learning_rate": 7.333092403623304e-06,
+      "loss": 0.8618,
+      "step": 31170
+    },
+    {
+      "epoch": 1.233166564496035,
+      "grad_norm": 1.0445603472519083,
+      "learning_rate": 7.3310570227841934e-06,
+      "loss": 0.8651,
+      "step": 31180
+    },
+    {
+      "epoch": 1.233562063714924,
+      "grad_norm": 1.3084948848418392,
+      "learning_rate": 7.329021148267141e-06,
+      "loss": 0.8702,
+      "step": 31190
+    },
+    {
+      "epoch": 1.233957562933813,
+      "grad_norm": 1.22932538642094,
+      "learning_rate": 7.326984780503311e-06,
+      "loss": 0.8705,
+      "step": 31200
+    },
+    {
+      "epoch": 1.2343530621527021,
+      "grad_norm": 1.1620894275450713,
+      "learning_rate": 7.324947919923971e-06,
+      "loss": 0.8686,
+      "step": 31210
+    },
+    {
+      "epoch": 1.2347485613715912,
+      "grad_norm": 1.1566605689648233,
+      "learning_rate": 7.322910566960492e-06,
+      "loss": 0.8581,
+      "step": 31220
+    },
+    {
+      "epoch": 1.2351440605904804,
+      "grad_norm": 1.3485308482619998,
+      "learning_rate": 7.320872722044353e-06,
+      "loss": 0.8544,
+      "step": 31230
+    },
+    {
+      "epoch": 1.2355395598093695,
+      "grad_norm": 1.178213515724198,
+      "learning_rate": 7.318834385607132e-06,
+      "loss": 0.8667,
+      "step": 31240
+    },
+    {
+      "epoch": 1.2359350590282585,
+      "grad_norm": 1.4203837370416543,
+      "learning_rate": 7.316795558080515e-06,
+      "loss": 0.8629,
+      "step": 31250
+    },
+    {
+      "epoch": 1.2363305582471475,
+      "grad_norm": 1.018730164606751,
+      "learning_rate": 7.3147562398962905e-06,
+      "loss": 0.8747,
+      "step": 31260
+    },
+    {
+      "epoch": 1.2367260574660366,
+      "grad_norm": 1.173818356507461,
+      "learning_rate": 7.312716431486352e-06,
+      "loss": 0.8549,
+      "step": 31270
+    },
+    {
+      "epoch": 1.2371215566849256,
+      "grad_norm": 1.1226114907007296,
+      "learning_rate": 7.310676133282694e-06,
+      "loss": 0.8912,
+      "step": 31280
+    },
+    {
+      "epoch": 1.2375170559038147,
+      "grad_norm": 1.4679844843098098,
+      "learning_rate": 7.308635345717419e-06,
+      "loss": 0.8774,
+      "step": 31290
+    },
+    {
+      "epoch": 1.2379125551227037,
+      "grad_norm": 1.20059472400533,
+      "learning_rate": 7.306594069222727e-06,
+      "loss": 0.873,
+      "step": 31300
+    },
+    {
+      "epoch": 1.2383080543415927,
+      "grad_norm": 1.1476944818926023,
+      "learning_rate": 7.304552304230932e-06,
+      "loss": 0.8552,
+      "step": 31310
+    },
+    {
+      "epoch": 1.2387035535604818,
+      "grad_norm": 1.3089906317837405,
+      "learning_rate": 7.302510051174438e-06,
+      "loss": 0.8783,
+      "step": 31320
+    },
+    {
+      "epoch": 1.2390990527793708,
+      "grad_norm": 1.1961947022610788,
+      "learning_rate": 7.300467310485765e-06,
+      "loss": 0.8679,
+      "step": 31330
+    },
+    {
+      "epoch": 1.2394945519982599,
+      "grad_norm": 1.2936985679649506,
+      "learning_rate": 7.298424082597526e-06,
+      "loss": 0.8718,
+      "step": 31340
+    },
+    {
+      "epoch": 1.239890051217149,
+      "grad_norm": 1.2268852158815078,
+      "learning_rate": 7.2963803679424425e-06,
+      "loss": 0.8523,
+      "step": 31350
+    },
+    {
+      "epoch": 1.240285550436038,
+      "grad_norm": 1.298636821660816,
+      "learning_rate": 7.29433616695334e-06,
+      "loss": 0.8774,
+      "step": 31360
+    },
+    {
+      "epoch": 1.240681049654927,
+      "grad_norm": 1.1745987817692933,
+      "learning_rate": 7.292291480063145e-06,
+      "loss": 0.894,
+      "step": 31370
+    },
+    {
+      "epoch": 1.241076548873816,
+      "grad_norm": 1.1828074235921668,
+      "learning_rate": 7.290246307704886e-06,
+      "loss": 0.8853,
+      "step": 31380
+    },
+    {
+      "epoch": 1.241472048092705,
+      "grad_norm": 1.2138855329839873,
+      "learning_rate": 7.288200650311697e-06,
+      "loss": 0.8694,
+      "step": 31390
+    },
+    {
+      "epoch": 1.241867547311594,
+      "grad_norm": 1.2572049646820538,
+      "learning_rate": 7.286154508316809e-06,
+      "loss": 0.8573,
+      "step": 31400
+    },
+    {
+      "epoch": 1.2422630465304831,
+      "grad_norm": 1.1673004556088065,
+      "learning_rate": 7.284107882153566e-06,
+      "loss": 0.8509,
+      "step": 31410
+    },
+    {
+      "epoch": 1.2426585457493722,
+      "grad_norm": 1.185079575213692,
+      "learning_rate": 7.282060772255405e-06,
+      "loss": 0.8892,
+      "step": 31420
+    },
+    {
+      "epoch": 1.2430540449682612,
+      "grad_norm": 1.1505916080441212,
+      "learning_rate": 7.280013179055868e-06,
+      "loss": 0.8849,
+      "step": 31430
+    },
+    {
+      "epoch": 1.2434495441871503,
+      "grad_norm": 1.0699872828468424,
+      "learning_rate": 7.277965102988602e-06,
+      "loss": 0.8568,
+      "step": 31440
+    },
+    {
+      "epoch": 1.2438450434060393,
+      "grad_norm": 1.2363136720009182,
+      "learning_rate": 7.275916544487354e-06,
+      "loss": 0.8657,
+      "step": 31450
+    },
+    {
+      "epoch": 1.2442405426249283,
+      "grad_norm": 1.216989503342513,
+      "learning_rate": 7.273867503985973e-06,
+      "loss": 0.8732,
+      "step": 31460
+    },
+    {
+      "epoch": 1.2446360418438174,
+      "grad_norm": 1.129524993048562,
+      "learning_rate": 7.27181798191841e-06,
+      "loss": 0.8579,
+      "step": 31470
+    },
+    {
+      "epoch": 1.2450315410627064,
+      "grad_norm": 1.312519684527118,
+      "learning_rate": 7.26976797871872e-06,
+      "loss": 0.8722,
+      "step": 31480
+    },
+    {
+      "epoch": 1.2454270402815955,
+      "grad_norm": 1.1194329825990177,
+      "learning_rate": 7.2677174948210596e-06,
+      "loss": 0.861,
+      "step": 31490
+    },
+    {
+      "epoch": 1.2458225395004845,
+      "grad_norm": 1.1578306475693407,
+      "learning_rate": 7.265666530659683e-06,
+      "loss": 0.8624,
+      "step": 31500
+    },
+    {
+      "epoch": 1.2462180387193735,
+      "grad_norm": 1.1203388797303029,
+      "learning_rate": 7.263615086668951e-06,
+      "loss": 0.8739,
+      "step": 31510
+    },
+    {
+      "epoch": 1.2466135379382626,
+      "grad_norm": 1.1642138385927037,
+      "learning_rate": 7.261563163283327e-06,
+      "loss": 0.8655,
+      "step": 31520
+    },
+    {
+      "epoch": 1.2470090371571516,
+      "grad_norm": 1.2330200420848738,
+      "learning_rate": 7.259510760937368e-06,
+      "loss": 0.8702,
+      "step": 31530
+    },
+    {
+      "epoch": 1.2474045363760407,
+      "grad_norm": 1.0892217493232934,
+      "learning_rate": 7.257457880065742e-06,
+      "loss": 0.8817,
+      "step": 31540
+    },
+    {
+      "epoch": 1.2478000355949297,
+      "grad_norm": 1.295547559069332,
+      "learning_rate": 7.25540452110321e-06,
+      "loss": 0.8641,
+      "step": 31550
+    },
+    {
+      "epoch": 1.2481955348138187,
+      "grad_norm": 1.0413299693830773,
+      "learning_rate": 7.253350684484641e-06,
+      "loss": 0.8576,
+      "step": 31560
+    },
+    {
+      "epoch": 1.2485910340327078,
+      "grad_norm": 1.2151881161897842,
+      "learning_rate": 7.2512963706450026e-06,
+      "loss": 0.8564,
+      "step": 31570
+    },
+    {
+      "epoch": 1.2489865332515968,
+      "grad_norm": 1.2842122769745818,
+      "learning_rate": 7.249241580019363e-06,
+      "loss": 0.86,
+      "step": 31580
+    },
+    {
+      "epoch": 1.2493820324704858,
+      "grad_norm": 1.280145148146454,
+      "learning_rate": 7.247186313042891e-06,
+      "loss": 0.86,
+      "step": 31590
+    },
+    {
+      "epoch": 1.2497775316893749,
+      "grad_norm": 1.2544077681597052,
+      "learning_rate": 7.245130570150856e-06,
+      "loss": 0.8483,
+      "step": 31600
+    },
+    {
+      "epoch": 1.250173030908264,
+      "grad_norm": 1.2872839305033144,
+      "learning_rate": 7.243074351778631e-06,
+      "loss": 0.8715,
+      "step": 31610
+    },
+    {
+      "epoch": 1.250568530127153,
+      "grad_norm": 1.160940326062675,
+      "learning_rate": 7.2410176583616866e-06,
+      "loss": 0.8944,
+      "step": 31620
+    },
+    {
+      "epoch": 1.250964029346042,
+      "grad_norm": 1.0401148991120426,
+      "learning_rate": 7.238960490335597e-06,
+      "loss": 0.8754,
+      "step": 31630
+    },
+    {
+      "epoch": 1.251359528564931,
+      "grad_norm": 1.110946732267155,
+      "learning_rate": 7.236902848136033e-06,
+      "loss": 0.8779,
+      "step": 31640
+    },
+    {
+      "epoch": 1.25175502778382,
+      "grad_norm": 1.2022224003429425,
+      "learning_rate": 7.234844732198769e-06,
+      "loss": 0.8547,
+      "step": 31650
+    },
+    {
+      "epoch": 1.2521505270027091,
+      "grad_norm": 1.2423666740154775,
+      "learning_rate": 7.232786142959678e-06,
+      "loss": 0.8685,
+      "step": 31660
+    },
+    {
+      "epoch": 1.2525460262215982,
+      "grad_norm": 1.0535073112479556,
+      "learning_rate": 7.230727080854735e-06,
+      "loss": 0.8794,
+      "step": 31670
+    },
+    {
+      "epoch": 1.2529415254404872,
+      "grad_norm": 1.2446774591081935,
+      "learning_rate": 7.228667546320012e-06,
+      "loss": 0.853,
+      "step": 31680
+    },
+    {
+      "epoch": 1.2533370246593762,
+      "grad_norm": 1.1695945932793697,
+      "learning_rate": 7.226607539791686e-06,
+      "loss": 0.8715,
+      "step": 31690
+    },
+    {
+      "epoch": 1.2537325238782653,
+      "grad_norm": 1.2525059312609752,
+      "learning_rate": 7.224547061706031e-06,
+      "loss": 0.8781,
+      "step": 31700
+    },
+    {
+      "epoch": 1.2541280230971543,
+      "grad_norm": 1.107843234569712,
+      "learning_rate": 7.222486112499417e-06,
+      "loss": 0.8613,
+      "step": 31710
+    },
+    {
+      "epoch": 1.2545235223160434,
+      "grad_norm": 1.433472027281944,
+      "learning_rate": 7.220424692608322e-06,
+      "loss": 0.8629,
+      "step": 31720
+    },
+    {
+      "epoch": 1.2549190215349324,
+      "grad_norm": 1.2121744401639958,
+      "learning_rate": 7.218362802469318e-06,
+      "loss": 0.8724,
+      "step": 31730
+    },
+    {
+      "epoch": 1.2553145207538214,
+      "grad_norm": 1.245647044496111,
+      "learning_rate": 7.2163004425190766e-06,
+      "loss": 0.8755,
+      "step": 31740
+    },
+    {
+      "epoch": 1.2557100199727105,
+      "grad_norm": 1.1793159121658583,
+      "learning_rate": 7.214237613194372e-06,
+      "loss": 0.87,
+      "step": 31750
+    },
+    {
+      "epoch": 1.2561055191915995,
+      "grad_norm": 1.4154921806680583,
+      "learning_rate": 7.212174314932077e-06,
+      "loss": 0.8759,
+      "step": 31760
+    },
+    {
+      "epoch": 1.2565010184104886,
+      "grad_norm": 1.1570660480701687,
+      "learning_rate": 7.2101105481691605e-06,
+      "loss": 0.8484,
+      "step": 31770
+    },
+    {
+      "epoch": 1.2568965176293778,
+      "grad_norm": 1.3312105736122783,
+      "learning_rate": 7.2080463133426935e-06,
+      "loss": 0.8516,
+      "step": 31780
+    },
+    {
+      "epoch": 1.2572920168482669,
+      "grad_norm": 1.120168502556861,
+      "learning_rate": 7.205981610889846e-06,
+      "loss": 0.8654,
+      "step": 31790
+    },
+    {
+      "epoch": 1.257687516067156,
+      "grad_norm": 1.4031897475386614,
+      "learning_rate": 7.203916441247887e-06,
+      "loss": 0.8678,
+      "step": 31800
+    },
+    {
+      "epoch": 1.258083015286045,
+      "grad_norm": 1.2170041226692108,
+      "learning_rate": 7.201850804854182e-06,
+      "loss": 0.862,
+      "step": 31810
+    },
+    {
+      "epoch": 1.258478514504934,
+      "grad_norm": 1.1742007585537348,
+      "learning_rate": 7.199784702146202e-06,
+      "loss": 0.8603,
+      "step": 31820
+    },
+    {
+      "epoch": 1.258874013723823,
+      "grad_norm": 1.3371052527133733,
+      "learning_rate": 7.1977181335615085e-06,
+      "loss": 0.8589,
+      "step": 31830
+    },
+    {
+      "epoch": 1.259269512942712,
+      "grad_norm": 1.222790588857591,
+      "learning_rate": 7.195651099537765e-06,
+      "loss": 0.8806,
+      "step": 31840
+    },
+    {
+      "epoch": 1.259665012161601,
+      "grad_norm": 1.0876102133595091,
+      "learning_rate": 7.193583600512736e-06,
+      "loss": 0.874,
+      "step": 31850
+    },
+    {
+      "epoch": 1.2600605113804901,
+      "grad_norm": 1.1337590631700962,
+      "learning_rate": 7.191515636924281e-06,
+      "loss": 0.8524,
+      "step": 31860
+    },
+    {
+      "epoch": 1.2604560105993792,
+      "grad_norm": 1.3494189781290324,
+      "learning_rate": 7.189447209210359e-06,
+      "loss": 0.8762,
+      "step": 31870
+    },
+    {
+      "epoch": 1.2608515098182682,
+      "grad_norm": 1.2793363660690056,
+      "learning_rate": 7.187378317809028e-06,
+      "loss": 0.8639,
+      "step": 31880
+    },
+    {
+      "epoch": 1.2612470090371573,
+      "grad_norm": 1.0926760706820657,
+      "learning_rate": 7.185308963158445e-06,
+      "loss": 0.8548,
+      "step": 31890
+    },
+    {
+      "epoch": 1.2616425082560463,
+      "grad_norm": 1.4817884694980814,
+      "learning_rate": 7.183239145696862e-06,
+      "loss": 0.8669,
+      "step": 31900
+    },
+    {
+      "epoch": 1.2620380074749353,
+      "grad_norm": 1.138026427359992,
+      "learning_rate": 7.181168865862631e-06,
+      "loss": 0.8842,
+      "step": 31910
+    },
+    {
+      "epoch": 1.2624335066938244,
+      "grad_norm": 1.3158088417233433,
+      "learning_rate": 7.179098124094204e-06,
+      "loss": 0.8721,
+      "step": 31920
+    },
+    {
+      "epoch": 1.2628290059127134,
+      "grad_norm": 1.587337009026462,
+      "learning_rate": 7.177026920830125e-06,
+      "loss": 0.8675,
+      "step": 31930
+    },
+    {
+      "epoch": 1.2632245051316024,
+      "grad_norm": 1.2679009275670674,
+      "learning_rate": 7.174955256509043e-06,
+      "loss": 0.847,
+      "step": 31940
+    },
+    {
+      "epoch": 1.2636200043504915,
+      "grad_norm": 1.0965321520039089,
+      "learning_rate": 7.1728831315696986e-06,
+      "loss": 0.8765,
+      "step": 31950
+    },
+    {
+      "epoch": 1.2640155035693805,
+      "grad_norm": 1.3338168865013313,
+      "learning_rate": 7.170810546450934e-06,
+      "loss": 0.8782,
+      "step": 31960
+    },
+    {
+      "epoch": 1.2644110027882696,
+      "grad_norm": 1.269791458172327,
+      "learning_rate": 7.168737501591685e-06,
+      "loss": 0.8724,
+      "step": 31970
+    },
+    {
+      "epoch": 1.2648065020071586,
+      "grad_norm": 1.1935395137092817,
+      "learning_rate": 7.166663997430989e-06,
+      "loss": 0.8793,
+      "step": 31980
+    },
+    {
+      "epoch": 1.2652020012260476,
+      "grad_norm": 1.3230734527146772,
+      "learning_rate": 7.164590034407978e-06,
+      "loss": 0.8713,
+      "step": 31990
+    },
+    {
+      "epoch": 1.2655975004449367,
+      "grad_norm": 1.1794508212240498,
+      "learning_rate": 7.162515612961882e-06,
+      "loss": 0.8682,
+      "step": 32000
+    },
+    {
+      "epoch": 1.2659929996638257,
+      "grad_norm": 1.4078866696702395,
+      "learning_rate": 7.160440733532029e-06,
+      "loss": 0.8763,
+      "step": 32010
+    },
+    {
+      "epoch": 1.2663884988827148,
+      "grad_norm": 1.074316113246102,
+      "learning_rate": 7.15836539655784e-06,
+      "loss": 0.8666,
+      "step": 32020
+    },
+    {
+      "epoch": 1.2667839981016038,
+      "grad_norm": 1.149400299234721,
+      "learning_rate": 7.1562896024788385e-06,
+      "loss": 0.8615,
+      "step": 32030
+    },
+    {
+      "epoch": 1.2671794973204928,
+      "grad_norm": 1.0906993841549095,
+      "learning_rate": 7.15421335173464e-06,
+      "loss": 0.8645,
+      "step": 32040
+    },
+    {
+      "epoch": 1.2675749965393819,
+      "grad_norm": 1.2182768864036402,
+      "learning_rate": 7.152136644764961e-06,
+      "loss": 0.8609,
+      "step": 32050
+    },
+    {
+      "epoch": 1.267970495758271,
+      "grad_norm": 1.2005887287674193,
+      "learning_rate": 7.150059482009611e-06,
+      "loss": 0.8435,
+      "step": 32060
+    },
+    {
+      "epoch": 1.26836599497716,
+      "grad_norm": 1.1974936061138395,
+      "learning_rate": 7.1479818639084995e-06,
+      "loss": 0.8799,
+      "step": 32070
+    },
+    {
+      "epoch": 1.268761494196049,
+      "grad_norm": 1.2392282115498527,
+      "learning_rate": 7.145903790901627e-06,
+      "loss": 0.8801,
+      "step": 32080
+    },
+    {
+      "epoch": 1.269156993414938,
+      "grad_norm": 1.2631428065902746,
+      "learning_rate": 7.143825263429096e-06,
+      "loss": 0.8544,
+      "step": 32090
+    },
+    {
+      "epoch": 1.269552492633827,
+      "grad_norm": 1.2523901157944977,
+      "learning_rate": 7.141746281931104e-06,
+      "loss": 0.8688,
+      "step": 32100
+    },
+    {
+      "epoch": 1.2699479918527161,
+      "grad_norm": 1.3706967956352027,
+      "learning_rate": 7.139666846847942e-06,
+      "loss": 0.8651,
+      "step": 32110
+    },
+    {
+      "epoch": 1.2703434910716052,
+      "grad_norm": 1.2109669885347163,
+      "learning_rate": 7.137586958619996e-06,
+      "loss": 0.8679,
+      "step": 32120
+    },
+    {
+      "epoch": 1.2707389902904942,
+      "grad_norm": 1.3991337728841258,
+      "learning_rate": 7.135506617687757e-06,
+      "loss": 0.8409,
+      "step": 32130
+    },
+    {
+      "epoch": 1.2711344895093832,
+      "grad_norm": 1.1924039426635944,
+      "learning_rate": 7.133425824491801e-06,
+      "loss": 0.8689,
+      "step": 32140
+    },
+    {
+      "epoch": 1.2715299887282723,
+      "grad_norm": 1.362295132597007,
+      "learning_rate": 7.131344579472805e-06,
+      "loss": 0.8578,
+      "step": 32150
+    },
+    {
+      "epoch": 1.2719254879471613,
+      "grad_norm": 1.2343421632611244,
+      "learning_rate": 7.129262883071543e-06,
+      "loss": 0.8707,
+      "step": 32160
+    },
+    {
+      "epoch": 1.2723209871660504,
+      "grad_norm": 1.1894184926185967,
+      "learning_rate": 7.1271807357288806e-06,
+      "loss": 0.8517,
+      "step": 32170
+    },
+    {
+      "epoch": 1.2727164863849394,
+      "grad_norm": 1.3818950005212534,
+      "learning_rate": 7.125098137885782e-06,
+      "loss": 0.8613,
+      "step": 32180
+    },
+    {
+      "epoch": 1.2731119856038284,
+      "grad_norm": 1.1603924401387735,
+      "learning_rate": 7.123015089983305e-06,
+      "loss": 0.8647,
+      "step": 32190
+    },
+    {
+      "epoch": 1.2735074848227175,
+      "grad_norm": 1.3160384735486619,
+      "learning_rate": 7.120931592462605e-06,
+      "loss": 0.8531,
+      "step": 32200
+    },
+    {
+      "epoch": 1.2739029840416065,
+      "grad_norm": 1.2835214870555722,
+      "learning_rate": 7.118847645764928e-06,
+      "loss": 0.8579,
+      "step": 32210
+    },
+    {
+      "epoch": 1.2742984832604956,
+      "grad_norm": 1.388077953747811,
+      "learning_rate": 7.116763250331621e-06,
+      "loss": 0.8438,
+      "step": 32220
+    },
+    {
+      "epoch": 1.2746939824793846,
+      "grad_norm": 1.2908555534585417,
+      "learning_rate": 7.114678406604122e-06,
+      "loss": 0.8606,
+      "step": 32230
+    },
+    {
+      "epoch": 1.2750894816982736,
+      "grad_norm": 1.2526526548436479,
+      "learning_rate": 7.112593115023966e-06,
+      "loss": 0.8703,
+      "step": 32240
+    },
+    {
+      "epoch": 1.2754849809171627,
+      "grad_norm": 1.2778467661614983,
+      "learning_rate": 7.110507376032782e-06,
+      "loss": 0.875,
+      "step": 32250
+    },
+    {
+      "epoch": 1.2758804801360517,
+      "grad_norm": 1.307059533776455,
+      "learning_rate": 7.108421190072292e-06,
+      "loss": 0.8613,
+      "step": 32260
+    },
+    {
+      "epoch": 1.2762759793549407,
+      "grad_norm": 1.2749438945023903,
+      "learning_rate": 7.106334557584317e-06,
+      "loss": 0.8642,
+      "step": 32270
+    },
+    {
+      "epoch": 1.2766714785738298,
+      "grad_norm": 1.149785518292012,
+      "learning_rate": 7.104247479010769e-06,
+      "loss": 0.8633,
+      "step": 32280
+    },
+    {
+      "epoch": 1.2770669777927188,
+      "grad_norm": 1.1588261175108288,
+      "learning_rate": 7.1021599547936535e-06,
+      "loss": 0.8427,
+      "step": 32290
+    },
+    {
+      "epoch": 1.2774624770116079,
+      "grad_norm": 1.220578410689106,
+      "learning_rate": 7.100071985375077e-06,
+      "loss": 0.8694,
+      "step": 32300
+    },
+    {
+      "epoch": 1.277857976230497,
+      "grad_norm": 1.2845897819526524,
+      "learning_rate": 7.097983571197231e-06,
+      "loss": 0.8577,
+      "step": 32310
+    },
+    {
+      "epoch": 1.278253475449386,
+      "grad_norm": 1.157867286741088,
+      "learning_rate": 7.095894712702408e-06,
+      "loss": 0.8766,
+      "step": 32320
+    },
+    {
+      "epoch": 1.278648974668275,
+      "grad_norm": 1.1440947803786123,
+      "learning_rate": 7.093805410332992e-06,
+      "loss": 0.8814,
+      "step": 32330
+    },
+    {
+      "epoch": 1.279044473887164,
+      "grad_norm": 1.2427330508660894,
+      "learning_rate": 7.091715664531462e-06,
+      "loss": 0.8721,
+      "step": 32340
+    },
+    {
+      "epoch": 1.279439973106053,
+      "grad_norm": 1.155313234504232,
+      "learning_rate": 7.089625475740389e-06,
+      "loss": 0.8949,
+      "step": 32350
+    },
+    {
+      "epoch": 1.279835472324942,
+      "grad_norm": 1.2304585447473886,
+      "learning_rate": 7.0875348444024415e-06,
+      "loss": 0.8515,
+      "step": 32360
+    },
+    {
+      "epoch": 1.2802309715438311,
+      "grad_norm": 1.1588000821111213,
+      "learning_rate": 7.085443770960377e-06,
+      "loss": 0.8797,
+      "step": 32370
+    },
+    {
+      "epoch": 1.2806264707627202,
+      "grad_norm": 1.4707994405581124,
+      "learning_rate": 7.083352255857051e-06,
+      "loss": 0.8673,
+      "step": 32380
+    },
+    {
+      "epoch": 1.2810219699816092,
+      "grad_norm": 1.2138252837973547,
+      "learning_rate": 7.081260299535408e-06,
+      "loss": 0.866,
+      "step": 32390
+    },
+    {
+      "epoch": 1.2814174692004983,
+      "grad_norm": 1.1581313079229234,
+      "learning_rate": 7.079167902438491e-06,
+      "loss": 0.8479,
+      "step": 32400
+    },
+    {
+      "epoch": 1.2818129684193873,
+      "grad_norm": 1.3744146620827908,
+      "learning_rate": 7.0770750650094335e-06,
+      "loss": 0.8642,
+      "step": 32410
+    },
+    {
+      "epoch": 1.2822084676382763,
+      "grad_norm": 1.3730341534264714,
+      "learning_rate": 7.07498178769146e-06,
+      "loss": 0.849,
+      "step": 32420
+    },
+    {
+      "epoch": 1.2826039668571654,
+      "grad_norm": 1.2149439420355683,
+      "learning_rate": 7.072888070927896e-06,
+      "loss": 0.8782,
+      "step": 32430
+    },
+    {
+      "epoch": 1.2829994660760544,
+      "grad_norm": 1.2047264973344871,
+      "learning_rate": 7.070793915162149e-06,
+      "loss": 0.8697,
+      "step": 32440
+    },
+    {
+      "epoch": 1.2833949652949435,
+      "grad_norm": 1.241366024965804,
+      "learning_rate": 7.06869932083773e-06,
+      "loss": 0.8654,
+      "step": 32450
+    },
+    {
+      "epoch": 1.2837904645138325,
+      "grad_norm": 1.2673240329572115,
+      "learning_rate": 7.066604288398235e-06,
+      "loss": 0.8397,
+      "step": 32460
+    },
+    {
+      "epoch": 1.2841859637327215,
+      "grad_norm": 1.4167497689562447,
+      "learning_rate": 7.064508818287357e-06,
+      "loss": 0.8651,
+      "step": 32470
+    },
+    {
+      "epoch": 1.2845814629516106,
+      "grad_norm": 1.3695440545744886,
+      "learning_rate": 7.06241291094888e-06,
+      "loss": 0.8683,
+      "step": 32480
+    },
+    {
+      "epoch": 1.2849769621704996,
+      "grad_norm": 1.2498082325079158,
+      "learning_rate": 7.060316566826684e-06,
+      "loss": 0.8492,
+      "step": 32490
+    },
+    {
+      "epoch": 1.2853724613893887,
+      "grad_norm": 1.267632121482906,
+      "learning_rate": 7.0582197863647375e-06,
+      "loss": 0.8679,
+      "step": 32500
+    },
+    {
+      "epoch": 1.2857679606082777,
+      "grad_norm": 1.2407303913357426,
+      "learning_rate": 7.0561225700071e-06,
+      "loss": 0.8603,
+      "step": 32510
+    },
+    {
+      "epoch": 1.2861634598271667,
+      "grad_norm": 1.0509310829115002,
+      "learning_rate": 7.054024918197928e-06,
+      "loss": 0.8571,
+      "step": 32520
+    },
+    {
+      "epoch": 1.2865589590460558,
+      "grad_norm": 1.1969784447272407,
+      "learning_rate": 7.0519268313814696e-06,
+      "loss": 0.8197,
+      "step": 32530
+    },
+    {
+      "epoch": 1.2869544582649448,
+      "grad_norm": 1.1805631308961655,
+      "learning_rate": 7.049828310002063e-06,
+      "loss": 0.878,
+      "step": 32540
+    },
+    {
+      "epoch": 1.2873499574838339,
+      "grad_norm": 1.211590238409041,
+      "learning_rate": 7.047729354504136e-06,
+      "loss": 0.881,
+      "step": 32550
+    },
+    {
+      "epoch": 1.287745456702723,
+      "grad_norm": 1.3159108830343862,
+      "learning_rate": 7.045629965332215e-06,
+      "loss": 0.8667,
+      "step": 32560
+    },
+    {
+      "epoch": 1.288140955921612,
+      "grad_norm": 1.306342828026448,
+      "learning_rate": 7.0435301429309145e-06,
+      "loss": 0.8824,
+      "step": 32570
+    },
+    {
+      "epoch": 1.288536455140501,
+      "grad_norm": 1.3214931389988513,
+      "learning_rate": 7.041429887744938e-06,
+      "loss": 0.8465,
+      "step": 32580
+    },
+    {
+      "epoch": 1.28893195435939,
+      "grad_norm": 0.9889328389785522,
+      "learning_rate": 7.039329200219087e-06,
+      "loss": 0.858,
+      "step": 32590
+    },
+    {
+      "epoch": 1.289327453578279,
+      "grad_norm": 1.1349958066128845,
+      "learning_rate": 7.0372280807982484e-06,
+      "loss": 0.8712,
+      "step": 32600
+    },
+    {
+      "epoch": 1.289722952797168,
+      "grad_norm": 1.258006047398987,
+      "learning_rate": 7.035126529927405e-06,
+      "loss": 0.847,
+      "step": 32610
+    },
+    {
+      "epoch": 1.2901184520160573,
+      "grad_norm": 1.1023639853239255,
+      "learning_rate": 7.033024548051629e-06,
+      "loss": 0.8653,
+      "step": 32620
+    },
+    {
+      "epoch": 1.2905139512349464,
+      "grad_norm": 1.3152795537492687,
+      "learning_rate": 7.030922135616083e-06,
+      "loss": 0.8741,
+      "step": 32630
+    },
+    {
+      "epoch": 1.2909094504538354,
+      "grad_norm": 1.1806476932698309,
+      "learning_rate": 7.028819293066024e-06,
+      "loss": 0.8672,
+      "step": 32640
+    },
+    {
+      "epoch": 1.2913049496727245,
+      "grad_norm": 1.1141376258193387,
+      "learning_rate": 7.026716020846796e-06,
+      "loss": 0.8784,
+      "step": 32650
+    },
+    {
+      "epoch": 1.2917004488916135,
+      "grad_norm": 1.2529273972844328,
+      "learning_rate": 7.0246123194038365e-06,
+      "loss": 0.8804,
+      "step": 32660
+    },
+    {
+      "epoch": 1.2920959481105025,
+      "grad_norm": 1.4931997642184218,
+      "learning_rate": 7.022508189182674e-06,
+      "loss": 0.8303,
+      "step": 32670
+    },
+    {
+      "epoch": 1.2924914473293916,
+      "grad_norm": 1.3230802120903722,
+      "learning_rate": 7.020403630628928e-06,
+      "loss": 0.8566,
+      "step": 32680
+    },
+    {
+      "epoch": 1.2928869465482806,
+      "grad_norm": 1.227822924653676,
+      "learning_rate": 7.018298644188306e-06,
+      "loss": 0.8539,
+      "step": 32690
+    },
+    {
+      "epoch": 1.2932824457671697,
+      "grad_norm": 1.1533594887464231,
+      "learning_rate": 7.016193230306609e-06,
+      "loss": 0.8614,
+      "step": 32700
+    },
+    {
+      "epoch": 1.2936779449860587,
+      "grad_norm": 1.1929006616099462,
+      "learning_rate": 7.014087389429729e-06,
+      "loss": 0.8575,
+      "step": 32710
+    },
+    {
+      "epoch": 1.2940734442049477,
+      "grad_norm": 1.1832882787041001,
+      "learning_rate": 7.011981122003644e-06,
+      "loss": 0.8453,
+      "step": 32720
+    },
+    {
+      "epoch": 1.2944689434238368,
+      "grad_norm": 1.1516203437159556,
+      "learning_rate": 7.009874428474428e-06,
+      "loss": 0.8541,
+      "step": 32730
+    },
+    {
+      "epoch": 1.2948644426427258,
+      "grad_norm": 1.2584537903986432,
+      "learning_rate": 7.007767309288241e-06,
+      "loss": 0.8529,
+      "step": 32740
+    },
+    {
+      "epoch": 1.2952599418616149,
+      "grad_norm": 1.279138274868544,
+      "learning_rate": 7.005659764891336e-06,
+      "loss": 0.881,
+      "step": 32750
+    },
+    {
+      "epoch": 1.295655441080504,
+      "grad_norm": 1.2066545095486305,
+      "learning_rate": 7.003551795730053e-06,
+      "loss": 0.8529,
+      "step": 32760
+    },
+    {
+      "epoch": 1.296050940299393,
+      "grad_norm": 1.330795180593828,
+      "learning_rate": 7.001443402250827e-06,
+      "loss": 0.8731,
+      "step": 32770
+    },
+    {
+      "epoch": 1.296446439518282,
+      "grad_norm": 1.2399417490803584,
+      "learning_rate": 6.999334584900176e-06,
+      "loss": 0.8499,
+      "step": 32780
+    },
+    {
+      "epoch": 1.296841938737171,
+      "grad_norm": 1.1636648029297088,
+      "learning_rate": 6.997225344124713e-06,
+      "loss": 0.8628,
+      "step": 32790
+    },
+    {
+      "epoch": 1.29723743795606,
+      "grad_norm": 1.307264817924364,
+      "learning_rate": 6.99511568037114e-06,
+      "loss": 0.8615,
+      "step": 32800
+    },
+    {
+      "epoch": 1.297632937174949,
+      "grad_norm": 1.1892063499841834,
+      "learning_rate": 6.993005594086245e-06,
+      "loss": 0.8601,
+      "step": 32810
+    },
+    {
+      "epoch": 1.2980284363938381,
+      "grad_norm": 1.2240503677208405,
+      "learning_rate": 6.99089508571691e-06,
+      "loss": 0.8462,
+      "step": 32820
+    },
+    {
+      "epoch": 1.2984239356127272,
+      "grad_norm": 1.2772172323264246,
+      "learning_rate": 6.988784155710104e-06,
+      "loss": 0.8637,
+      "step": 32830
+    },
+    {
+      "epoch": 1.2988194348316162,
+      "grad_norm": 1.2301862880175405,
+      "learning_rate": 6.9866728045128865e-06,
+      "loss": 0.8436,
+      "step": 32840
+    },
+    {
+      "epoch": 1.2992149340505053,
+      "grad_norm": 1.6513206215345482,
+      "learning_rate": 6.9845610325724055e-06,
+      "loss": 0.8576,
+      "step": 32850
+    },
+    {
+      "epoch": 1.2996104332693943,
+      "grad_norm": 1.239212025168037,
+      "learning_rate": 6.982448840335898e-06,
+      "loss": 0.8627,
+      "step": 32860
+    },
+    {
+      "epoch": 1.3000059324882833,
+      "grad_norm": 1.2851046566080357,
+      "learning_rate": 6.980336228250688e-06,
+      "loss": 0.8559,
+      "step": 32870
+    },
+    {
+      "epoch": 1.3004014317071724,
+      "grad_norm": 1.5152286359169753,
+      "learning_rate": 6.978223196764193e-06,
+      "loss": 0.8715,
+      "step": 32880
+    },
+    {
+      "epoch": 1.3007969309260614,
+      "grad_norm": 1.1709620035776358,
+      "learning_rate": 6.976109746323918e-06,
+      "loss": 0.88,
+      "step": 32890
+    },
+    {
+      "epoch": 1.3011924301449505,
+      "grad_norm": 1.3306903177738802,
+      "learning_rate": 6.973995877377452e-06,
+      "loss": 0.8581,
+      "step": 32900
+    },
+    {
+      "epoch": 1.3015879293638395,
+      "grad_norm": 1.330333660144508,
+      "learning_rate": 6.971881590372478e-06,
+      "loss": 0.8667,
+      "step": 32910
+    },
+    {
+      "epoch": 1.3019834285827285,
+      "grad_norm": 1.3420758118515692,
+      "learning_rate": 6.969766885756768e-06,
+      "loss": 0.8543,
+      "step": 32920
+    },
+    {
+      "epoch": 1.3023789278016176,
+      "grad_norm": 1.1526467787467316,
+      "learning_rate": 6.967651763978176e-06,
+      "loss": 0.8798,
+      "step": 32930
+    },
+    {
+      "epoch": 1.3027744270205066,
+      "grad_norm": 1.1873607573663607,
+      "learning_rate": 6.96553622548465e-06,
+      "loss": 0.8423,
+      "step": 32940
+    },
+    {
+      "epoch": 1.3031699262393956,
+      "grad_norm": 1.0944298290898553,
+      "learning_rate": 6.963420270724226e-06,
+      "loss": 0.8833,
+      "step": 32950
+    },
+    {
+      "epoch": 1.3035654254582847,
+      "grad_norm": 1.3518531121827924,
+      "learning_rate": 6.961303900145026e-06,
+      "loss": 0.8568,
+      "step": 32960
+    },
+    {
+      "epoch": 1.3039609246771737,
+      "grad_norm": 1.1321459510444931,
+      "learning_rate": 6.959187114195263e-06,
+      "loss": 0.8567,
+      "step": 32970
+    },
+    {
+      "epoch": 1.3043564238960628,
+      "grad_norm": 1.327022156615552,
+      "learning_rate": 6.957069913323235e-06,
+      "loss": 0.8443,
+      "step": 32980
+    },
+    {
+      "epoch": 1.3047519231149518,
+      "grad_norm": 1.2173157735998366,
+      "learning_rate": 6.954952297977326e-06,
+      "loss": 0.8665,
+      "step": 32990
+    },
+    {
+      "epoch": 1.3051474223338408,
+      "grad_norm": 1.0807481929278648,
+      "learning_rate": 6.952834268606012e-06,
+      "loss": 0.8777,
+      "step": 33000
+    },
+    {
+      "epoch": 1.3055429215527299,
+      "grad_norm": 1.3742677359684001,
+      "learning_rate": 6.95071582565786e-06,
+      "loss": 0.8525,
+      "step": 33010
+    },
+    {
+      "epoch": 1.305938420771619,
+      "grad_norm": 1.3606600450995943,
+      "learning_rate": 6.948596969581514e-06,
+      "loss": 0.8384,
+      "step": 33020
+    },
+    {
+      "epoch": 1.306333919990508,
+      "grad_norm": 1.136158220829523,
+      "learning_rate": 6.9464777008257134e-06,
+      "loss": 0.8752,
+      "step": 33030
+    },
+    {
+      "epoch": 1.306729419209397,
+      "grad_norm": 1.1861248660522725,
+      "learning_rate": 6.944358019839282e-06,
+      "loss": 0.8538,
+      "step": 33040
+    },
+    {
+      "epoch": 1.307124918428286,
+      "grad_norm": 1.329806111620537,
+      "learning_rate": 6.942237927071136e-06,
+      "loss": 0.8732,
+      "step": 33050
+    },
+    {
+      "epoch": 1.307520417647175,
+      "grad_norm": 1.206357477593603,
+      "learning_rate": 6.940117422970269e-06,
+      "loss": 0.8291,
+      "step": 33060
+    },
+    {
+      "epoch": 1.3079159168660641,
+      "grad_norm": 1.3414244290383166,
+      "learning_rate": 6.937996507985772e-06,
+      "loss": 0.8689,
+      "step": 33070
+    },
+    {
+      "epoch": 1.3083114160849532,
+      "grad_norm": 1.137362444088307,
+      "learning_rate": 6.935875182566817e-06,
+      "loss": 0.8615,
+      "step": 33080
+    },
+    {
+      "epoch": 1.3087069153038422,
+      "grad_norm": 1.252691957291996,
+      "learning_rate": 6.933753447162663e-06,
+      "loss": 0.8665,
+      "step": 33090
+    },
+    {
+      "epoch": 1.3091024145227312,
+      "grad_norm": 1.2378405415961447,
+      "learning_rate": 6.931631302222659e-06,
+      "loss": 0.8657,
+      "step": 33100
+    },
+    {
+      "epoch": 1.3094979137416203,
+      "grad_norm": 1.2054794615371671,
+      "learning_rate": 6.929508748196238e-06,
+      "loss": 0.8667,
+      "step": 33110
+    },
+    {
+      "epoch": 1.3098934129605095,
+      "grad_norm": 1.291034891508623,
+      "learning_rate": 6.9273857855329205e-06,
+      "loss": 0.8463,
+      "step": 33120
+    },
+    {
+      "epoch": 1.3102889121793986,
+      "grad_norm": 1.3620104750879796,
+      "learning_rate": 6.9252624146823145e-06,
+      "loss": 0.8573,
+      "step": 33130
+    },
+    {
+      "epoch": 1.3106844113982876,
+      "grad_norm": 1.1131473303813413,
+      "learning_rate": 6.923138636094112e-06,
+      "loss": 0.8596,
+      "step": 33140
+    },
+    {
+      "epoch": 1.3110799106171767,
+      "grad_norm": 1.344715684183009,
+      "learning_rate": 6.921014450218096e-06,
+      "loss": 0.8556,
+      "step": 33150
+    },
+    {
+      "epoch": 1.3114754098360657,
+      "grad_norm": 1.3155803367901568,
+      "learning_rate": 6.9188898575041285e-06,
+      "loss": 0.8596,
+      "step": 33160
+    },
+    {
+      "epoch": 1.3118709090549547,
+      "grad_norm": 1.3241146761456173,
+      "learning_rate": 6.916764858402165e-06,
+      "loss": 0.8465,
+      "step": 33170
+    },
+    {
+      "epoch": 1.3122664082738438,
+      "grad_norm": 1.143356208665181,
+      "learning_rate": 6.914639453362243e-06,
+      "loss": 0.873,
+      "step": 33180
+    },
+    {
+      "epoch": 1.3126619074927328,
+      "grad_norm": 1.0973811587257378,
+      "learning_rate": 6.912513642834486e-06,
+      "loss": 0.8522,
+      "step": 33190
+    },
+    {
+      "epoch": 1.3130574067116219,
+      "grad_norm": 1.185291657251887,
+      "learning_rate": 6.910387427269105e-06,
+      "loss": 0.8321,
+      "step": 33200
+    },
+    {
+      "epoch": 1.313452905930511,
+      "grad_norm": 1.2142142428488651,
+      "learning_rate": 6.908260807116397e-06,
+      "loss": 0.8692,
+      "step": 33210
+    },
+    {
+      "epoch": 1.3138484051494,
+      "grad_norm": 1.1024804805762416,
+      "learning_rate": 6.906133782826743e-06,
+      "loss": 0.8821,
+      "step": 33220
+    },
+    {
+      "epoch": 1.314243904368289,
+      "grad_norm": 1.1021963214872073,
+      "learning_rate": 6.904006354850609e-06,
+      "loss": 0.8774,
+      "step": 33230
+    },
+    {
+      "epoch": 1.314639403587178,
+      "grad_norm": 1.1987763176503048,
+      "learning_rate": 6.90187852363855e-06,
+      "loss": 0.872,
+      "step": 33240
+    },
+    {
+      "epoch": 1.315034902806067,
+      "grad_norm": 1.3930806447843427,
+      "learning_rate": 6.899750289641203e-06,
+      "loss": 0.8433,
+      "step": 33250
+    },
+    {
+      "epoch": 1.315430402024956,
+      "grad_norm": 1.247507100098502,
+      "learning_rate": 6.89762165330929e-06,
+      "loss": 0.8576,
+      "step": 33260
+    },
+    {
+      "epoch": 1.3158259012438451,
+      "grad_norm": 1.2370389380123525,
+      "learning_rate": 6.895492615093622e-06,
+      "loss": 0.8533,
+      "step": 33270
+    },
+    {
+      "epoch": 1.3162214004627342,
+      "grad_norm": 1.158688418804201,
+      "learning_rate": 6.893363175445091e-06,
+      "loss": 0.8695,
+      "step": 33280
+    },
+    {
+      "epoch": 1.3166168996816232,
+      "grad_norm": 1.2126993221703164,
+      "learning_rate": 6.891233334814679e-06,
+      "loss": 0.8694,
+      "step": 33290
+    },
+    {
+      "epoch": 1.3170123989005122,
+      "grad_norm": 1.086577661018436,
+      "learning_rate": 6.889103093653446e-06,
+      "loss": 0.8553,
+      "step": 33300
+    },
+    {
+      "epoch": 1.3174078981194013,
+      "grad_norm": 1.2486947425332644,
+      "learning_rate": 6.886972452412544e-06,
+      "loss": 0.8694,
+      "step": 33310
+    },
+    {
+      "epoch": 1.3178033973382903,
+      "grad_norm": 1.1760538021656208,
+      "learning_rate": 6.8848414115432015e-06,
+      "loss": 0.8639,
+      "step": 33320
+    },
+    {
+      "epoch": 1.3181988965571794,
+      "grad_norm": 1.1807796116246225,
+      "learning_rate": 6.882709971496742e-06,
+      "loss": 0.8454,
+      "step": 33330
+    },
+    {
+      "epoch": 1.3185943957760684,
+      "grad_norm": 1.240878394666726,
+      "learning_rate": 6.880578132724564e-06,
+      "loss": 0.8546,
+      "step": 33340
+    },
+    {
+      "epoch": 1.3189898949949574,
+      "grad_norm": 1.1069008351349787,
+      "learning_rate": 6.8784458956781585e-06,
+      "loss": 0.8496,
+      "step": 33350
+    },
+    {
+      "epoch": 1.3193853942138465,
+      "grad_norm": 1.2234019262391245,
+      "learning_rate": 6.876313260809091e-06,
+      "loss": 0.8555,
+      "step": 33360
+    },
+    {
+      "epoch": 1.3197808934327355,
+      "grad_norm": 1.2111034994332135,
+      "learning_rate": 6.8741802285690215e-06,
+      "loss": 0.8604,
+      "step": 33370
+    },
+    {
+      "epoch": 1.3201763926516246,
+      "grad_norm": 1.3217721402187455,
+      "learning_rate": 6.872046799409688e-06,
+      "loss": 0.8537,
+      "step": 33380
+    },
+    {
+      "epoch": 1.3205718918705136,
+      "grad_norm": 1.4869478535469731,
+      "learning_rate": 6.869912973782916e-06,
+      "loss": 0.8646,
+      "step": 33390
+    },
+    {
+      "epoch": 1.3209673910894026,
+      "grad_norm": 1.3113989351988935,
+      "learning_rate": 6.8677787521406106e-06,
+      "loss": 0.8518,
+      "step": 33400
+    },
+    {
+      "epoch": 1.3213628903082917,
+      "grad_norm": 1.2487815947994365,
+      "learning_rate": 6.865644134934765e-06,
+      "loss": 0.8489,
+      "step": 33410
+    },
+    {
+      "epoch": 1.3217583895271807,
+      "grad_norm": 1.3276884583763622,
+      "learning_rate": 6.863509122617455e-06,
+      "loss": 0.8794,
+      "step": 33420
+    },
+    {
+      "epoch": 1.3221538887460698,
+      "grad_norm": 1.2430325573801633,
+      "learning_rate": 6.861373715640838e-06,
+      "loss": 0.8557,
+      "step": 33430
+    },
+    {
+      "epoch": 1.3225493879649588,
+      "grad_norm": 1.1889424770083057,
+      "learning_rate": 6.859237914457158e-06,
+      "loss": 0.8544,
+      "step": 33440
+    },
+    {
+      "epoch": 1.3229448871838478,
+      "grad_norm": 1.0634004600507065,
+      "learning_rate": 6.857101719518741e-06,
+      "loss": 0.8466,
+      "step": 33450
+    },
+    {
+      "epoch": 1.3233403864027369,
+      "grad_norm": 1.3031637661453688,
+      "learning_rate": 6.854965131277994e-06,
+      "loss": 0.8485,
+      "step": 33460
+    },
+    {
+      "epoch": 1.323735885621626,
+      "grad_norm": 1.3549717105227457,
+      "learning_rate": 6.8528281501874125e-06,
+      "loss": 0.8518,
+      "step": 33470
+    },
+    {
+      "epoch": 1.324131384840515,
+      "grad_norm": 1.2769228880239212,
+      "learning_rate": 6.850690776699574e-06,
+      "loss": 0.8333,
+      "step": 33480
+    },
+    {
+      "epoch": 1.324526884059404,
+      "grad_norm": 1.4371119507840768,
+      "learning_rate": 6.848553011267133e-06,
+      "loss": 0.8389,
+      "step": 33490
+    },
+    {
+      "epoch": 1.324922383278293,
+      "grad_norm": 1.1592394839803528,
+      "learning_rate": 6.846414854342834e-06,
+      "loss": 0.852,
+      "step": 33500
+    },
+    {
+      "epoch": 1.325317882497182,
+      "grad_norm": 1.1356945536930556,
+      "learning_rate": 6.844276306379502e-06,
+      "loss": 0.8475,
+      "step": 33510
+    },
+    {
+      "epoch": 1.3257133817160711,
+      "grad_norm": 1.3949911427559105,
+      "learning_rate": 6.8421373678300455e-06,
+      "loss": 0.8538,
+      "step": 33520
+    },
+    {
+      "epoch": 1.3261088809349602,
+      "grad_norm": 1.0593474204779023,
+      "learning_rate": 6.839998039147454e-06,
+      "loss": 0.8697,
+      "step": 33530
+    },
+    {
+      "epoch": 1.3265043801538492,
+      "grad_norm": 1.318571640354346,
+      "learning_rate": 6.837858320784801e-06,
+      "loss": 0.8623,
+      "step": 33540
+    },
+    {
+      "epoch": 1.3268998793727382,
+      "grad_norm": 1.2059943115831888,
+      "learning_rate": 6.835718213195242e-06,
+      "loss": 0.8628,
+      "step": 33550
+    },
+    {
+      "epoch": 1.3272953785916273,
+      "grad_norm": 1.55282491071514,
+      "learning_rate": 6.833577716832016e-06,
+      "loss": 0.8562,
+      "step": 33560
+    },
+    {
+      "epoch": 1.3276908778105163,
+      "grad_norm": 1.1657498620919489,
+      "learning_rate": 6.831436832148443e-06,
+      "loss": 0.8569,
+      "step": 33570
+    },
+    {
+      "epoch": 1.3280863770294054,
+      "grad_norm": 1.2380918473400322,
+      "learning_rate": 6.829295559597924e-06,
+      "loss": 0.8212,
+      "step": 33580
+    },
+    {
+      "epoch": 1.3284818762482944,
+      "grad_norm": 1.2489695909262502,
+      "learning_rate": 6.827153899633947e-06,
+      "loss": 0.865,
+      "step": 33590
+    },
+    {
+      "epoch": 1.3288773754671834,
+      "grad_norm": 1.3043823752910852,
+      "learning_rate": 6.825011852710077e-06,
+      "loss": 0.8637,
+      "step": 33600
+    },
+    {
+      "epoch": 1.3292728746860725,
+      "grad_norm": 1.2951103741104748,
+      "learning_rate": 6.822869419279963e-06,
+      "loss": 0.8836,
+      "step": 33610
+    },
+    {
+      "epoch": 1.3296683739049615,
+      "grad_norm": 1.2701479460945941,
+      "learning_rate": 6.8207265997973335e-06,
+      "loss": 0.8493,
+      "step": 33620
+    },
+    {
+      "epoch": 1.3300638731238505,
+      "grad_norm": 1.2454785089849922,
+      "learning_rate": 6.818583394716005e-06,
+      "loss": 0.8623,
+      "step": 33630
+    },
+    {
+      "epoch": 1.3304593723427396,
+      "grad_norm": 1.0708018093820824,
+      "learning_rate": 6.816439804489869e-06,
+      "loss": 0.8566,
+      "step": 33640
+    },
+    {
+      "epoch": 1.3308548715616286,
+      "grad_norm": 1.4554464804721812,
+      "learning_rate": 6.814295829572904e-06,
+      "loss": 0.8418,
+      "step": 33650
+    },
+    {
+      "epoch": 1.3312503707805177,
+      "grad_norm": 1.1497445530154269,
+      "learning_rate": 6.812151470419164e-06,
+      "loss": 0.8757,
+      "step": 33660
+    },
+    {
+      "epoch": 1.3316458699994067,
+      "grad_norm": 1.3327215587089964,
+      "learning_rate": 6.810006727482789e-06,
+      "loss": 0.8093,
+      "step": 33670
+    },
+    {
+      "epoch": 1.3320413692182957,
+      "grad_norm": 1.2933360065205524,
+      "learning_rate": 6.807861601217998e-06,
+      "loss": 0.8532,
+      "step": 33680
+    },
+    {
+      "epoch": 1.3324368684371848,
+      "grad_norm": 1.3914905252875656,
+      "learning_rate": 6.805716092079093e-06,
+      "loss": 0.861,
+      "step": 33690
+    },
+    {
+      "epoch": 1.3328323676560738,
+      "grad_norm": 1.200523756782097,
+      "learning_rate": 6.803570200520455e-06,
+      "loss": 0.8316,
+      "step": 33700
+    },
+    {
+      "epoch": 1.3332278668749629,
+      "grad_norm": 1.1503021028837674,
+      "learning_rate": 6.801423926996547e-06,
+      "loss": 0.8557,
+      "step": 33710
+    },
+    {
+      "epoch": 1.333623366093852,
+      "grad_norm": 1.4068837535234189,
+      "learning_rate": 6.799277271961915e-06,
+      "loss": 0.8661,
+      "step": 33720
+    },
+    {
+      "epoch": 1.334018865312741,
+      "grad_norm": 1.1911164786380948,
+      "learning_rate": 6.797130235871182e-06,
+      "loss": 0.8535,
+      "step": 33730
+    },
+    {
+      "epoch": 1.33441436453163,
+      "grad_norm": 1.1319739215457856,
+      "learning_rate": 6.794982819179053e-06,
+      "loss": 0.8611,
+      "step": 33740
+    },
+    {
+      "epoch": 1.334809863750519,
+      "grad_norm": 1.2673198760909448,
+      "learning_rate": 6.7928350223403164e-06,
+      "loss": 0.8591,
+      "step": 33750
+    },
+    {
+      "epoch": 1.335205362969408,
+      "grad_norm": 1.0168022634246991,
+      "learning_rate": 6.790686845809835e-06,
+      "loss": 0.8707,
+      "step": 33760
+    },
+    {
+      "epoch": 1.335600862188297,
+      "grad_norm": 1.2179878258959906,
+      "learning_rate": 6.788538290042559e-06,
+      "loss": 0.8362,
+      "step": 33770
+    },
+    {
+      "epoch": 1.3359963614071861,
+      "grad_norm": 1.2245559461484272,
+      "learning_rate": 6.7863893554935165e-06,
+      "loss": 0.8525,
+      "step": 33780
+    },
+    {
+      "epoch": 1.3363918606260752,
+      "grad_norm": 1.2498041038841285,
+      "learning_rate": 6.784240042617811e-06,
+      "loss": 0.8831,
+      "step": 33790
+    },
+    {
+      "epoch": 1.3367873598449642,
+      "grad_norm": 1.1430104987234004,
+      "learning_rate": 6.782090351870634e-06,
+      "loss": 0.8652,
+      "step": 33800
+    },
+    {
+      "epoch": 1.3371828590638533,
+      "grad_norm": 1.3902395695236862,
+      "learning_rate": 6.77994028370725e-06,
+      "loss": 0.8486,
+      "step": 33810
+    },
+    {
+      "epoch": 1.3375783582827423,
+      "grad_norm": 1.4514697568846802,
+      "learning_rate": 6.777789838583009e-06,
+      "loss": 0.8411,
+      "step": 33820
+    },
+    {
+      "epoch": 1.3379738575016313,
+      "grad_norm": 1.337396064903591,
+      "learning_rate": 6.775639016953337e-06,
+      "loss": 0.875,
+      "step": 33830
+    },
+    {
+      "epoch": 1.3383693567205204,
+      "grad_norm": 1.3687051820712348,
+      "learning_rate": 6.773487819273743e-06,
+      "loss": 0.8469,
+      "step": 33840
+    },
+    {
+      "epoch": 1.3387648559394094,
+      "grad_norm": 1.2431201248578243,
+      "learning_rate": 6.771336245999812e-06,
+      "loss": 0.8405,
+      "step": 33850
+    },
+    {
+      "epoch": 1.3391603551582985,
+      "grad_norm": 1.3211559631646084,
+      "learning_rate": 6.769184297587211e-06,
+      "loss": 0.8824,
+      "step": 33860
+    },
+    {
+      "epoch": 1.3395558543771875,
+      "grad_norm": 1.2124737620325858,
+      "learning_rate": 6.767031974491686e-06,
+      "loss": 0.8442,
+      "step": 33870
+    },
+    {
+      "epoch": 1.3399513535960765,
+      "grad_norm": 1.3429892973983668,
+      "learning_rate": 6.7648792771690605e-06,
+      "loss": 0.8538,
+      "step": 33880
+    },
+    {
+      "epoch": 1.3403468528149656,
+      "grad_norm": 1.2078114742764365,
+      "learning_rate": 6.762726206075243e-06,
+      "loss": 0.8583,
+      "step": 33890
+    },
+    {
+      "epoch": 1.3407423520338546,
+      "grad_norm": 1.2348618830683444,
+      "learning_rate": 6.760572761666213e-06,
+      "loss": 0.8637,
+      "step": 33900
+    },
+    {
+      "epoch": 1.3411378512527437,
+      "grad_norm": 1.1934770414561016,
+      "learning_rate": 6.758418944398034e-06,
+      "loss": 0.8365,
+      "step": 33910
+    },
+    {
+      "epoch": 1.3415333504716327,
+      "grad_norm": 1.3589564437514263,
+      "learning_rate": 6.75626475472685e-06,
+      "loss": 0.844,
+      "step": 33920
+    },
+    {
+      "epoch": 1.3419288496905217,
+      "grad_norm": 1.1779583198085724,
+      "learning_rate": 6.754110193108878e-06,
+      "loss": 0.8483,
+      "step": 33930
+    },
+    {
+      "epoch": 1.3423243489094108,
+      "grad_norm": 1.299740785271766,
+      "learning_rate": 6.751955260000419e-06,
+      "loss": 0.8646,
+      "step": 33940
+    },
+    {
+      "epoch": 1.3427198481282998,
+      "grad_norm": 1.5451437127726837,
+      "learning_rate": 6.74979995585785e-06,
+      "loss": 0.8327,
+      "step": 33950
+    },
+    {
+      "epoch": 1.343115347347189,
+      "grad_norm": 1.1520028216361868,
+      "learning_rate": 6.74764428113763e-06,
+      "loss": 0.8452,
+      "step": 33960
+    },
+    {
+      "epoch": 1.343510846566078,
+      "grad_norm": 1.4193225343393296,
+      "learning_rate": 6.7454882362962914e-06,
+      "loss": 0.8527,
+      "step": 33970
+    },
+    {
+      "epoch": 1.3439063457849671,
+      "grad_norm": 1.3050468634412833,
+      "learning_rate": 6.743331821790449e-06,
+      "loss": 0.8536,
+      "step": 33980
+    },
+    {
+      "epoch": 1.3443018450038562,
+      "grad_norm": 1.1616293448504966,
+      "learning_rate": 6.741175038076792e-06,
+      "loss": 0.8427,
+      "step": 33990
+    },
+    {
+      "epoch": 1.3446973442227452,
+      "grad_norm": 1.1662503446306764,
+      "learning_rate": 6.739017885612094e-06,
+      "loss": 0.8633,
+      "step": 34000
+    },
+    {
+      "epoch": 1.3450928434416343,
+      "grad_norm": 1.2772301061234932,
+      "learning_rate": 6.7368603648532e-06,
+      "loss": 0.8514,
+      "step": 34010
+    },
+    {
+      "epoch": 1.3454883426605233,
+      "grad_norm": 1.4417098315570034,
+      "learning_rate": 6.7347024762570366e-06,
+      "loss": 0.8354,
+      "step": 34020
+    },
+    {
+      "epoch": 1.3458838418794123,
+      "grad_norm": 1.164445967431401,
+      "learning_rate": 6.732544220280609e-06,
+      "loss": 0.8376,
+      "step": 34030
+    },
+    {
+      "epoch": 1.3462793410983014,
+      "grad_norm": 1.251551643742644,
+      "learning_rate": 6.730385597380997e-06,
+      "loss": 0.8511,
+      "step": 34040
+    },
+    {
+      "epoch": 1.3466748403171904,
+      "grad_norm": 1.1414447379897568,
+      "learning_rate": 6.728226608015361e-06,
+      "loss": 0.8296,
+      "step": 34050
+    },
+    {
+      "epoch": 1.3470703395360795,
+      "grad_norm": 1.2432911912788316,
+      "learning_rate": 6.726067252640938e-06,
+      "loss": 0.8437,
+      "step": 34060
+    },
+    {
+      "epoch": 1.3474658387549685,
+      "grad_norm": 1.3156479686323665,
+      "learning_rate": 6.723907531715042e-06,
+      "loss": 0.8248,
+      "step": 34070
+    },
+    {
+      "epoch": 1.3478613379738575,
+      "grad_norm": 1.3211936006580414,
+      "learning_rate": 6.721747445695065e-06,
+      "loss": 0.8214,
+      "step": 34080
+    },
+    {
+      "epoch": 1.3482568371927466,
+      "grad_norm": 1.3908615375464553,
+      "learning_rate": 6.719586995038478e-06,
+      "loss": 0.8495,
+      "step": 34090
+    },
+    {
+      "epoch": 1.3486523364116356,
+      "grad_norm": 1.3802599431946245,
+      "learning_rate": 6.717426180202824e-06,
+      "loss": 0.8368,
+      "step": 34100
+    },
+    {
+      "epoch": 1.3490478356305247,
+      "grad_norm": 1.2727497744015028,
+      "learning_rate": 6.715265001645727e-06,
+      "loss": 0.8541,
+      "step": 34110
+    },
+    {
+      "epoch": 1.3494433348494137,
+      "grad_norm": 1.4506087109245869,
+      "learning_rate": 6.713103459824892e-06,
+      "loss": 0.8588,
+      "step": 34120
+    },
+    {
+      "epoch": 1.3498388340683027,
+      "grad_norm": 1.3463215218321456,
+      "learning_rate": 6.710941555198092e-06,
+      "loss": 0.843,
+      "step": 34130
+    },
+    {
+      "epoch": 1.3502343332871918,
+      "grad_norm": 1.3087961888434054,
+      "learning_rate": 6.708779288223182e-06,
+      "loss": 0.8451,
+      "step": 34140
+    },
+    {
+      "epoch": 1.3506298325060808,
+      "grad_norm": 1.1124481495965013,
+      "learning_rate": 6.706616659358094e-06,
+      "loss": 0.8414,
+      "step": 34150
+    },
+    {
+      "epoch": 1.3510253317249699,
+      "grad_norm": 1.2690093272598362,
+      "learning_rate": 6.704453669060838e-06,
+      "loss": 0.8534,
+      "step": 34160
+    },
+    {
+      "epoch": 1.351420830943859,
+      "grad_norm": 1.1536777586610498,
+      "learning_rate": 6.702290317789493e-06,
+      "loss": 0.8673,
+      "step": 34170
+    },
+    {
+      "epoch": 1.351816330162748,
+      "grad_norm": 1.378546467478324,
+      "learning_rate": 6.700126606002224e-06,
+      "loss": 0.8476,
+      "step": 34180
+    },
+    {
+      "epoch": 1.352211829381637,
+      "grad_norm": 1.1295504136948702,
+      "learning_rate": 6.697962534157266e-06,
+      "loss": 0.8521,
+      "step": 34190
+    },
+    {
+      "epoch": 1.352607328600526,
+      "grad_norm": 1.2999837116331456,
+      "learning_rate": 6.695798102712934e-06,
+      "loss": 0.843,
+      "step": 34200
+    },
+    {
+      "epoch": 1.353002827819415,
+      "grad_norm": 1.453651270531441,
+      "learning_rate": 6.693633312127617e-06,
+      "loss": 0.8429,
+      "step": 34210
+    },
+    {
+      "epoch": 1.353398327038304,
+      "grad_norm": 1.0953123100106354,
+      "learning_rate": 6.691468162859779e-06,
+      "loss": 0.8411,
+      "step": 34220
+    },
+    {
+      "epoch": 1.3537938262571931,
+      "grad_norm": 1.410424971680694,
+      "learning_rate": 6.689302655367962e-06,
+      "loss": 0.85,
+      "step": 34230
+    },
+    {
+      "epoch": 1.3541893254760822,
+      "grad_norm": 1.225733223059829,
+      "learning_rate": 6.687136790110786e-06,
+      "loss": 0.8684,
+      "step": 34240
+    },
+    {
+      "epoch": 1.3545848246949712,
+      "grad_norm": 1.18010720556023,
+      "learning_rate": 6.68497056754694e-06,
+      "loss": 0.8368,
+      "step": 34250
+    },
+    {
+      "epoch": 1.3549803239138603,
+      "grad_norm": 1.3575984163740973,
+      "learning_rate": 6.682803988135196e-06,
+      "loss": 0.8386,
+      "step": 34260
+    },
+    {
+      "epoch": 1.3553758231327493,
+      "grad_norm": 1.2307639756995665,
+      "learning_rate": 6.680637052334399e-06,
+      "loss": 0.855,
+      "step": 34270
+    },
+    {
+      "epoch": 1.3557713223516383,
+      "grad_norm": 1.4622867136964524,
+      "learning_rate": 6.678469760603465e-06,
+      "loss": 0.8356,
+      "step": 34280
+    },
+    {
+      "epoch": 1.3561668215705274,
+      "grad_norm": 1.2123444778673635,
+      "learning_rate": 6.676302113401393e-06,
+      "loss": 0.863,
+      "step": 34290
+    },
+    {
+      "epoch": 1.3565623207894164,
+      "grad_norm": 1.1566884293731667,
+      "learning_rate": 6.674134111187252e-06,
+      "loss": 0.8462,
+      "step": 34300
+    },
+    {
+      "epoch": 1.3569578200083054,
+      "grad_norm": 1.5245027198315069,
+      "learning_rate": 6.671965754420187e-06,
+      "loss": 0.8551,
+      "step": 34310
+    },
+    {
+      "epoch": 1.3573533192271945,
+      "grad_norm": 1.5503509937131879,
+      "learning_rate": 6.669797043559419e-06,
+      "loss": 0.8534,
+      "step": 34320
+    },
+    {
+      "epoch": 1.3577488184460835,
+      "grad_norm": 1.5161801579136003,
+      "learning_rate": 6.667627979064246e-06,
+      "loss": 0.8629,
+      "step": 34330
+    },
+    {
+      "epoch": 1.3581443176649726,
+      "grad_norm": 1.3933323360333776,
+      "learning_rate": 6.665458561394037e-06,
+      "loss": 0.8188,
+      "step": 34340
+    },
+    {
+      "epoch": 1.3585398168838616,
+      "grad_norm": 1.2125322859327403,
+      "learning_rate": 6.663288791008238e-06,
+      "loss": 0.8671,
+      "step": 34350
+    },
+    {
+      "epoch": 1.3589353161027506,
+      "grad_norm": 1.09181004939391,
+      "learning_rate": 6.661118668366369e-06,
+      "loss": 0.8522,
+      "step": 34360
+    },
+    {
+      "epoch": 1.3593308153216397,
+      "grad_norm": 1.1541803179161123,
+      "learning_rate": 6.658948193928023e-06,
+      "loss": 0.8571,
+      "step": 34370
+    },
+    {
+      "epoch": 1.3597263145405287,
+      "grad_norm": 1.1401198392344825,
+      "learning_rate": 6.656777368152871e-06,
+      "loss": 0.8594,
+      "step": 34380
+    },
+    {
+      "epoch": 1.3601218137594178,
+      "grad_norm": 1.0777414809532697,
+      "learning_rate": 6.654606191500659e-06,
+      "loss": 0.8619,
+      "step": 34390
+    },
+    {
+      "epoch": 1.3605173129783068,
+      "grad_norm": 1.182354873528943,
+      "learning_rate": 6.6524346644311995e-06,
+      "loss": 0.8496,
+      "step": 34400
+    },
+    {
+      "epoch": 1.3609128121971958,
+      "grad_norm": 1.22832152985886,
+      "learning_rate": 6.65026278740439e-06,
+      "loss": 0.8707,
+      "step": 34410
+    },
+    {
+      "epoch": 1.3613083114160849,
+      "grad_norm": 1.0734913876282295,
+      "learning_rate": 6.648090560880194e-06,
+      "loss": 0.8555,
+      "step": 34420
+    },
+    {
+      "epoch": 1.361703810634974,
+      "grad_norm": 1.3219991065495482,
+      "learning_rate": 6.645917985318653e-06,
+      "loss": 0.8441,
+      "step": 34430
+    },
+    {
+      "epoch": 1.362099309853863,
+      "grad_norm": 1.3031809090034743,
+      "learning_rate": 6.6437450611798805e-06,
+      "loss": 0.8383,
+      "step": 34440
+    },
+    {
+      "epoch": 1.362494809072752,
+      "grad_norm": 1.2354011042759256,
+      "learning_rate": 6.641571788924065e-06,
+      "loss": 0.8269,
+      "step": 34450
+    },
+    {
+      "epoch": 1.3628903082916413,
+      "grad_norm": 1.2798956619441368,
+      "learning_rate": 6.63939816901147e-06,
+      "loss": 0.8216,
+      "step": 34460
+    },
+    {
+      "epoch": 1.3632858075105303,
+      "grad_norm": 1.3927668271795661,
+      "learning_rate": 6.637224201902427e-06,
+      "loss": 0.8109,
+      "step": 34470
+    },
+    {
+      "epoch": 1.3636813067294193,
+      "grad_norm": 1.2234481608957164,
+      "learning_rate": 6.635049888057348e-06,
+      "loss": 0.8444,
+      "step": 34480
+    },
+    {
+      "epoch": 1.3640768059483084,
+      "grad_norm": 1.2518580063674394,
+      "learning_rate": 6.632875227936715e-06,
+      "loss": 0.8522,
+      "step": 34490
+    },
+    {
+      "epoch": 1.3644723051671974,
+      "grad_norm": 1.4531456630970876,
+      "learning_rate": 6.6307002220010826e-06,
+      "loss": 0.821,
+      "step": 34500
+    },
+    {
+      "epoch": 1.3648678043860865,
+      "grad_norm": 1.269765392683203,
+      "learning_rate": 6.6285248707110816e-06,
+      "loss": 0.8729,
+      "step": 34510
+    },
+    {
+      "epoch": 1.3652633036049755,
+      "grad_norm": 1.275439457296418,
+      "learning_rate": 6.626349174527413e-06,
+      "loss": 0.8522,
+      "step": 34520
+    },
+    {
+      "epoch": 1.3656588028238645,
+      "grad_norm": 1.247794904540906,
+      "learning_rate": 6.624173133910852e-06,
+      "loss": 0.8498,
+      "step": 34530
+    },
+    {
+      "epoch": 1.3660543020427536,
+      "grad_norm": 1.3635336662393571,
+      "learning_rate": 6.621996749322247e-06,
+      "loss": 0.8154,
+      "step": 34540
+    },
+    {
+      "epoch": 1.3664498012616426,
+      "grad_norm": 1.2233774831688915,
+      "learning_rate": 6.619820021222518e-06,
+      "loss": 0.8413,
+      "step": 34550
+    },
+    {
+      "epoch": 1.3668453004805317,
+      "grad_norm": 1.4141520546962856,
+      "learning_rate": 6.61764295007266e-06,
+      "loss": 0.839,
+      "step": 34560
+    },
+    {
+      "epoch": 1.3672407996994207,
+      "grad_norm": 1.22293431899252,
+      "learning_rate": 6.61546553633374e-06,
+      "loss": 0.8267,
+      "step": 34570
+    },
+    {
+      "epoch": 1.3676362989183097,
+      "grad_norm": 1.1000438815382645,
+      "learning_rate": 6.613287780466895e-06,
+      "loss": 0.8605,
+      "step": 34580
+    },
+    {
+      "epoch": 1.3680317981371988,
+      "grad_norm": 1.1860041159559163,
+      "learning_rate": 6.6111096829333374e-06,
+      "loss": 0.8306,
+      "step": 34590
+    },
+    {
+      "epoch": 1.3684272973560878,
+      "grad_norm": 1.1880673513559445,
+      "learning_rate": 6.608931244194352e-06,
+      "loss": 0.8447,
+      "step": 34600
+    },
+    {
+      "epoch": 1.3688227965749769,
+      "grad_norm": 1.1607562036365922,
+      "learning_rate": 6.606752464711292e-06,
+      "loss": 0.8428,
+      "step": 34610
+    },
+    {
+      "epoch": 1.369218295793866,
+      "grad_norm": 1.3842871467397313,
+      "learning_rate": 6.604573344945587e-06,
+      "loss": 0.8142,
+      "step": 34620
+    },
+    {
+      "epoch": 1.369613795012755,
+      "grad_norm": 1.320298925785561,
+      "learning_rate": 6.602393885358737e-06,
+      "loss": 0.8763,
+      "step": 34630
+    },
+    {
+      "epoch": 1.370009294231644,
+      "grad_norm": 1.2504716389353607,
+      "learning_rate": 6.600214086412317e-06,
+      "loss": 0.8396,
+      "step": 34640
+    },
+    {
+      "epoch": 1.370404793450533,
+      "grad_norm": 1.3968478723056403,
+      "learning_rate": 6.598033948567968e-06,
+      "loss": 0.8349,
+      "step": 34650
+    },
+    {
+      "epoch": 1.370800292669422,
+      "grad_norm": 1.5643325909913062,
+      "learning_rate": 6.595853472287405e-06,
+      "loss": 0.845,
+      "step": 34660
+    },
+    {
+      "epoch": 1.371195791888311,
+      "grad_norm": 1.4103997179463394,
+      "learning_rate": 6.5936726580324174e-06,
+      "loss": 0.8409,
+      "step": 34670
+    },
+    {
+      "epoch": 1.3715912911072001,
+      "grad_norm": 1.305450632095221,
+      "learning_rate": 6.5914915062648645e-06,
+      "loss": 0.844,
+      "step": 34680
+    },
+    {
+      "epoch": 1.3719867903260892,
+      "grad_norm": 1.1567822256370635,
+      "learning_rate": 6.589310017446675e-06,
+      "loss": 0.8573,
+      "step": 34690
+    },
+    {
+      "epoch": 1.3723822895449782,
+      "grad_norm": 1.196494153500487,
+      "learning_rate": 6.587128192039854e-06,
+      "loss": 0.8542,
+      "step": 34700
+    },
+    {
+      "epoch": 1.3727777887638672,
+      "grad_norm": 1.2730896697999838,
+      "learning_rate": 6.584946030506473e-06,
+      "loss": 0.8669,
+      "step": 34710
+    },
+    {
+      "epoch": 1.3731732879827563,
+      "grad_norm": 1.474010777500466,
+      "learning_rate": 6.5827635333086745e-06,
+      "loss": 0.8581,
+      "step": 34720
+    },
+    {
+      "epoch": 1.3735687872016453,
+      "grad_norm": 1.2589007677404542,
+      "learning_rate": 6.580580700908677e-06,
+      "loss": 0.8618,
+      "step": 34730
+    },
+    {
+      "epoch": 1.3739642864205344,
+      "grad_norm": 1.381117426136932,
+      "learning_rate": 6.578397533768765e-06,
+      "loss": 0.8431,
+      "step": 34740
+    },
+    {
+      "epoch": 1.3743597856394234,
+      "grad_norm": 1.2641158865669493,
+      "learning_rate": 6.576214032351298e-06,
+      "loss": 0.8439,
+      "step": 34750
+    },
+    {
+      "epoch": 1.3747552848583124,
+      "grad_norm": 1.3668795160919065,
+      "learning_rate": 6.574030197118703e-06,
+      "loss": 0.8275,
+      "step": 34760
+    },
+    {
+      "epoch": 1.3751507840772015,
+      "grad_norm": 1.0883437943024825,
+      "learning_rate": 6.5718460285334775e-06,
+      "loss": 0.844,
+      "step": 34770
+    },
+    {
+      "epoch": 1.3755462832960905,
+      "grad_norm": 1.2244829089854836,
+      "learning_rate": 6.5696615270581936e-06,
+      "loss": 0.8334,
+      "step": 34780
+    },
+    {
+      "epoch": 1.3759417825149796,
+      "grad_norm": 1.5283477695391745,
+      "learning_rate": 6.567476693155489e-06,
+      "loss": 0.8374,
+      "step": 34790
+    },
+    {
+      "epoch": 1.3763372817338686,
+      "grad_norm": 1.178492190700992,
+      "learning_rate": 6.565291527288076e-06,
+      "loss": 0.862,
+      "step": 34800
+    },
+    {
+      "epoch": 1.3767327809527576,
+      "grad_norm": 1.561607205011345,
+      "learning_rate": 6.563106029918733e-06,
+      "loss": 0.8537,
+      "step": 34810
+    },
+    {
+      "epoch": 1.3771282801716467,
+      "grad_norm": 1.331429117616901,
+      "learning_rate": 6.5609202015103145e-06,
+      "loss": 0.8465,
+      "step": 34820
+    },
+    {
+      "epoch": 1.3775237793905357,
+      "grad_norm": 1.1821776862930318,
+      "learning_rate": 6.558734042525738e-06,
+      "loss": 0.8703,
+      "step": 34830
+    },
+    {
+      "epoch": 1.3779192786094248,
+      "grad_norm": 1.442958513220388,
+      "learning_rate": 6.556547553427996e-06,
+      "loss": 0.8476,
+      "step": 34840
+    },
+    {
+      "epoch": 1.3783147778283138,
+      "grad_norm": 1.366212394312256,
+      "learning_rate": 6.554360734680148e-06,
+      "loss": 0.8386,
+      "step": 34850
+    },
+    {
+      "epoch": 1.3787102770472028,
+      "grad_norm": 1.2017621365721718,
+      "learning_rate": 6.552173586745327e-06,
+      "loss": 0.8558,
+      "step": 34860
+    },
+    {
+      "epoch": 1.3791057762660919,
+      "grad_norm": 1.3860663031720417,
+      "learning_rate": 6.549986110086733e-06,
+      "loss": 0.8349,
+      "step": 34870
+    },
+    {
+      "epoch": 1.379501275484981,
+      "grad_norm": 1.2532840687301947,
+      "learning_rate": 6.547798305167637e-06,
+      "loss": 0.8376,
+      "step": 34880
+    },
+    {
+      "epoch": 1.37989677470387,
+      "grad_norm": 1.18442886458687,
+      "learning_rate": 6.545610172451374e-06,
+      "loss": 0.8441,
+      "step": 34890
+    },
+    {
+      "epoch": 1.380292273922759,
+      "grad_norm": 1.0962881909095727,
+      "learning_rate": 6.543421712401356e-06,
+      "loss": 0.84,
+      "step": 34900
+    },
+    {
+      "epoch": 1.380687773141648,
+      "grad_norm": 1.1955005513882742,
+      "learning_rate": 6.541232925481064e-06,
+      "loss": 0.8486,
+      "step": 34910
+    },
+    {
+      "epoch": 1.381083272360537,
+      "grad_norm": 1.1142234667903104,
+      "learning_rate": 6.539043812154042e-06,
+      "loss": 0.8517,
+      "step": 34920
+    },
+    {
+      "epoch": 1.3814787715794261,
+      "grad_norm": 1.3360733090763144,
+      "learning_rate": 6.536854372883907e-06,
+      "loss": 0.8432,
+      "step": 34930
+    },
+    {
+      "epoch": 1.3818742707983152,
+      "grad_norm": 1.129238387752422,
+      "learning_rate": 6.534664608134347e-06,
+      "loss": 0.8493,
+      "step": 34940
+    },
+    {
+      "epoch": 1.3822697700172042,
+      "grad_norm": 1.3742686297098083,
+      "learning_rate": 6.532474518369114e-06,
+      "loss": 0.8096,
+      "step": 34950
+    },
+    {
+      "epoch": 1.3826652692360932,
+      "grad_norm": 1.107977512462662,
+      "learning_rate": 6.530284104052034e-06,
+      "loss": 0.8471,
+      "step": 34960
+    },
+    {
+      "epoch": 1.3830607684549823,
+      "grad_norm": 1.2079645630114648,
+      "learning_rate": 6.5280933656469966e-06,
+      "loss": 0.8422,
+      "step": 34970
+    },
+    {
+      "epoch": 1.3834562676738713,
+      "grad_norm": 1.248101903361315,
+      "learning_rate": 6.525902303617964e-06,
+      "loss": 0.839,
+      "step": 34980
+    },
+    {
+      "epoch": 1.3838517668927603,
+      "grad_norm": 1.2550406829010792,
+      "learning_rate": 6.5237109184289645e-06,
+      "loss": 0.8557,
+      "step": 34990
+    },
+    {
+      "epoch": 1.3842472661116494,
+      "grad_norm": 1.201568184603744,
+      "learning_rate": 6.5215192105440986e-06,
+      "loss": 0.8299,
+      "step": 35000
+    },
+    {
+      "epoch": 1.3846427653305384,
+      "grad_norm": 1.3283519945453417,
+      "learning_rate": 6.519327180427532e-06,
+      "loss": 0.8368,
+      "step": 35010
+    },
+    {
+      "epoch": 1.3850382645494275,
+      "grad_norm": 1.31814484909803,
+      "learning_rate": 6.5171348285434965e-06,
+      "loss": 0.8475,
+      "step": 35020
+    },
+    {
+      "epoch": 1.3854337637683165,
+      "grad_norm": 1.2239876943549137,
+      "learning_rate": 6.514942155356295e-06,
+      "loss": 0.8545,
+      "step": 35030
+    },
+    {
+      "epoch": 1.3858292629872055,
+      "grad_norm": 1.514229252479359,
+      "learning_rate": 6.512749161330302e-06,
+      "loss": 0.8487,
+      "step": 35040
+    },
+    {
+      "epoch": 1.3862247622060946,
+      "grad_norm": 1.3827485601012202,
+      "learning_rate": 6.510555846929952e-06,
+      "loss": 0.8478,
+      "step": 35050
+    },
+    {
+      "epoch": 1.3866202614249836,
+      "grad_norm": 1.4008524664576336,
+      "learning_rate": 6.508362212619752e-06,
+      "loss": 0.8395,
+      "step": 35060
+    },
+    {
+      "epoch": 1.3870157606438727,
+      "grad_norm": 1.298598566093244,
+      "learning_rate": 6.506168258864278e-06,
+      "loss": 0.8348,
+      "step": 35070
+    },
+    {
+      "epoch": 1.3874112598627617,
+      "grad_norm": 1.1360269054483327,
+      "learning_rate": 6.503973986128171e-06,
+      "loss": 0.828,
+      "step": 35080
+    },
+    {
+      "epoch": 1.3878067590816507,
+      "grad_norm": 1.4389049366690272,
+      "learning_rate": 6.5017793948761384e-06,
+      "loss": 0.8268,
+      "step": 35090
+    },
+    {
+      "epoch": 1.3882022583005398,
+      "grad_norm": 1.2444008890484277,
+      "learning_rate": 6.499584485572959e-06,
+      "loss": 0.8358,
+      "step": 35100
+    },
+    {
+      "epoch": 1.3885977575194288,
+      "grad_norm": 1.3307423019019782,
+      "learning_rate": 6.497389258683477e-06,
+      "loss": 0.8327,
+      "step": 35110
+    },
+    {
+      "epoch": 1.3889932567383179,
+      "grad_norm": 1.269584181289205,
+      "learning_rate": 6.495193714672604e-06,
+      "loss": 0.8289,
+      "step": 35120
+    },
+    {
+      "epoch": 1.389388755957207,
+      "grad_norm": 1.2438158441455647,
+      "learning_rate": 6.49299785400532e-06,
+      "loss": 0.8602,
+      "step": 35130
+    },
+    {
+      "epoch": 1.389784255176096,
+      "grad_norm": 1.1507379633257733,
+      "learning_rate": 6.490801677146666e-06,
+      "loss": 0.8424,
+      "step": 35140
+    },
+    {
+      "epoch": 1.390179754394985,
+      "grad_norm": 1.3685481941316784,
+      "learning_rate": 6.488605184561758e-06,
+      "loss": 0.8402,
+      "step": 35150
+    },
+    {
+      "epoch": 1.390575253613874,
+      "grad_norm": 1.1434655976407526,
+      "learning_rate": 6.486408376715776e-06,
+      "loss": 0.8555,
+      "step": 35160
+    },
+    {
+      "epoch": 1.390970752832763,
+      "grad_norm": 1.1771405203531213,
+      "learning_rate": 6.484211254073965e-06,
+      "loss": 0.8352,
+      "step": 35170
+    },
+    {
+      "epoch": 1.391366252051652,
+      "grad_norm": 1.2965298049115275,
+      "learning_rate": 6.482013817101637e-06,
+      "loss": 0.8527,
+      "step": 35180
+    },
+    {
+      "epoch": 1.3917617512705411,
+      "grad_norm": 1.1961016513012066,
+      "learning_rate": 6.479816066264174e-06,
+      "loss": 0.8689,
+      "step": 35190
+    },
+    {
+      "epoch": 1.3921572504894302,
+      "grad_norm": 1.3965942730266359,
+      "learning_rate": 6.47761800202702e-06,
+      "loss": 0.8459,
+      "step": 35200
+    },
+    {
+      "epoch": 1.3925527497083192,
+      "grad_norm": 1.396158719703272,
+      "learning_rate": 6.475419624855688e-06,
+      "loss": 0.8479,
+      "step": 35210
+    },
+    {
+      "epoch": 1.3929482489272083,
+      "grad_norm": 1.3669426589320808,
+      "learning_rate": 6.473220935215756e-06,
+      "loss": 0.8302,
+      "step": 35220
+    },
+    {
+      "epoch": 1.3933437481460973,
+      "grad_norm": 1.245650658848858,
+      "learning_rate": 6.471021933572871e-06,
+      "loss": 0.8407,
+      "step": 35230
+    },
+    {
+      "epoch": 1.3937392473649863,
+      "grad_norm": 1.3690219620216622,
+      "learning_rate": 6.4688226203927405e-06,
+      "loss": 0.8263,
+      "step": 35240
+    },
+    {
+      "epoch": 1.3941347465838754,
+      "grad_norm": 1.2439929336670337,
+      "learning_rate": 6.466622996141145e-06,
+      "loss": 0.849,
+      "step": 35250
+    },
+    {
+      "epoch": 1.3945302458027644,
+      "grad_norm": 1.3512940828896634,
+      "learning_rate": 6.464423061283925e-06,
+      "loss": 0.8168,
+      "step": 35260
+    },
+    {
+      "epoch": 1.3949257450216535,
+      "grad_norm": 1.343577251605921,
+      "learning_rate": 6.462222816286989e-06,
+      "loss": 0.8197,
+      "step": 35270
+    },
+    {
+      "epoch": 1.3953212442405425,
+      "grad_norm": 1.1356073346978275,
+      "learning_rate": 6.460022261616312e-06,
+      "loss": 0.8872,
+      "step": 35280
+    },
+    {
+      "epoch": 1.3957167434594315,
+      "grad_norm": 1.6360293026849844,
+      "learning_rate": 6.457821397737932e-06,
+      "loss": 0.838,
+      "step": 35290
+    },
+    {
+      "epoch": 1.3961122426783208,
+      "grad_norm": 1.2370393670702475,
+      "learning_rate": 6.455620225117957e-06,
+      "loss": 0.8389,
+      "step": 35300
+    },
+    {
+      "epoch": 1.3965077418972098,
+      "grad_norm": 1.1784010165804566,
+      "learning_rate": 6.453418744222557e-06,
+      "loss": 0.8389,
+      "step": 35310
+    },
+    {
+      "epoch": 1.3969032411160989,
+      "grad_norm": 1.5870653058814774,
+      "learning_rate": 6.451216955517965e-06,
+      "loss": 0.8416,
+      "step": 35320
+    },
+    {
+      "epoch": 1.397298740334988,
+      "grad_norm": 1.2228657375375633,
+      "learning_rate": 6.449014859470486e-06,
+      "loss": 0.8507,
+      "step": 35330
+    },
+    {
+      "epoch": 1.397694239553877,
+      "grad_norm": 1.4883230500958542,
+      "learning_rate": 6.446812456546483e-06,
+      "loss": 0.8496,
+      "step": 35340
+    },
+    {
+      "epoch": 1.398089738772766,
+      "grad_norm": 1.2356745485880056,
+      "learning_rate": 6.444609747212389e-06,
+      "loss": 0.833,
+      "step": 35350
+    },
+    {
+      "epoch": 1.398485237991655,
+      "grad_norm": 1.426448244450631,
+      "learning_rate": 6.4424067319347e-06,
+      "loss": 0.8633,
+      "step": 35360
+    },
+    {
+      "epoch": 1.398880737210544,
+      "grad_norm": 1.2529223923630732,
+      "learning_rate": 6.44020341117998e-06,
+      "loss": 0.847,
+      "step": 35370
+    },
+    {
+      "epoch": 1.399276236429433,
+      "grad_norm": 1.307762331688145,
+      "learning_rate": 6.437999785414848e-06,
+      "loss": 0.8665,
+      "step": 35380
+    },
+    {
+      "epoch": 1.3996717356483221,
+      "grad_norm": 1.4419356349775445,
+      "learning_rate": 6.435795855105997e-06,
+      "loss": 0.8564,
+      "step": 35390
+    },
+    {
+      "epoch": 1.4000672348672112,
+      "grad_norm": 1.3323260987161705,
+      "learning_rate": 6.433591620720184e-06,
+      "loss": 0.8247,
+      "step": 35400
+    },
+    {
+      "epoch": 1.4004627340861002,
+      "grad_norm": 1.2299128387363623,
+      "learning_rate": 6.431387082724225e-06,
+      "loss": 0.8564,
+      "step": 35410
+    },
+    {
+      "epoch": 1.4008582333049893,
+      "grad_norm": 1.1487522444268858,
+      "learning_rate": 6.4291822415850055e-06,
+      "loss": 0.8352,
+      "step": 35420
+    },
+    {
+      "epoch": 1.4012537325238783,
+      "grad_norm": 1.24818888196066,
+      "learning_rate": 6.4269770977694725e-06,
+      "loss": 0.8652,
+      "step": 35430
+    },
+    {
+      "epoch": 1.4016492317427673,
+      "grad_norm": 1.1213990095052735,
+      "learning_rate": 6.424771651744638e-06,
+      "loss": 0.8485,
+      "step": 35440
+    },
+    {
+      "epoch": 1.4020447309616564,
+      "grad_norm": 1.3691700422138138,
+      "learning_rate": 6.422565903977576e-06,
+      "loss": 0.8526,
+      "step": 35450
+    },
+    {
+      "epoch": 1.4024402301805454,
+      "grad_norm": 1.324240156135189,
+      "learning_rate": 6.4203598549354274e-06,
+      "loss": 0.8728,
+      "step": 35460
+    },
+    {
+      "epoch": 1.4028357293994345,
+      "grad_norm": 1.583707136556271,
+      "learning_rate": 6.4181535050853945e-06,
+      "loss": 0.8281,
+      "step": 35470
+    },
+    {
+      "epoch": 1.4032312286183235,
+      "grad_norm": 1.6615911204562193,
+      "learning_rate": 6.415946854894746e-06,
+      "loss": 0.846,
+      "step": 35480
+    },
+    {
+      "epoch": 1.4036267278372125,
+      "grad_norm": 1.2580396177304356,
+      "learning_rate": 6.413739904830813e-06,
+      "loss": 0.8255,
+      "step": 35490
+    },
+    {
+      "epoch": 1.4040222270561016,
+      "grad_norm": 1.3012727725653996,
+      "learning_rate": 6.411532655360988e-06,
+      "loss": 0.8625,
+      "step": 35500
+    },
+    {
+      "epoch": 1.4044177262749906,
+      "grad_norm": 1.2657373861715762,
+      "learning_rate": 6.40932510695273e-06,
+      "loss": 0.8416,
+      "step": 35510
+    },
+    {
+      "epoch": 1.4048132254938797,
+      "grad_norm": 1.5397906675769235,
+      "learning_rate": 6.4071172600735576e-06,
+      "loss": 0.8549,
+      "step": 35520
+    },
+    {
+      "epoch": 1.4052087247127687,
+      "grad_norm": 1.0685179877664777,
+      "learning_rate": 6.404909115191057e-06,
+      "loss": 0.8546,
+      "step": 35530
+    },
+    {
+      "epoch": 1.4056042239316577,
+      "grad_norm": 1.1858034096888677,
+      "learning_rate": 6.402700672772875e-06,
+      "loss": 0.8248,
+      "step": 35540
+    },
+    {
+      "epoch": 1.4059997231505468,
+      "grad_norm": 1.2330167468195987,
+      "learning_rate": 6.400491933286721e-06,
+      "loss": 0.8444,
+      "step": 35550
+    },
+    {
+      "epoch": 1.4063952223694358,
+      "grad_norm": 1.3432494390382297,
+      "learning_rate": 6.398282897200371e-06,
+      "loss": 0.8498,
+      "step": 35560
+    },
+    {
+      "epoch": 1.4067907215883249,
+      "grad_norm": 1.3558475405289643,
+      "learning_rate": 6.396073564981658e-06,
+      "loss": 0.828,
+      "step": 35570
+    },
+    {
+      "epoch": 1.407186220807214,
+      "grad_norm": 1.5339159318347542,
+      "learning_rate": 6.393863937098481e-06,
+      "loss": 0.8322,
+      "step": 35580
+    },
+    {
+      "epoch": 1.407581720026103,
+      "grad_norm": 1.2150207588374102,
+      "learning_rate": 6.391654014018802e-06,
+      "loss": 0.8533,
+      "step": 35590
+    },
+    {
+      "epoch": 1.407977219244992,
+      "grad_norm": 1.6270728236824081,
+      "learning_rate": 6.389443796210646e-06,
+      "loss": 0.8331,
+      "step": 35600
+    },
+    {
+      "epoch": 1.408372718463881,
+      "grad_norm": 1.1381829426689705,
+      "learning_rate": 6.387233284142098e-06,
+      "loss": 0.8313,
+      "step": 35610
+    },
+    {
+      "epoch": 1.40876821768277,
+      "grad_norm": 1.18783313903544,
+      "learning_rate": 6.385022478281307e-06,
+      "loss": 0.8435,
+      "step": 35620
+    },
+    {
+      "epoch": 1.409163716901659,
+      "grad_norm": 1.130601306789989,
+      "learning_rate": 6.382811379096483e-06,
+      "loss": 0.8355,
+      "step": 35630
+    },
+    {
+      "epoch": 1.4095592161205481,
+      "grad_norm": 1.3362577266410565,
+      "learning_rate": 6.3805999870558995e-06,
+      "loss": 0.8428,
+      "step": 35640
+    },
+    {
+      "epoch": 1.4099547153394372,
+      "grad_norm": 1.3882280481564724,
+      "learning_rate": 6.378388302627891e-06,
+      "loss": 0.8199,
+      "step": 35650
+    },
+    {
+      "epoch": 1.4103502145583262,
+      "grad_norm": 1.2303027361035306,
+      "learning_rate": 6.376176326280855e-06,
+      "loss": 0.842,
+      "step": 35660
+    },
+    {
+      "epoch": 1.4107457137772152,
+      "grad_norm": 1.466240552654296,
+      "learning_rate": 6.373964058483251e-06,
+      "loss": 0.8297,
+      "step": 35670
+    },
+    {
+      "epoch": 1.4111412129961043,
+      "grad_norm": 1.126554194397866,
+      "learning_rate": 6.371751499703598e-06,
+      "loss": 0.8276,
+      "step": 35680
+    },
+    {
+      "epoch": 1.4115367122149933,
+      "grad_norm": 1.3181356263017643,
+      "learning_rate": 6.369538650410478e-06,
+      "loss": 0.8321,
+      "step": 35690
+    },
+    {
+      "epoch": 1.4119322114338824,
+      "grad_norm": 1.4080446612084359,
+      "learning_rate": 6.3673255110725356e-06,
+      "loss": 0.8373,
+      "step": 35700
+    },
+    {
+      "epoch": 1.4123277106527714,
+      "grad_norm": 1.0789373482032585,
+      "learning_rate": 6.365112082158475e-06,
+      "loss": 0.8319,
+      "step": 35710
+    },
+    {
+      "epoch": 1.4127232098716604,
+      "grad_norm": 1.3387775402171154,
+      "learning_rate": 6.362898364137064e-06,
+      "loss": 0.8595,
+      "step": 35720
+    },
+    {
+      "epoch": 1.4131187090905495,
+      "grad_norm": 1.386778635985283,
+      "learning_rate": 6.360684357477127e-06,
+      "loss": 0.8472,
+      "step": 35730
+    },
+    {
+      "epoch": 1.4135142083094385,
+      "grad_norm": 1.2753648162906723,
+      "learning_rate": 6.358470062647555e-06,
+      "loss": 0.8258,
+      "step": 35740
+    },
+    {
+      "epoch": 1.4139097075283276,
+      "grad_norm": 1.4459847948997728,
+      "learning_rate": 6.356255480117297e-06,
+      "loss": 0.8419,
+      "step": 35750
+    },
+    {
+      "epoch": 1.4143052067472166,
+      "grad_norm": 1.4685685665231003,
+      "learning_rate": 6.354040610355365e-06,
+      "loss": 0.8439,
+      "step": 35760
+    },
+    {
+      "epoch": 1.4147007059661056,
+      "grad_norm": 1.1569408781766877,
+      "learning_rate": 6.351825453830829e-06,
+      "loss": 0.8598,
+      "step": 35770
+    },
+    {
+      "epoch": 1.4150962051849947,
+      "grad_norm": 1.2892616812704085,
+      "learning_rate": 6.349610011012821e-06,
+      "loss": 0.8508,
+      "step": 35780
+    },
+    {
+      "epoch": 1.4154917044038837,
+      "grad_norm": 1.3250786277952604,
+      "learning_rate": 6.347394282370535e-06,
+      "loss": 0.845,
+      "step": 35790
+    },
+    {
+      "epoch": 1.415887203622773,
+      "grad_norm": 1.4777746649431074,
+      "learning_rate": 6.345178268373224e-06,
+      "loss": 0.8106,
+      "step": 35800
+    },
+    {
+      "epoch": 1.416282702841662,
+      "grad_norm": 1.125273496040668,
+      "learning_rate": 6.342961969490201e-06,
+      "loss": 0.8476,
+      "step": 35810
+    },
+    {
+      "epoch": 1.416678202060551,
+      "grad_norm": 1.2900326565884856,
+      "learning_rate": 6.340745386190841e-06,
+      "loss": 0.8413,
+      "step": 35820
+    },
+    {
+      "epoch": 1.41707370127944,
+      "grad_norm": 1.386187861282352,
+      "learning_rate": 6.338528518944578e-06,
+      "loss": 0.8378,
+      "step": 35830
+    },
+    {
+      "epoch": 1.4174692004983291,
+      "grad_norm": 1.270854554693669,
+      "learning_rate": 6.3363113682209066e-06,
+      "loss": 0.8253,
+      "step": 35840
+    },
+    {
+      "epoch": 1.4178646997172182,
+      "grad_norm": 1.3890910628886686,
+      "learning_rate": 6.334093934489381e-06,
+      "loss": 0.8226,
+      "step": 35850
+    },
+    {
+      "epoch": 1.4182601989361072,
+      "grad_norm": 1.2469668696035763,
+      "learning_rate": 6.331876218219618e-06,
+      "loss": 0.8273,
+      "step": 35860
+    },
+    {
+      "epoch": 1.4186556981549963,
+      "grad_norm": 1.259823769335166,
+      "learning_rate": 6.3296582198812885e-06,
+      "loss": 0.8445,
+      "step": 35870
+    },
+    {
+      "epoch": 1.4190511973738853,
+      "grad_norm": 1.1622495021388048,
+      "learning_rate": 6.3274399399441265e-06,
+      "loss": 0.862,
+      "step": 35880
+    },
+    {
+      "epoch": 1.4194466965927743,
+      "grad_norm": 1.3640435855142807,
+      "learning_rate": 6.32522137887793e-06,
+      "loss": 0.8449,
+      "step": 35890
+    },
+    {
+      "epoch": 1.4198421958116634,
+      "grad_norm": 1.2751562298144632,
+      "learning_rate": 6.323002537152547e-06,
+      "loss": 0.8459,
+      "step": 35900
+    },
+    {
+      "epoch": 1.4202376950305524,
+      "grad_norm": 1.291417434227309,
+      "learning_rate": 6.320783415237894e-06,
+      "loss": 0.8209,
+      "step": 35910
+    },
+    {
+      "epoch": 1.4206331942494415,
+      "grad_norm": 1.2525985481641728,
+      "learning_rate": 6.318564013603942e-06,
+      "loss": 0.8231,
+      "step": 35920
+    },
+    {
+      "epoch": 1.4210286934683305,
+      "grad_norm": 1.3795717676304924,
+      "learning_rate": 6.316344332720721e-06,
+      "loss": 0.846,
+      "step": 35930
+    },
+    {
+      "epoch": 1.4214241926872195,
+      "grad_norm": 1.3725137266759215,
+      "learning_rate": 6.314124373058321e-06,
+      "loss": 0.8365,
+      "step": 35940
+    },
+    {
+      "epoch": 1.4218196919061086,
+      "grad_norm": 1.1988483831546013,
+      "learning_rate": 6.311904135086894e-06,
+      "loss": 0.8445,
+      "step": 35950
+    },
+    {
+      "epoch": 1.4222151911249976,
+      "grad_norm": 1.2198387293440427,
+      "learning_rate": 6.309683619276648e-06,
+      "loss": 0.825,
+      "step": 35960
+    },
+    {
+      "epoch": 1.4226106903438867,
+      "grad_norm": 1.489283287702978,
+      "learning_rate": 6.307462826097847e-06,
+      "loss": 0.8443,
+      "step": 35970
+    },
+    {
+      "epoch": 1.4230061895627757,
+      "grad_norm": 1.2697809338910795,
+      "learning_rate": 6.30524175602082e-06,
+      "loss": 0.8466,
+      "step": 35980
+    },
+    {
+      "epoch": 1.4234016887816647,
+      "grad_norm": 1.3898897181345853,
+      "learning_rate": 6.303020409515952e-06,
+      "loss": 0.8369,
+      "step": 35990
+    },
+    {
+      "epoch": 1.4237971880005538,
+      "grad_norm": 1.2151170739728856,
+      "learning_rate": 6.300798787053684e-06,
+      "loss": 0.8345,
+      "step": 36000
+    },
+    {
+      "epoch": 1.4241926872194428,
+      "grad_norm": 1.072928600803555,
+      "learning_rate": 6.2985768891045176e-06,
+      "loss": 0.8405,
+      "step": 36010
+    },
+    {
+      "epoch": 1.4245881864383318,
+      "grad_norm": 1.3656338652832785,
+      "learning_rate": 6.2963547161390125e-06,
+      "loss": 0.8405,
+      "step": 36020
+    },
+    {
+      "epoch": 1.4249836856572209,
+      "grad_norm": 1.1683124030180605,
+      "learning_rate": 6.2941322686277905e-06,
+      "loss": 0.8585,
+      "step": 36030
+    },
+    {
+      "epoch": 1.42537918487611,
+      "grad_norm": 1.383226672256156,
+      "learning_rate": 6.291909547041524e-06,
+      "loss": 0.8517,
+      "step": 36040
+    },
+    {
+      "epoch": 1.425774684094999,
+      "grad_norm": 1.2166065982446987,
+      "learning_rate": 6.289686551850949e-06,
+      "loss": 0.8507,
+      "step": 36050
+    },
+    {
+      "epoch": 1.426170183313888,
+      "grad_norm": 1.5109477176491386,
+      "learning_rate": 6.287463283526858e-06,
+      "loss": 0.8425,
+      "step": 36060
+    },
+    {
+      "epoch": 1.426565682532777,
+      "grad_norm": 1.3533527668678242,
+      "learning_rate": 6.285239742540099e-06,
+      "loss": 0.8278,
+      "step": 36070
+    },
+    {
+      "epoch": 1.426961181751666,
+      "grad_norm": 1.1536430583476678,
+      "learning_rate": 6.283015929361583e-06,
+      "loss": 0.8631,
+      "step": 36080
+    },
+    {
+      "epoch": 1.4273566809705551,
+      "grad_norm": 1.4189550506206117,
+      "learning_rate": 6.280791844462273e-06,
+      "loss": 0.831,
+      "step": 36090
+    },
+    {
+      "epoch": 1.4277521801894442,
+      "grad_norm": 1.284261736956499,
+      "learning_rate": 6.278567488313194e-06,
+      "loss": 0.8235,
+      "step": 36100
+    },
+    {
+      "epoch": 1.4281476794083332,
+      "grad_norm": 1.0926733735522753,
+      "learning_rate": 6.276342861385426e-06,
+      "loss": 0.8407,
+      "step": 36110
+    },
+    {
+      "epoch": 1.4285431786272222,
+      "grad_norm": 1.1675223229528713,
+      "learning_rate": 6.274117964150106e-06,
+      "loss": 0.8389,
+      "step": 36120
+    },
+    {
+      "epoch": 1.4289386778461113,
+      "grad_norm": 1.257592852723291,
+      "learning_rate": 6.2718927970784285e-06,
+      "loss": 0.8375,
+      "step": 36130
+    },
+    {
+      "epoch": 1.4293341770650003,
+      "grad_norm": 1.655834860263871,
+      "learning_rate": 6.269667360641648e-06,
+      "loss": 0.8429,
+      "step": 36140
+    },
+    {
+      "epoch": 1.4297296762838894,
+      "grad_norm": 1.1256766569647347,
+      "learning_rate": 6.2674416553110715e-06,
+      "loss": 0.8389,
+      "step": 36150
+    },
+    {
+      "epoch": 1.4301251755027784,
+      "grad_norm": 1.406554157074674,
+      "learning_rate": 6.265215681558069e-06,
+      "loss": 0.8639,
+      "step": 36160
+    },
+    {
+      "epoch": 1.4305206747216674,
+      "grad_norm": 1.1943407246557791,
+      "learning_rate": 6.2629894398540594e-06,
+      "loss": 0.8287,
+      "step": 36170
+    },
+    {
+      "epoch": 1.4309161739405565,
+      "grad_norm": 1.2222021787711124,
+      "learning_rate": 6.260762930670524e-06,
+      "loss": 0.8659,
+      "step": 36180
+    },
+    {
+      "epoch": 1.4313116731594455,
+      "grad_norm": 1.270196597741624,
+      "learning_rate": 6.2585361544790005e-06,
+      "loss": 0.8282,
+      "step": 36190
+    },
+    {
+      "epoch": 1.4317071723783346,
+      "grad_norm": 1.1585839158589433,
+      "learning_rate": 6.25630911175108e-06,
+      "loss": 0.8464,
+      "step": 36200
+    },
+    {
+      "epoch": 1.4321026715972236,
+      "grad_norm": 1.3003342717934459,
+      "learning_rate": 6.254081802958414e-06,
+      "loss": 0.8554,
+      "step": 36210
+    },
+    {
+      "epoch": 1.4324981708161126,
+      "grad_norm": 1.453361295281751,
+      "learning_rate": 6.251854228572706e-06,
+      "loss": 0.8402,
+      "step": 36220
+    },
+    {
+      "epoch": 1.4328936700350017,
+      "grad_norm": 1.3499377670640456,
+      "learning_rate": 6.249626389065721e-06,
+      "loss": 0.8237,
+      "step": 36230
+    },
+    {
+      "epoch": 1.4332891692538907,
+      "grad_norm": 1.391179876041505,
+      "learning_rate": 6.2473982849092744e-06,
+      "loss": 0.8317,
+      "step": 36240
+    },
+    {
+      "epoch": 1.4336846684727798,
+      "grad_norm": 1.6524970818903704,
+      "learning_rate": 6.245169916575241e-06,
+      "loss": 0.8197,
+      "step": 36250
+    },
+    {
+      "epoch": 1.4340801676916688,
+      "grad_norm": 1.3887354337349944,
+      "learning_rate": 6.242941284535553e-06,
+      "loss": 0.8229,
+      "step": 36260
+    },
+    {
+      "epoch": 1.4344756669105578,
+      "grad_norm": 1.3847941138826125,
+      "learning_rate": 6.240712389262195e-06,
+      "loss": 0.8206,
+      "step": 36270
+    },
+    {
+      "epoch": 1.4348711661294469,
+      "grad_norm": 1.4589975520150786,
+      "learning_rate": 6.2384832312272085e-06,
+      "loss": 0.847,
+      "step": 36280
+    },
+    {
+      "epoch": 1.435266665348336,
+      "grad_norm": 1.248488246101786,
+      "learning_rate": 6.236253810902693e-06,
+      "loss": 0.8496,
+      "step": 36290
+    },
+    {
+      "epoch": 1.435662164567225,
+      "grad_norm": 1.4083224349325778,
+      "learning_rate": 6.234024128760799e-06,
+      "loss": 0.8422,
+      "step": 36300
+    },
+    {
+      "epoch": 1.436057663786114,
+      "grad_norm": 1.2919165400337989,
+      "learning_rate": 6.231794185273736e-06,
+      "loss": 0.833,
+      "step": 36310
+    },
+    {
+      "epoch": 1.436453163005003,
+      "grad_norm": 1.3020070575781513,
+      "learning_rate": 6.229563980913768e-06,
+      "loss": 0.8357,
+      "step": 36320
+    },
+    {
+      "epoch": 1.436848662223892,
+      "grad_norm": 1.2850784721037123,
+      "learning_rate": 6.2273335161532135e-06,
+      "loss": 0.8185,
+      "step": 36330
+    },
+    {
+      "epoch": 1.437244161442781,
+      "grad_norm": 1.4126580330451735,
+      "learning_rate": 6.225102791464448e-06,
+      "loss": 0.8427,
+      "step": 36340
+    },
+    {
+      "epoch": 1.4376396606616701,
+      "grad_norm": 1.2994788736487213,
+      "learning_rate": 6.2228718073199e-06,
+      "loss": 0.8447,
+      "step": 36350
+    },
+    {
+      "epoch": 1.4380351598805592,
+      "grad_norm": 1.3033189197562505,
+      "learning_rate": 6.220640564192053e-06,
+      "loss": 0.847,
+      "step": 36360
+    },
+    {
+      "epoch": 1.4384306590994482,
+      "grad_norm": 1.1641926442040482,
+      "learning_rate": 6.218409062553448e-06,
+      "loss": 0.8416,
+      "step": 36370
+    },
+    {
+      "epoch": 1.4388261583183373,
+      "grad_norm": 1.2430912178324904,
+      "learning_rate": 6.216177302876676e-06,
+      "loss": 0.8626,
+      "step": 36380
+    },
+    {
+      "epoch": 1.4392216575372263,
+      "grad_norm": 1.3809163425879087,
+      "learning_rate": 6.213945285634388e-06,
+      "loss": 0.8308,
+      "step": 36390
+    },
+    {
+      "epoch": 1.4396171567561153,
+      "grad_norm": 1.1200464988948764,
+      "learning_rate": 6.2117130112992864e-06,
+      "loss": 0.8321,
+      "step": 36400
+    },
+    {
+      "epoch": 1.4400126559750044,
+      "grad_norm": 1.493049074585919,
+      "learning_rate": 6.209480480344127e-06,
+      "loss": 0.8316,
+      "step": 36410
+    },
+    {
+      "epoch": 1.4404081551938934,
+      "grad_norm": 1.0941038659433808,
+      "learning_rate": 6.2072476932417235e-06,
+      "loss": 0.8461,
+      "step": 36420
+    },
+    {
+      "epoch": 1.4408036544127825,
+      "grad_norm": 1.1525182827238343,
+      "learning_rate": 6.205014650464943e-06,
+      "loss": 0.8512,
+      "step": 36430
+    },
+    {
+      "epoch": 1.4411991536316715,
+      "grad_norm": 1.4073071119697516,
+      "learning_rate": 6.202781352486702e-06,
+      "loss": 0.8353,
+      "step": 36440
+    },
+    {
+      "epoch": 1.4415946528505605,
+      "grad_norm": 1.4640427658019493,
+      "learning_rate": 6.200547799779977e-06,
+      "loss": 0.859,
+      "step": 36450
+    },
+    {
+      "epoch": 1.4419901520694496,
+      "grad_norm": 1.1224909034560724,
+      "learning_rate": 6.198313992817796e-06,
+      "loss": 0.8536,
+      "step": 36460
+    },
+    {
+      "epoch": 1.4423856512883386,
+      "grad_norm": 1.2596345934323583,
+      "learning_rate": 6.1960799320732416e-06,
+      "loss": 0.8172,
+      "step": 36470
+    },
+    {
+      "epoch": 1.4427811505072277,
+      "grad_norm": 1.2262890682973089,
+      "learning_rate": 6.1938456180194496e-06,
+      "loss": 0.8738,
+      "step": 36480
+    },
+    {
+      "epoch": 1.4431766497261167,
+      "grad_norm": 1.3084410516890757,
+      "learning_rate": 6.191611051129608e-06,
+      "loss": 0.821,
+      "step": 36490
+    },
+    {
+      "epoch": 1.4435721489450057,
+      "grad_norm": 1.1307231105980697,
+      "learning_rate": 6.1893762318769614e-06,
+      "loss": 0.823,
+      "step": 36500
+    },
+    {
+      "epoch": 1.4439676481638948,
+      "grad_norm": 1.3384245037477136,
+      "learning_rate": 6.187141160734804e-06,
+      "loss": 0.8252,
+      "step": 36510
+    },
+    {
+      "epoch": 1.4443631473827838,
+      "grad_norm": 1.0311457217061848,
+      "learning_rate": 6.184905838176488e-06,
+      "loss": 0.8374,
+      "step": 36520
+    },
+    {
+      "epoch": 1.4447586466016729,
+      "grad_norm": 1.2301340613615994,
+      "learning_rate": 6.182670264675415e-06,
+      "loss": 0.8385,
+      "step": 36530
+    },
+    {
+      "epoch": 1.445154145820562,
+      "grad_norm": 1.3233738786300002,
+      "learning_rate": 6.180434440705043e-06,
+      "loss": 0.8366,
+      "step": 36540
+    },
+    {
+      "epoch": 1.445549645039451,
+      "grad_norm": 1.4269324920365625,
+      "learning_rate": 6.178198366738879e-06,
+      "loss": 0.8388,
+      "step": 36550
+    },
+    {
+      "epoch": 1.44594514425834,
+      "grad_norm": 1.083358497306535,
+      "learning_rate": 6.175962043250487e-06,
+      "loss": 0.8248,
+      "step": 36560
+    },
+    {
+      "epoch": 1.446340643477229,
+      "grad_norm": 1.2333263118621456,
+      "learning_rate": 6.17372547071348e-06,
+      "loss": 0.8408,
+      "step": 36570
+    },
+    {
+      "epoch": 1.446736142696118,
+      "grad_norm": 1.4728295180519482,
+      "learning_rate": 6.1714886496015254e-06,
+      "loss": 0.8318,
+      "step": 36580
+    },
+    {
+      "epoch": 1.447131641915007,
+      "grad_norm": 1.3062771841053484,
+      "learning_rate": 6.1692515803883465e-06,
+      "loss": 0.8378,
+      "step": 36590
+    },
+    {
+      "epoch": 1.4475271411338961,
+      "grad_norm": 1.4450326937321798,
+      "learning_rate": 6.167014263547716e-06,
+      "loss": 0.8217,
+      "step": 36600
+    },
+    {
+      "epoch": 1.4479226403527852,
+      "grad_norm": 1.3443426761961468,
+      "learning_rate": 6.1647766995534565e-06,
+      "loss": 0.844,
+      "step": 36610
+    },
+    {
+      "epoch": 1.4483181395716742,
+      "grad_norm": 1.219261278239387,
+      "learning_rate": 6.162538888879448e-06,
+      "loss": 0.8169,
+      "step": 36620
+    },
+    {
+      "epoch": 1.4487136387905633,
+      "grad_norm": 1.4063018306499722,
+      "learning_rate": 6.1603008319996194e-06,
+      "loss": 0.8392,
+      "step": 36630
+    },
+    {
+      "epoch": 1.4491091380094525,
+      "grad_norm": 1.36000701869999,
+      "learning_rate": 6.158062529387952e-06,
+      "loss": 0.8119,
+      "step": 36640
+    },
+    {
+      "epoch": 1.4495046372283416,
+      "grad_norm": 1.2319344010193962,
+      "learning_rate": 6.1558239815184825e-06,
+      "loss": 0.8055,
+      "step": 36650
+    },
+    {
+      "epoch": 1.4499001364472306,
+      "grad_norm": 1.2903105245619124,
+      "learning_rate": 6.1535851888652966e-06,
+      "loss": 0.8218,
+      "step": 36660
+    },
+    {
+      "epoch": 1.4502956356661196,
+      "grad_norm": 1.3412628803988895,
+      "learning_rate": 6.151346151902529e-06,
+      "loss": 0.8244,
+      "step": 36670
+    },
+    {
+      "epoch": 1.4506911348850087,
+      "grad_norm": 1.4157507052119405,
+      "learning_rate": 6.149106871104371e-06,
+      "loss": 0.8246,
+      "step": 36680
+    },
+    {
+      "epoch": 1.4510866341038977,
+      "grad_norm": 1.2650881608747293,
+      "learning_rate": 6.1468673469450655e-06,
+      "loss": 0.8474,
+      "step": 36690
+    },
+    {
+      "epoch": 1.4514821333227867,
+      "grad_norm": 1.4125684288823825,
+      "learning_rate": 6.144627579898904e-06,
+      "loss": 0.8311,
+      "step": 36700
+    },
+    {
+      "epoch": 1.4518776325416758,
+      "grad_norm": 1.516154326386266,
+      "learning_rate": 6.142387570440231e-06,
+      "loss": 0.8156,
+      "step": 36710
+    },
+    {
+      "epoch": 1.4522731317605648,
+      "grad_norm": 1.19680621479771,
+      "learning_rate": 6.140147319043444e-06,
+      "loss": 0.8556,
+      "step": 36720
+    },
+    {
+      "epoch": 1.4526686309794539,
+      "grad_norm": 1.387951408188166,
+      "learning_rate": 6.1379068261829855e-06,
+      "loss": 0.8196,
+      "step": 36730
+    },
+    {
+      "epoch": 1.453064130198343,
+      "grad_norm": 1.5084295505917156,
+      "learning_rate": 6.135666092333356e-06,
+      "loss": 0.8352,
+      "step": 36740
+    },
+    {
+      "epoch": 1.453459629417232,
+      "grad_norm": 1.3980000408581248,
+      "learning_rate": 6.133425117969105e-06,
+      "loss": 0.8103,
+      "step": 36750
+    },
+    {
+      "epoch": 1.453855128636121,
+      "grad_norm": 1.4649643342356247,
+      "learning_rate": 6.131183903564833e-06,
+      "loss": 0.8393,
+      "step": 36760
+    },
+    {
+      "epoch": 1.45425062785501,
+      "grad_norm": 1.283565079727838,
+      "learning_rate": 6.12894244959519e-06,
+      "loss": 0.8378,
+      "step": 36770
+    },
+    {
+      "epoch": 1.454646127073899,
+      "grad_norm": 1.1290183544167811,
+      "learning_rate": 6.126700756534877e-06,
+      "loss": 0.8406,
+      "step": 36780
+    },
+    {
+      "epoch": 1.455041626292788,
+      "grad_norm": 1.1936587206096785,
+      "learning_rate": 6.124458824858647e-06,
+      "loss": 0.8159,
+      "step": 36790
+    },
+    {
+      "epoch": 1.4554371255116771,
+      "grad_norm": 1.5963402188574836,
+      "learning_rate": 6.122216655041301e-06,
+      "loss": 0.8476,
+      "step": 36800
+    },
+    {
+      "epoch": 1.4558326247305662,
+      "grad_norm": 1.3528111394379871,
+      "learning_rate": 6.119974247557694e-06,
+      "loss": 0.8291,
+      "step": 36810
+    },
+    {
+      "epoch": 1.4562281239494552,
+      "grad_norm": 1.3036486327570647,
+      "learning_rate": 6.117731602882729e-06,
+      "loss": 0.821,
+      "step": 36820
+    },
+    {
+      "epoch": 1.4566236231683443,
+      "grad_norm": 1.1921543304832969,
+      "learning_rate": 6.115488721491361e-06,
+      "loss": 0.8385,
+      "step": 36830
+    },
+    {
+      "epoch": 1.4570191223872333,
+      "grad_norm": 1.421552177735124,
+      "learning_rate": 6.113245603858592e-06,
+      "loss": 0.8188,
+      "step": 36840
+    },
+    {
+      "epoch": 1.4574146216061223,
+      "grad_norm": 1.183513087872989,
+      "learning_rate": 6.1110022504594755e-06,
+      "loss": 0.8481,
+      "step": 36850
+    },
+    {
+      "epoch": 1.4578101208250114,
+      "grad_norm": 1.3333946298831874,
+      "learning_rate": 6.108758661769117e-06,
+      "loss": 0.83,
+      "step": 36860
+    },
+    {
+      "epoch": 1.4582056200439004,
+      "grad_norm": 1.4122821665540046,
+      "learning_rate": 6.10651483826267e-06,
+      "loss": 0.8096,
+      "step": 36870
+    },
+    {
+      "epoch": 1.4586011192627895,
+      "grad_norm": 1.131854316531454,
+      "learning_rate": 6.1042707804153354e-06,
+      "loss": 0.8241,
+      "step": 36880
+    },
+    {
+      "epoch": 1.4589966184816785,
+      "grad_norm": 1.4914839187652236,
+      "learning_rate": 6.10202648870237e-06,
+      "loss": 0.8208,
+      "step": 36890
+    },
+    {
+      "epoch": 1.4593921177005675,
+      "grad_norm": 1.3434019975159093,
+      "learning_rate": 6.099781963599074e-06,
+      "loss": 0.7995,
+      "step": 36900
+    },
+    {
+      "epoch": 1.4597876169194566,
+      "grad_norm": 1.5275499973186812,
+      "learning_rate": 6.097537205580799e-06,
+      "loss": 0.8319,
+      "step": 36910
+    },
+    {
+      "epoch": 1.4601831161383456,
+      "grad_norm": 1.2879733736229924,
+      "learning_rate": 6.095292215122948e-06,
+      "loss": 0.8359,
+      "step": 36920
+    },
+    {
+      "epoch": 1.4605786153572347,
+      "grad_norm": 1.182222761910963,
+      "learning_rate": 6.093046992700969e-06,
+      "loss": 0.8134,
+      "step": 36930
+    },
+    {
+      "epoch": 1.4609741145761237,
+      "grad_norm": 1.4132263899337456,
+      "learning_rate": 6.090801538790364e-06,
+      "loss": 0.8306,
+      "step": 36940
+    },
+    {
+      "epoch": 1.4613696137950127,
+      "grad_norm": 1.229301065063994,
+      "learning_rate": 6.088555853866681e-06,
+      "loss": 0.8213,
+      "step": 36950
+    },
+    {
+      "epoch": 1.4617651130139018,
+      "grad_norm": 1.1830134562667467,
+      "learning_rate": 6.086309938405517e-06,
+      "loss": 0.8205,
+      "step": 36960
+    },
+    {
+      "epoch": 1.4621606122327908,
+      "grad_norm": 1.4243336232067556,
+      "learning_rate": 6.084063792882521e-06,
+      "loss": 0.8258,
+      "step": 36970
+    },
+    {
+      "epoch": 1.4625561114516799,
+      "grad_norm": 1.2494875825490133,
+      "learning_rate": 6.081817417773385e-06,
+      "loss": 0.8442,
+      "step": 36980
+    },
+    {
+      "epoch": 1.462951610670569,
+      "grad_norm": 1.2359816079357258,
+      "learning_rate": 6.079570813553852e-06,
+      "loss": 0.8278,
+      "step": 36990
+    },
+    {
+      "epoch": 1.463347109889458,
+      "grad_norm": 1.490059423288807,
+      "learning_rate": 6.077323980699717e-06,
+      "loss": 0.8276,
+      "step": 37000
+    },
+    {
+      "epoch": 1.463742609108347,
+      "grad_norm": 1.1769040254613954,
+      "learning_rate": 6.075076919686821e-06,
+      "loss": 0.8385,
+      "step": 37010
+    },
+    {
+      "epoch": 1.464138108327236,
+      "grad_norm": 1.22391141428612,
+      "learning_rate": 6.07282963099105e-06,
+      "loss": 0.8415,
+      "step": 37020
+    },
+    {
+      "epoch": 1.464533607546125,
+      "grad_norm": 1.2994719463292181,
+      "learning_rate": 6.070582115088346e-06,
+      "loss": 0.8313,
+      "step": 37030
+    },
+    {
+      "epoch": 1.464929106765014,
+      "grad_norm": 1.341785209887261,
+      "learning_rate": 6.0683343724546896e-06,
+      "loss": 0.8283,
+      "step": 37040
+    },
+    {
+      "epoch": 1.4653246059839031,
+      "grad_norm": 1.1625691324888021,
+      "learning_rate": 6.066086403566116e-06,
+      "loss": 0.8064,
+      "step": 37050
+    },
+    {
+      "epoch": 1.4657201052027922,
+      "grad_norm": 1.36481340690133,
+      "learning_rate": 6.063838208898706e-06,
+      "loss": 0.8031,
+      "step": 37060
+    },
+    {
+      "epoch": 1.4661156044216812,
+      "grad_norm": 1.4192336644419723,
+      "learning_rate": 6.06158978892859e-06,
+      "loss": 0.8467,
+      "step": 37070
+    },
+    {
+      "epoch": 1.4665111036405702,
+      "grad_norm": 1.186430424563176,
+      "learning_rate": 6.059341144131945e-06,
+      "loss": 0.8396,
+      "step": 37080
+    },
+    {
+      "epoch": 1.4669066028594593,
+      "grad_norm": 1.2446238173646171,
+      "learning_rate": 6.057092274984992e-06,
+      "loss": 0.8187,
+      "step": 37090
+    },
+    {
+      "epoch": 1.4673021020783483,
+      "grad_norm": 1.3571676136191617,
+      "learning_rate": 6.054843181964009e-06,
+      "loss": 0.8398,
+      "step": 37100
+    },
+    {
+      "epoch": 1.4676976012972374,
+      "grad_norm": 1.5060615168232727,
+      "learning_rate": 6.052593865545308e-06,
+      "loss": 0.8517,
+      "step": 37110
+    },
+    {
+      "epoch": 1.4680931005161264,
+      "grad_norm": 1.0813971615890645,
+      "learning_rate": 6.050344326205262e-06,
+      "loss": 0.8287,
+      "step": 37120
+    },
+    {
+      "epoch": 1.4684885997350154,
+      "grad_norm": 1.1773988089100857,
+      "learning_rate": 6.048094564420282e-06,
+      "loss": 0.8245,
+      "step": 37130
+    },
+    {
+      "epoch": 1.4688840989539047,
+      "grad_norm": 1.423950483262472,
+      "learning_rate": 6.0458445806668285e-06,
+      "loss": 0.8006,
+      "step": 37140
+    },
+    {
+      "epoch": 1.4692795981727937,
+      "grad_norm": 1.1024504405308695,
+      "learning_rate": 6.043594375421411e-06,
+      "loss": 0.8469,
+      "step": 37150
+    },
+    {
+      "epoch": 1.4696750973916828,
+      "grad_norm": 1.3152587216061993,
+      "learning_rate": 6.041343949160584e-06,
+      "loss": 0.8365,
+      "step": 37160
+    },
+    {
+      "epoch": 1.4700705966105718,
+      "grad_norm": 1.462331091482157,
+      "learning_rate": 6.039093302360949e-06,
+      "loss": 0.8293,
+      "step": 37170
+    },
+    {
+      "epoch": 1.4704660958294609,
+      "grad_norm": 1.424162052591826,
+      "learning_rate": 6.036842435499154e-06,
+      "loss": 0.8276,
+      "step": 37180
+    },
+    {
+      "epoch": 1.47086159504835,
+      "grad_norm": 1.0954194165113156,
+      "learning_rate": 6.034591349051895e-06,
+      "loss": 0.7963,
+      "step": 37190
+    },
+    {
+      "epoch": 1.471257094267239,
+      "grad_norm": 1.20751470439838,
+      "learning_rate": 6.032340043495912e-06,
+      "loss": 0.8328,
+      "step": 37200
+    },
+    {
+      "epoch": 1.471652593486128,
+      "grad_norm": 1.4879899591354708,
+      "learning_rate": 6.030088519307996e-06,
+      "loss": 0.8196,
+      "step": 37210
+    },
+    {
+      "epoch": 1.472048092705017,
+      "grad_norm": 1.2449159188305148,
+      "learning_rate": 6.0278367769649794e-06,
+      "loss": 0.8467,
+      "step": 37220
+    },
+    {
+      "epoch": 1.472443591923906,
+      "grad_norm": 1.437138029614525,
+      "learning_rate": 6.02558481694374e-06,
+      "loss": 0.8196,
+      "step": 37230
+    },
+    {
+      "epoch": 1.472839091142795,
+      "grad_norm": 1.4491916110053167,
+      "learning_rate": 6.023332639721209e-06,
+      "loss": 0.8327,
+      "step": 37240
+    },
+    {
+      "epoch": 1.4732345903616841,
+      "grad_norm": 1.1596629612558578,
+      "learning_rate": 6.021080245774356e-06,
+      "loss": 0.8317,
+      "step": 37250
+    },
+    {
+      "epoch": 1.4736300895805732,
+      "grad_norm": 1.2298806725917324,
+      "learning_rate": 6.0188276355802e-06,
+      "loss": 0.8263,
+      "step": 37260
+    },
+    {
+      "epoch": 1.4740255887994622,
+      "grad_norm": 1.2669568040670434,
+      "learning_rate": 6.016574809615807e-06,
+      "loss": 0.8364,
+      "step": 37270
+    },
+    {
+      "epoch": 1.4744210880183513,
+      "grad_norm": 1.583310653351235,
+      "learning_rate": 6.014321768358284e-06,
+      "loss": 0.8376,
+      "step": 37280
+    },
+    {
+      "epoch": 1.4748165872372403,
+      "grad_norm": 1.183803876079209,
+      "learning_rate": 6.0120685122847874e-06,
+      "loss": 0.8211,
+      "step": 37290
+    },
+    {
+      "epoch": 1.4752120864561293,
+      "grad_norm": 1.3282787364058297,
+      "learning_rate": 6.009815041872521e-06,
+      "loss": 0.8275,
+      "step": 37300
+    },
+    {
+      "epoch": 1.4756075856750184,
+      "grad_norm": 1.0757896601858954,
+      "learning_rate": 6.007561357598728e-06,
+      "loss": 0.8158,
+      "step": 37310
+    },
+    {
+      "epoch": 1.4760030848939074,
+      "grad_norm": 1.2939334557774662,
+      "learning_rate": 6.005307459940701e-06,
+      "loss": 0.8133,
+      "step": 37320
+    },
+    {
+      "epoch": 1.4763985841127965,
+      "grad_norm": 1.3771816047501217,
+      "learning_rate": 6.003053349375778e-06,
+      "loss": 0.8434,
+      "step": 37330
+    },
+    {
+      "epoch": 1.4767940833316855,
+      "grad_norm": 1.4588897531342568,
+      "learning_rate": 6.00079902638134e-06,
+      "loss": 0.8416,
+      "step": 37340
+    },
+    {
+      "epoch": 1.4771895825505745,
+      "grad_norm": 1.2612260439292184,
+      "learning_rate": 5.998544491434813e-06,
+      "loss": 0.838,
+      "step": 37350
+    },
+    {
+      "epoch": 1.4775850817694636,
+      "grad_norm": 1.2659100294037402,
+      "learning_rate": 5.99628974501367e-06,
+      "loss": 0.8596,
+      "step": 37360
+    },
+    {
+      "epoch": 1.4779805809883526,
+      "grad_norm": 1.259165546633377,
+      "learning_rate": 5.994034787595428e-06,
+      "loss": 0.8336,
+      "step": 37370
+    },
+    {
+      "epoch": 1.4783760802072416,
+      "grad_norm": 1.5323825732780485,
+      "learning_rate": 5.991779619657648e-06,
+      "loss": 0.7855,
+      "step": 37380
+    },
+    {
+      "epoch": 1.4787715794261307,
+      "grad_norm": 1.3628968111836337,
+      "learning_rate": 5.989524241677937e-06,
+      "loss": 0.846,
+      "step": 37390
+    },
+    {
+      "epoch": 1.4791670786450197,
+      "grad_norm": 1.1787882684609086,
+      "learning_rate": 5.987268654133943e-06,
+      "loss": 0.8469,
+      "step": 37400
+    },
+    {
+      "epoch": 1.4795625778639088,
+      "grad_norm": 1.2524777227261528,
+      "learning_rate": 5.985012857503363e-06,
+      "loss": 0.807,
+      "step": 37410
+    },
+    {
+      "epoch": 1.4799580770827978,
+      "grad_norm": 1.2042231250137807,
+      "learning_rate": 5.982756852263933e-06,
+      "loss": 0.8332,
+      "step": 37420
+    },
+    {
+      "epoch": 1.4803535763016868,
+      "grad_norm": 1.2762052873356005,
+      "learning_rate": 5.980500638893441e-06,
+      "loss": 0.7956,
+      "step": 37430
+    },
+    {
+      "epoch": 1.4807490755205759,
+      "grad_norm": 1.3528425128326769,
+      "learning_rate": 5.978244217869711e-06,
+      "loss": 0.8336,
+      "step": 37440
+    },
+    {
+      "epoch": 1.481144574739465,
+      "grad_norm": 1.3367232606982922,
+      "learning_rate": 5.9759875896706144e-06,
+      "loss": 0.8522,
+      "step": 37450
+    },
+    {
+      "epoch": 1.481540073958354,
+      "grad_norm": 1.1979888871980506,
+      "learning_rate": 5.973730754774068e-06,
+      "loss": 0.8423,
+      "step": 37460
+    },
+    {
+      "epoch": 1.481935573177243,
+      "grad_norm": 1.2021681961300046,
+      "learning_rate": 5.97147371365803e-06,
+      "loss": 0.8368,
+      "step": 37470
+    },
+    {
+      "epoch": 1.482331072396132,
+      "grad_norm": 1.4639923441014542,
+      "learning_rate": 5.969216466800503e-06,
+      "loss": 0.854,
+      "step": 37480
+    },
+    {
+      "epoch": 1.482726571615021,
+      "grad_norm": 1.3036101092964403,
+      "learning_rate": 5.966959014679532e-06,
+      "loss": 0.8394,
+      "step": 37490
+    },
+    {
+      "epoch": 1.4831220708339101,
+      "grad_norm": 1.1478280757190291,
+      "learning_rate": 5.9647013577732085e-06,
+      "loss": 0.8338,
+      "step": 37500
+    },
+    {
+      "epoch": 1.4835175700527992,
+      "grad_norm": 1.3669648344395982,
+      "learning_rate": 5.962443496559664e-06,
+      "loss": 0.8299,
+      "step": 37510
+    },
+    {
+      "epoch": 1.4839130692716882,
+      "grad_norm": 1.4891719958825982,
+      "learning_rate": 5.960185431517078e-06,
+      "loss": 0.8272,
+      "step": 37520
+    },
+    {
+      "epoch": 1.4843085684905772,
+      "grad_norm": 1.2579270922428791,
+      "learning_rate": 5.957927163123666e-06,
+      "loss": 0.8445,
+      "step": 37530
+    },
+    {
+      "epoch": 1.4847040677094663,
+      "grad_norm": 1.4280221776954765,
+      "learning_rate": 5.9556686918576925e-06,
+      "loss": 0.8271,
+      "step": 37540
+    },
+    {
+      "epoch": 1.4850995669283553,
+      "grad_norm": 1.1818080329358007,
+      "learning_rate": 5.953410018197463e-06,
+      "loss": 0.8366,
+      "step": 37550
+    },
+    {
+      "epoch": 1.4854950661472444,
+      "grad_norm": 1.1059097849343014,
+      "learning_rate": 5.951151142621326e-06,
+      "loss": 0.8188,
+      "step": 37560
+    },
+    {
+      "epoch": 1.4858905653661334,
+      "grad_norm": 1.345852113770174,
+      "learning_rate": 5.948892065607671e-06,
+      "loss": 0.8114,
+      "step": 37570
+    },
+    {
+      "epoch": 1.4862860645850224,
+      "grad_norm": 1.2958864841619009,
+      "learning_rate": 5.946632787634935e-06,
+      "loss": 0.7949,
+      "step": 37580
+    },
+    {
+      "epoch": 1.4866815638039115,
+      "grad_norm": 1.2859607747850244,
+      "learning_rate": 5.94437330918159e-06,
+      "loss": 0.8301,
+      "step": 37590
+    },
+    {
+      "epoch": 1.4870770630228005,
+      "grad_norm": 1.3126549447166243,
+      "learning_rate": 5.942113630726159e-06,
+      "loss": 0.8546,
+      "step": 37600
+    },
+    {
+      "epoch": 1.4874725622416896,
+      "grad_norm": 1.4237549890014323,
+      "learning_rate": 5.939853752747201e-06,
+      "loss": 0.8307,
+      "step": 37610
+    },
+    {
+      "epoch": 1.4878680614605786,
+      "grad_norm": 1.2107557720978932,
+      "learning_rate": 5.93759367572332e-06,
+      "loss": 0.8443,
+      "step": 37620
+    },
+    {
+      "epoch": 1.4882635606794676,
+      "grad_norm": 1.2695176508678736,
+      "learning_rate": 5.935333400133161e-06,
+      "loss": 0.8311,
+      "step": 37630
+    },
+    {
+      "epoch": 1.4886590598983567,
+      "grad_norm": 1.3945072147387503,
+      "learning_rate": 5.9330729264554134e-06,
+      "loss": 0.827,
+      "step": 37640
+    },
+    {
+      "epoch": 1.4890545591172457,
+      "grad_norm": 1.2194459760584162,
+      "learning_rate": 5.930812255168805e-06,
+      "loss": 0.8389,
+      "step": 37650
+    },
+    {
+      "epoch": 1.4894500583361348,
+      "grad_norm": 1.1869027175102502,
+      "learning_rate": 5.928551386752108e-06,
+      "loss": 0.8567,
+      "step": 37660
+    },
+    {
+      "epoch": 1.4898455575550238,
+      "grad_norm": 1.2630589516417647,
+      "learning_rate": 5.9262903216841335e-06,
+      "loss": 0.8087,
+      "step": 37670
+    },
+    {
+      "epoch": 1.4902410567739128,
+      "grad_norm": 1.1144199673584247,
+      "learning_rate": 5.92402906044374e-06,
+      "loss": 0.8266,
+      "step": 37680
+    },
+    {
+      "epoch": 1.4906365559928019,
+      "grad_norm": 1.2847134248761494,
+      "learning_rate": 5.921767603509822e-06,
+      "loss": 0.833,
+      "step": 37690
+    },
+    {
+      "epoch": 1.491032055211691,
+      "grad_norm": 1.701931469947877,
+      "learning_rate": 5.919505951361317e-06,
+      "loss": 0.8221,
+      "step": 37700
+    },
+    {
+      "epoch": 1.49142755443058,
+      "grad_norm": 1.1979917350570792,
+      "learning_rate": 5.917244104477205e-06,
+      "loss": 0.8411,
+      "step": 37710
+    },
+    {
+      "epoch": 1.491823053649469,
+      "grad_norm": 1.2665887906873012,
+      "learning_rate": 5.914982063336507e-06,
+      "loss": 0.8031,
+      "step": 37720
+    },
+    {
+      "epoch": 1.492218552868358,
+      "grad_norm": 1.2559536434755125,
+      "learning_rate": 5.9127198284182815e-06,
+      "loss": 0.8494,
+      "step": 37730
+    },
+    {
+      "epoch": 1.492614052087247,
+      "grad_norm": 1.1510544961983886,
+      "learning_rate": 5.9104574002016345e-06,
+      "loss": 0.8332,
+      "step": 37740
+    },
+    {
+      "epoch": 1.493009551306136,
+      "grad_norm": 1.156594506877186,
+      "learning_rate": 5.908194779165709e-06,
+      "loss": 0.8304,
+      "step": 37750
+    },
+    {
+      "epoch": 1.4934050505250251,
+      "grad_norm": 1.38808820957291,
+      "learning_rate": 5.905931965789688e-06,
+      "loss": 0.8192,
+      "step": 37760
+    },
+    {
+      "epoch": 1.4938005497439142,
+      "grad_norm": 1.3605551768171145,
+      "learning_rate": 5.903668960552797e-06,
+      "loss": 0.848,
+      "step": 37770
+    },
+    {
+      "epoch": 1.4941960489628032,
+      "grad_norm": 1.302042985659683,
+      "learning_rate": 5.9014057639343025e-06,
+      "loss": 0.8347,
+      "step": 37780
+    },
+    {
+      "epoch": 1.4945915481816923,
+      "grad_norm": 1.0357546967364923,
+      "learning_rate": 5.89914237641351e-06,
+      "loss": 0.8469,
+      "step": 37790
+    },
+    {
+      "epoch": 1.4949870474005813,
+      "grad_norm": 1.6929498588586158,
+      "learning_rate": 5.896878798469766e-06,
+      "loss": 0.8396,
+      "step": 37800
+    },
+    {
+      "epoch": 1.4953825466194703,
+      "grad_norm": 1.2867864716344224,
+      "learning_rate": 5.894615030582458e-06,
+      "loss": 0.813,
+      "step": 37810
+    },
+    {
+      "epoch": 1.4957780458383594,
+      "grad_norm": 1.4546439129944488,
+      "learning_rate": 5.892351073231015e-06,
+      "loss": 0.7983,
+      "step": 37820
+    },
+    {
+      "epoch": 1.4961735450572484,
+      "grad_norm": 1.2433124092412393,
+      "learning_rate": 5.8900869268949e-06,
+      "loss": 0.8437,
+      "step": 37830
+    },
+    {
+      "epoch": 1.4965690442761375,
+      "grad_norm": 1.1100349419437714,
+      "learning_rate": 5.887822592053624e-06,
+      "loss": 0.8461,
+      "step": 37840
+    },
+    {
+      "epoch": 1.4969645434950265,
+      "grad_norm": 1.2182905934411132,
+      "learning_rate": 5.885558069186735e-06,
+      "loss": 0.8467,
+      "step": 37850
+    },
+    {
+      "epoch": 1.4973600427139155,
+      "grad_norm": 1.5044139421435199,
+      "learning_rate": 5.883293358773816e-06,
+      "loss": 0.8312,
+      "step": 37860
+    },
+    {
+      "epoch": 1.4977555419328046,
+      "grad_norm": 1.1134418938793038,
+      "learning_rate": 5.881028461294497e-06,
+      "loss": 0.824,
+      "step": 37870
+    },
+    {
+      "epoch": 1.4981510411516936,
+      "grad_norm": 1.3832119834690153,
+      "learning_rate": 5.878763377228445e-06,
+      "loss": 0.8512,
+      "step": 37880
+    },
+    {
+      "epoch": 1.4985465403705827,
+      "grad_norm": 1.3427739153613933,
+      "learning_rate": 5.876498107055364e-06,
+      "loss": 0.8188,
+      "step": 37890
+    },
+    {
+      "epoch": 1.4989420395894717,
+      "grad_norm": 1.2139155355396918,
+      "learning_rate": 5.874232651255e-06,
+      "loss": 0.8164,
+      "step": 37900
+    },
+    {
+      "epoch": 1.4993375388083607,
+      "grad_norm": 1.4188225361176172,
+      "learning_rate": 5.871967010307138e-06,
+      "loss": 0.843,
+      "step": 37910
+    },
+    {
+      "epoch": 1.4997330380272498,
+      "grad_norm": 1.383659359057121,
+      "learning_rate": 5.8697011846916015e-06,
+      "loss": 0.8425,
+      "step": 37920
+    },
+    {
+      "epoch": 1.5001285372461388,
+      "grad_norm": 1.5233622271899518,
+      "learning_rate": 5.867435174888255e-06,
+      "loss": 0.8268,
+      "step": 37930
+    },
+    {
+      "epoch": 1.5005240364650279,
+      "grad_norm": 1.347771444737541,
+      "learning_rate": 5.8651689813769985e-06,
+      "loss": 0.8022,
+      "step": 37940
+    },
+    {
+      "epoch": 1.500919535683917,
+      "grad_norm": 1.3324271983277738,
+      "learning_rate": 5.862902604637776e-06,
+      "loss": 0.7998,
+      "step": 37950
+    },
+    {
+      "epoch": 1.501315034902806,
+      "grad_norm": 1.465709640988423,
+      "learning_rate": 5.860636045150564e-06,
+      "loss": 0.8259,
+      "step": 37960
+    },
+    {
+      "epoch": 1.501710534121695,
+      "grad_norm": 1.2580932460732994,
+      "learning_rate": 5.858369303395381e-06,
+      "loss": 0.8217,
+      "step": 37970
+    },
+    {
+      "epoch": 1.502106033340584,
+      "grad_norm": 1.366923687056205,
+      "learning_rate": 5.856102379852286e-06,
+      "loss": 0.8249,
+      "step": 37980
+    },
+    {
+      "epoch": 1.502501532559473,
+      "grad_norm": 1.1264984940009826,
+      "learning_rate": 5.853835275001376e-06,
+      "loss": 0.8122,
+      "step": 37990
+    },
+    {
+      "epoch": 1.502897031778362,
+      "grad_norm": 1.6154287876343538,
+      "learning_rate": 5.85156798932278e-06,
+      "loss": 0.821,
+      "step": 38000
+    },
+    {
+      "epoch": 1.5032925309972511,
+      "grad_norm": 1.248839585307766,
+      "learning_rate": 5.8493005232966745e-06,
+      "loss": 0.8242,
+      "step": 38010
+    },
+    {
+      "epoch": 1.5036880302161402,
+      "grad_norm": 1.3317870582014166,
+      "learning_rate": 5.847032877403269e-06,
+      "loss": 0.8282,
+      "step": 38020
+    },
+    {
+      "epoch": 1.5040835294350292,
+      "grad_norm": 1.2429661026045349,
+      "learning_rate": 5.844765052122811e-06,
+      "loss": 0.8127,
+      "step": 38030
+    },
+    {
+      "epoch": 1.5044790286539182,
+      "grad_norm": 1.3860656503761577,
+      "learning_rate": 5.842497047935587e-06,
+      "loss": 0.8107,
+      "step": 38040
+    },
+    {
+      "epoch": 1.5048745278728073,
+      "grad_norm": 1.3758474824046636,
+      "learning_rate": 5.840228865321923e-06,
+      "loss": 0.8025,
+      "step": 38050
+    },
+    {
+      "epoch": 1.5052700270916963,
+      "grad_norm": 1.4312116041976664,
+      "learning_rate": 5.837960504762179e-06,
+      "loss": 0.8288,
+      "step": 38060
+    },
+    {
+      "epoch": 1.5056655263105856,
+      "grad_norm": 1.1801788536325402,
+      "learning_rate": 5.835691966736758e-06,
+      "loss": 0.8433,
+      "step": 38070
+    },
+    {
+      "epoch": 1.5060610255294746,
+      "grad_norm": 1.3758792360793468,
+      "learning_rate": 5.833423251726095e-06,
+      "loss": 0.806,
+      "step": 38080
+    },
+    {
+      "epoch": 1.5064565247483637,
+      "grad_norm": 1.2949389581126536,
+      "learning_rate": 5.8311543602106645e-06,
+      "loss": 0.8246,
+      "step": 38090
+    },
+    {
+      "epoch": 1.5068520239672527,
+      "grad_norm": 1.2828666889168154,
+      "learning_rate": 5.82888529267098e-06,
+      "loss": 0.8297,
+      "step": 38100
+    },
+    {
+      "epoch": 1.5072475231861417,
+      "grad_norm": 1.3342565393902486,
+      "learning_rate": 5.826616049587592e-06,
+      "loss": 0.8145,
+      "step": 38110
+    },
+    {
+      "epoch": 1.5076430224050308,
+      "grad_norm": 1.3157019500235356,
+      "learning_rate": 5.824346631441087e-06,
+      "loss": 0.816,
+      "step": 38120
+    },
+    {
+      "epoch": 1.5080385216239198,
+      "grad_norm": 1.655739436905969,
+      "learning_rate": 5.822077038712088e-06,
+      "loss": 0.8184,
+      "step": 38130
+    },
+    {
+      "epoch": 1.5084340208428089,
+      "grad_norm": 1.4923378536205325,
+      "learning_rate": 5.819807271881256e-06,
+      "loss": 0.8384,
+      "step": 38140
+    },
+    {
+      "epoch": 1.508829520061698,
+      "grad_norm": 1.2598760898229802,
+      "learning_rate": 5.817537331429288e-06,
+      "loss": 0.839,
+      "step": 38150
+    },
+    {
+      "epoch": 1.509225019280587,
+      "grad_norm": 1.2044143246664853,
+      "learning_rate": 5.815267217836921e-06,
+      "loss": 0.8273,
+      "step": 38160
+    },
+    {
+      "epoch": 1.509620518499476,
+      "grad_norm": 1.2279039866063677,
+      "learning_rate": 5.812996931584923e-06,
+      "loss": 0.8377,
+      "step": 38170
+    },
+    {
+      "epoch": 1.510016017718365,
+      "grad_norm": 1.4662122144383176,
+      "learning_rate": 5.810726473154105e-06,
+      "loss": 0.8309,
+      "step": 38180
+    },
+    {
+      "epoch": 1.510411516937254,
+      "grad_norm": 1.2157490964608086,
+      "learning_rate": 5.808455843025309e-06,
+      "loss": 0.8147,
+      "step": 38190
+    },
+    {
+      "epoch": 1.510807016156143,
+      "grad_norm": 1.1510863989916291,
+      "learning_rate": 5.806185041679415e-06,
+      "loss": 0.8442,
+      "step": 38200
+    },
+    {
+      "epoch": 1.5112025153750321,
+      "grad_norm": 1.431497260337306,
+      "learning_rate": 5.803914069597342e-06,
+      "loss": 0.8076,
+      "step": 38210
+    },
+    {
+      "epoch": 1.5115980145939212,
+      "grad_norm": 1.2612330752418943,
+      "learning_rate": 5.801642927260042e-06,
+      "loss": 0.8295,
+      "step": 38220
+    },
+    {
+      "epoch": 1.5119935138128102,
+      "grad_norm": 1.7086484756243665,
+      "learning_rate": 5.7993716151485035e-06,
+      "loss": 0.8128,
+      "step": 38230
+    },
+    {
+      "epoch": 1.5123890130316993,
+      "grad_norm": 1.5717014989466744,
+      "learning_rate": 5.797100133743752e-06,
+      "loss": 0.8167,
+      "step": 38240
+    },
+    {
+      "epoch": 1.5127845122505883,
+      "grad_norm": 1.4099521550238716,
+      "learning_rate": 5.794828483526848e-06,
+      "loss": 0.8107,
+      "step": 38250
+    },
+    {
+      "epoch": 1.5131800114694773,
+      "grad_norm": 1.2373223873898105,
+      "learning_rate": 5.792556664978888e-06,
+      "loss": 0.8382,
+      "step": 38260
+    },
+    {
+      "epoch": 1.5135755106883664,
+      "grad_norm": 1.2390773061535927,
+      "learning_rate": 5.790284678581005e-06,
+      "loss": 0.7994,
+      "step": 38270
+    },
+    {
+      "epoch": 1.5139710099072554,
+      "grad_norm": 1.4390099847144664,
+      "learning_rate": 5.788012524814366e-06,
+      "loss": 0.8317,
+      "step": 38280
+    },
+    {
+      "epoch": 1.5143665091261445,
+      "grad_norm": 1.274542478005831,
+      "learning_rate": 5.785740204160175e-06,
+      "loss": 0.8163,
+      "step": 38290
+    },
+    {
+      "epoch": 1.5147620083450335,
+      "grad_norm": 1.3614774800966511,
+      "learning_rate": 5.783467717099669e-06,
+      "loss": 0.811,
+      "step": 38300
+    },
+    {
+      "epoch": 1.5151575075639225,
+      "grad_norm": 1.2151554711209784,
+      "learning_rate": 5.7811950641141255e-06,
+      "loss": 0.8232,
+      "step": 38310
+    },
+    {
+      "epoch": 1.5155530067828116,
+      "grad_norm": 1.2655687855067057,
+      "learning_rate": 5.778922245684849e-06,
+      "loss": 0.8149,
+      "step": 38320
+    },
+    {
+      "epoch": 1.5159485060017006,
+      "grad_norm": 1.3683987419298556,
+      "learning_rate": 5.776649262293187e-06,
+      "loss": 0.8318,
+      "step": 38330
+    },
+    {
+      "epoch": 1.5163440052205897,
+      "grad_norm": 1.1952612250781876,
+      "learning_rate": 5.774376114420516e-06,
+      "loss": 0.8197,
+      "step": 38340
+    },
+    {
+      "epoch": 1.5167395044394787,
+      "grad_norm": 1.362103756476559,
+      "learning_rate": 5.77210280254825e-06,
+      "loss": 0.8202,
+      "step": 38350
+    },
+    {
+      "epoch": 1.5171350036583677,
+      "grad_norm": 1.4018015765718346,
+      "learning_rate": 5.769829327157839e-06,
+      "loss": 0.8355,
+      "step": 38360
+    },
+    {
+      "epoch": 1.5175305028772568,
+      "grad_norm": 1.192026713496226,
+      "learning_rate": 5.767555688730766e-06,
+      "loss": 0.8268,
+      "step": 38370
+    },
+    {
+      "epoch": 1.5179260020961458,
+      "grad_norm": 1.4241913696623545,
+      "learning_rate": 5.765281887748547e-06,
+      "loss": 0.8198,
+      "step": 38380
+    },
+    {
+      "epoch": 1.5183215013150348,
+      "grad_norm": 1.1317361164699646,
+      "learning_rate": 5.763007924692736e-06,
+      "loss": 0.8242,
+      "step": 38390
+    },
+    {
+      "epoch": 1.518717000533924,
+      "grad_norm": 1.1389342124437951,
+      "learning_rate": 5.760733800044918e-06,
+      "loss": 0.8207,
+      "step": 38400
+    },
+    {
+      "epoch": 1.5191124997528132,
+      "grad_norm": 1.4947415854809551,
+      "learning_rate": 5.758459514286714e-06,
+      "loss": 0.8075,
+      "step": 38410
+    },
+    {
+      "epoch": 1.5195079989717022,
+      "grad_norm": 1.3781584549107786,
+      "learning_rate": 5.756185067899779e-06,
+      "loss": 0.804,
+      "step": 38420
+    },
+    {
+      "epoch": 1.5199034981905912,
+      "grad_norm": 1.5099450872277451,
+      "learning_rate": 5.753910461365803e-06,
+      "loss": 0.8226,
+      "step": 38430
+    },
+    {
+      "epoch": 1.5202989974094803,
+      "grad_norm": 1.4031260466275093,
+      "learning_rate": 5.751635695166506e-06,
+      "loss": 0.8405,
+      "step": 38440
+    },
+    {
+      "epoch": 1.5206944966283693,
+      "grad_norm": 1.487364272797396,
+      "learning_rate": 5.749360769783646e-06,
+      "loss": 0.7901,
+      "step": 38450
+    },
+    {
+      "epoch": 1.5210899958472583,
+      "grad_norm": 1.3330407414576706,
+      "learning_rate": 5.747085685699014e-06,
+      "loss": 0.8431,
+      "step": 38460
+    },
+    {
+      "epoch": 1.5214854950661474,
+      "grad_norm": 1.293191964983223,
+      "learning_rate": 5.744810443394433e-06,
+      "loss": 0.8364,
+      "step": 38470
+    },
+    {
+      "epoch": 1.5218809942850364,
+      "grad_norm": 1.2398317086987976,
+      "learning_rate": 5.74253504335176e-06,
+      "loss": 0.8296,
+      "step": 38480
+    },
+    {
+      "epoch": 1.5222764935039255,
+      "grad_norm": 1.3165726996602467,
+      "learning_rate": 5.740259486052885e-06,
+      "loss": 0.8137,
+      "step": 38490
+    },
+    {
+      "epoch": 1.5226719927228145,
+      "grad_norm": 1.5773461675449112,
+      "learning_rate": 5.737983771979735e-06,
+      "loss": 0.8151,
+      "step": 38500
+    },
+    {
+      "epoch": 1.5230674919417035,
+      "grad_norm": 1.5939505912160084,
+      "learning_rate": 5.735707901614265e-06,
+      "loss": 0.8113,
+      "step": 38510
+    },
+    {
+      "epoch": 1.5234629911605926,
+      "grad_norm": 1.152646501145914,
+      "learning_rate": 5.733431875438465e-06,
+      "loss": 0.8243,
+      "step": 38520
+    },
+    {
+      "epoch": 1.5238584903794816,
+      "grad_norm": 1.4188531112423826,
+      "learning_rate": 5.731155693934358e-06,
+      "loss": 0.8408,
+      "step": 38530
+    },
+    {
+      "epoch": 1.5242539895983707,
+      "grad_norm": 1.3942520359681672,
+      "learning_rate": 5.728879357584003e-06,
+      "loss": 0.8056,
+      "step": 38540
+    },
+    {
+      "epoch": 1.5246494888172597,
+      "grad_norm": 1.3777318987014022,
+      "learning_rate": 5.7266028668694865e-06,
+      "loss": 0.8124,
+      "step": 38550
+    },
+    {
+      "epoch": 1.5250449880361487,
+      "grad_norm": 1.274104693710163,
+      "learning_rate": 5.724326222272933e-06,
+      "loss": 0.8262,
+      "step": 38560
+    },
+    {
+      "epoch": 1.5254404872550378,
+      "grad_norm": 1.2887173676796957,
+      "learning_rate": 5.722049424276492e-06,
+      "loss": 0.8273,
+      "step": 38570
+    },
+    {
+      "epoch": 1.5258359864739268,
+      "grad_norm": 1.1666885061187304,
+      "learning_rate": 5.719772473362355e-06,
+      "loss": 0.8394,
+      "step": 38580
+    },
+    {
+      "epoch": 1.5262314856928159,
+      "grad_norm": 1.3426124384217368,
+      "learning_rate": 5.717495370012739e-06,
+      "loss": 0.8173,
+      "step": 38590
+    },
+    {
+      "epoch": 1.526626984911705,
+      "grad_norm": 1.2059245551634135,
+      "learning_rate": 5.715218114709895e-06,
+      "loss": 0.8444,
+      "step": 38600
+    },
+    {
+      "epoch": 1.527022484130594,
+      "grad_norm": 1.2570956074149853,
+      "learning_rate": 5.712940707936109e-06,
+      "loss": 0.827,
+      "step": 38610
+    },
+    {
+      "epoch": 1.527417983349483,
+      "grad_norm": 1.2693911755556908,
+      "learning_rate": 5.710663150173696e-06,
+      "loss": 0.818,
+      "step": 38620
+    },
+    {
+      "epoch": 1.527813482568372,
+      "grad_norm": 1.2997327068177411,
+      "learning_rate": 5.708385441905001e-06,
+      "loss": 0.8258,
+      "step": 38630
+    },
+    {
+      "epoch": 1.528208981787261,
+      "grad_norm": 1.342851020895316,
+      "learning_rate": 5.706107583612406e-06,
+      "loss": 0.8016,
+      "step": 38640
+    },
+    {
+      "epoch": 1.52860448100615,
+      "grad_norm": 1.1774531878378258,
+      "learning_rate": 5.703829575778322e-06,
+      "loss": 0.8402,
+      "step": 38650
+    },
+    {
+      "epoch": 1.5289999802250391,
+      "grad_norm": 1.278433876473585,
+      "learning_rate": 5.701551418885192e-06,
+      "loss": 0.8189,
+      "step": 38660
+    },
+    {
+      "epoch": 1.5293954794439282,
+      "grad_norm": 1.68305763879144,
+      "learning_rate": 5.6992731134154925e-06,
+      "loss": 0.7945,
+      "step": 38670
+    },
+    {
+      "epoch": 1.5297909786628172,
+      "grad_norm": 1.5358066405040551,
+      "learning_rate": 5.6969946598517256e-06,
+      "loss": 0.8316,
+      "step": 38680
+    },
+    {
+      "epoch": 1.5301864778817063,
+      "grad_norm": 1.4941027103869715,
+      "learning_rate": 5.6947160586764315e-06,
+      "loss": 0.8441,
+      "step": 38690
+    },
+    {
+      "epoch": 1.5305819771005953,
+      "grad_norm": 1.5287196036694184,
+      "learning_rate": 5.692437310372179e-06,
+      "loss": 0.8315,
+      "step": 38700
+    },
+    {
+      "epoch": 1.5309774763194843,
+      "grad_norm": 1.3015179121246494,
+      "learning_rate": 5.690158415421565e-06,
+      "loss": 0.82,
+      "step": 38710
+    },
+    {
+      "epoch": 1.5313729755383734,
+      "grad_norm": 1.3168439931327458,
+      "learning_rate": 5.687879374307223e-06,
+      "loss": 0.824,
+      "step": 38720
+    },
+    {
+      "epoch": 1.5317684747572624,
+      "grad_norm": 1.2524563671976618,
+      "learning_rate": 5.685600187511815e-06,
+      "loss": 0.8358,
+      "step": 38730
+    },
+    {
+      "epoch": 1.5321639739761514,
+      "grad_norm": 1.602970456884873,
+      "learning_rate": 5.683320855518034e-06,
+      "loss": 0.8079,
+      "step": 38740
+    },
+    {
+      "epoch": 1.5325594731950405,
+      "grad_norm": 1.087151182620091,
+      "learning_rate": 5.681041378808602e-06,
+      "loss": 0.8203,
+      "step": 38750
+    },
+    {
+      "epoch": 1.5329549724139295,
+      "grad_norm": 1.5933289063697031,
+      "learning_rate": 5.678761757866273e-06,
+      "loss": 0.7905,
+      "step": 38760
+    },
+    {
+      "epoch": 1.5333504716328186,
+      "grad_norm": 1.571698507898866,
+      "learning_rate": 5.676481993173832e-06,
+      "loss": 0.824,
+      "step": 38770
+    },
+    {
+      "epoch": 1.5337459708517076,
+      "grad_norm": 1.3836422282780558,
+      "learning_rate": 5.674202085214096e-06,
+      "loss": 0.822,
+      "step": 38780
+    },
+    {
+      "epoch": 1.5341414700705966,
+      "grad_norm": 1.3477182619705774,
+      "learning_rate": 5.6719220344699076e-06,
+      "loss": 0.8098,
+      "step": 38790
+    },
+    {
+      "epoch": 1.5345369692894857,
+      "grad_norm": 1.6234631116214455,
+      "learning_rate": 5.669641841424145e-06,
+      "loss": 0.819,
+      "step": 38800
+    },
+    {
+      "epoch": 1.5349324685083747,
+      "grad_norm": 1.3631566579980516,
+      "learning_rate": 5.667361506559712e-06,
+      "loss": 0.8222,
+      "step": 38810
+    },
+    {
+      "epoch": 1.5353279677272638,
+      "grad_norm": 1.225113445298731,
+      "learning_rate": 5.6650810303595445e-06,
+      "loss": 0.8276,
+      "step": 38820
+    },
+    {
+      "epoch": 1.5357234669461528,
+      "grad_norm": 1.2518426705100616,
+      "learning_rate": 5.662800413306611e-06,
+      "loss": 0.7989,
+      "step": 38830
+    },
+    {
+      "epoch": 1.5361189661650418,
+      "grad_norm": 1.454598850463559,
+      "learning_rate": 5.6605196558839035e-06,
+      "loss": 0.8169,
+      "step": 38840
+    },
+    {
+      "epoch": 1.5365144653839309,
+      "grad_norm": 1.4120882843212048,
+      "learning_rate": 5.658238758574451e-06,
+      "loss": 0.7972,
+      "step": 38850
+    },
+    {
+      "epoch": 1.53690996460282,
+      "grad_norm": 1.4655033143313787,
+      "learning_rate": 5.655957721861305e-06,
+      "loss": 0.8293,
+      "step": 38860
+    },
+    {
+      "epoch": 1.537305463821709,
+      "grad_norm": 1.1867759851838549,
+      "learning_rate": 5.653676546227551e-06,
+      "loss": 0.8394,
+      "step": 38870
+    },
+    {
+      "epoch": 1.537700963040598,
+      "grad_norm": 1.3861241414366707,
+      "learning_rate": 5.651395232156305e-06,
+      "loss": 0.828,
+      "step": 38880
+    },
+    {
+      "epoch": 1.538096462259487,
+      "grad_norm": 1.2570524199306334,
+      "learning_rate": 5.649113780130708e-06,
+      "loss": 0.8086,
+      "step": 38890
+    },
+    {
+      "epoch": 1.538491961478376,
+      "grad_norm": 1.2338247393546447,
+      "learning_rate": 5.646832190633933e-06,
+      "loss": 0.8104,
+      "step": 38900
+    },
+    {
+      "epoch": 1.5388874606972651,
+      "grad_norm": 1.4890857361789498,
+      "learning_rate": 5.644550464149181e-06,
+      "loss": 0.808,
+      "step": 38910
+    },
+    {
+      "epoch": 1.5392829599161542,
+      "grad_norm": 1.745385794419566,
+      "learning_rate": 5.642268601159684e-06,
+      "loss": 0.8093,
+      "step": 38920
+    },
+    {
+      "epoch": 1.5396784591350432,
+      "grad_norm": 1.307365814179379,
+      "learning_rate": 5.639986602148701e-06,
+      "loss": 0.7943,
+      "step": 38930
+    },
+    {
+      "epoch": 1.5400739583539322,
+      "grad_norm": 1.5926438350605667,
+      "learning_rate": 5.637704467599519e-06,
+      "loss": 0.8178,
+      "step": 38940
+    },
+    {
+      "epoch": 1.5404694575728213,
+      "grad_norm": 1.1807889793922797,
+      "learning_rate": 5.635422197995457e-06,
+      "loss": 0.8034,
+      "step": 38950
+    },
+    {
+      "epoch": 1.5408649567917103,
+      "grad_norm": 1.2122033798033789,
+      "learning_rate": 5.6331397938198594e-06,
+      "loss": 0.8103,
+      "step": 38960
+    },
+    {
+      "epoch": 1.5412604560105994,
+      "grad_norm": 1.5957206217105713,
+      "learning_rate": 5.6308572555561e-06,
+      "loss": 0.7969,
+      "step": 38970
+    },
+    {
+      "epoch": 1.5416559552294884,
+      "grad_norm": 1.0827701728754713,
+      "learning_rate": 5.628574583687582e-06,
+      "loss": 0.836,
+      "step": 38980
+    },
+    {
+      "epoch": 1.5420514544483774,
+      "grad_norm": 1.3355434796487196,
+      "learning_rate": 5.626291778697737e-06,
+      "loss": 0.8238,
+      "step": 38990
+    },
+    {
+      "epoch": 1.5424469536672665,
+      "grad_norm": 1.1769896925261032,
+      "learning_rate": 5.6240088410700226e-06,
+      "loss": 0.8471,
+      "step": 39000
+    },
+    {
+      "epoch": 1.5428424528861555,
+      "grad_norm": 1.2407091295908643,
+      "learning_rate": 5.6217257712879254e-06,
+      "loss": 0.8281,
+      "step": 39010
+    },
+    {
+      "epoch": 1.5432379521050446,
+      "grad_norm": 1.3636080737320468,
+      "learning_rate": 5.6194425698349615e-06,
+      "loss": 0.8096,
+      "step": 39020
+    },
+    {
+      "epoch": 1.5436334513239336,
+      "grad_norm": 1.2245634746187997,
+      "learning_rate": 5.617159237194675e-06,
+      "loss": 0.7887,
+      "step": 39030
+    },
+    {
+      "epoch": 1.5440289505428226,
+      "grad_norm": 1.2467842244493976,
+      "learning_rate": 5.614875773850633e-06,
+      "loss": 0.8041,
+      "step": 39040
+    },
+    {
+      "epoch": 1.5444244497617117,
+      "grad_norm": 1.0935122439457097,
+      "learning_rate": 5.612592180286439e-06,
+      "loss": 0.825,
+      "step": 39050
+    },
+    {
+      "epoch": 1.5448199489806007,
+      "grad_norm": 1.239647065182398,
+      "learning_rate": 5.610308456985716e-06,
+      "loss": 0.8227,
+      "step": 39060
+    },
+    {
+      "epoch": 1.5452154481994897,
+      "grad_norm": 1.482679233583797,
+      "learning_rate": 5.608024604432117e-06,
+      "loss": 0.7949,
+      "step": 39070
+    },
+    {
+      "epoch": 1.5456109474183788,
+      "grad_norm": 1.3375920596273634,
+      "learning_rate": 5.605740623109322e-06,
+      "loss": 0.7955,
+      "step": 39080
+    },
+    {
+      "epoch": 1.5460064466372678,
+      "grad_norm": 1.4103959220782802,
+      "learning_rate": 5.603456513501042e-06,
+      "loss": 0.7977,
+      "step": 39090
+    },
+    {
+      "epoch": 1.5464019458561569,
+      "grad_norm": 1.1429661050424258,
+      "learning_rate": 5.60117227609101e-06,
+      "loss": 0.8235,
+      "step": 39100
+    },
+    {
+      "epoch": 1.546797445075046,
+      "grad_norm": 1.1668869977872303,
+      "learning_rate": 5.598887911362992e-06,
+      "loss": 0.8057,
+      "step": 39110
+    },
+    {
+      "epoch": 1.547192944293935,
+      "grad_norm": 1.1816970950651535,
+      "learning_rate": 5.596603419800772e-06,
+      "loss": 0.8402,
+      "step": 39120
+    },
+    {
+      "epoch": 1.547588443512824,
+      "grad_norm": 1.632450542651483,
+      "learning_rate": 5.59431880188817e-06,
+      "loss": 0.8025,
+      "step": 39130
+    },
+    {
+      "epoch": 1.547983942731713,
+      "grad_norm": 1.2093192572076203,
+      "learning_rate": 5.59203405810903e-06,
+      "loss": 0.8135,
+      "step": 39140
+    },
+    {
+      "epoch": 1.548379441950602,
+      "grad_norm": 1.2139579886991492,
+      "learning_rate": 5.589749188947216e-06,
+      "loss": 0.8009,
+      "step": 39150
+    },
+    {
+      "epoch": 1.548774941169491,
+      "grad_norm": 1.3890540832478753,
+      "learning_rate": 5.587464194886628e-06,
+      "loss": 0.8041,
+      "step": 39160
+    },
+    {
+      "epoch": 1.5491704403883801,
+      "grad_norm": 1.2213136005106913,
+      "learning_rate": 5.585179076411189e-06,
+      "loss": 0.8455,
+      "step": 39170
+    },
+    {
+      "epoch": 1.5495659396072692,
+      "grad_norm": 1.752192437863592,
+      "learning_rate": 5.5828938340048465e-06,
+      "loss": 0.8037,
+      "step": 39180
+    },
+    {
+      "epoch": 1.5499614388261582,
+      "grad_norm": 1.1913841083070977,
+      "learning_rate": 5.580608468151576e-06,
+      "loss": 0.828,
+      "step": 39190
+    },
+    {
+      "epoch": 1.5503569380450473,
+      "grad_norm": 1.3050077075471247,
+      "learning_rate": 5.5783229793353785e-06,
+      "loss": 0.819,
+      "step": 39200
+    },
+    {
+      "epoch": 1.5507524372639363,
+      "grad_norm": 1.470225449093407,
+      "learning_rate": 5.576037368040282e-06,
+      "loss": 0.8058,
+      "step": 39210
+    },
+    {
+      "epoch": 1.5511479364828253,
+      "grad_norm": 1.2350903834665412,
+      "learning_rate": 5.57375163475034e-06,
+      "loss": 0.8328,
+      "step": 39220
+    },
+    {
+      "epoch": 1.5515434357017144,
+      "grad_norm": 1.21422056049844,
+      "learning_rate": 5.571465779949633e-06,
+      "loss": 0.8091,
+      "step": 39230
+    },
+    {
+      "epoch": 1.5519389349206034,
+      "grad_norm": 1.4081869618346856,
+      "learning_rate": 5.569179804122263e-06,
+      "loss": 0.8257,
+      "step": 39240
+    },
+    {
+      "epoch": 1.5523344341394925,
+      "grad_norm": 1.363848958047758,
+      "learning_rate": 5.566893707752362e-06,
+      "loss": 0.7901,
+      "step": 39250
+    },
+    {
+      "epoch": 1.5527299333583815,
+      "grad_norm": 1.7840250259116874,
+      "learning_rate": 5.564607491324085e-06,
+      "loss": 0.8223,
+      "step": 39260
+    },
+    {
+      "epoch": 1.5531254325772705,
+      "grad_norm": 1.2380626087172715,
+      "learning_rate": 5.562321155321615e-06,
+      "loss": 0.8024,
+      "step": 39270
+    },
+    {
+      "epoch": 1.5535209317961596,
+      "grad_norm": 1.43218123769,
+      "learning_rate": 5.560034700229157e-06,
+      "loss": 0.8238,
+      "step": 39280
+    },
+    {
+      "epoch": 1.5539164310150486,
+      "grad_norm": 1.3984581186758427,
+      "learning_rate": 5.557748126530946e-06,
+      "loss": 0.7966,
+      "step": 39290
+    },
+    {
+      "epoch": 1.5543119302339377,
+      "grad_norm": 1.472340457391162,
+      "learning_rate": 5.555461434711237e-06,
+      "loss": 0.8256,
+      "step": 39300
+    },
+    {
+      "epoch": 1.5547074294528267,
+      "grad_norm": 1.4758825007263823,
+      "learning_rate": 5.553174625254312e-06,
+      "loss": 0.8007,
+      "step": 39310
+    },
+    {
+      "epoch": 1.5551029286717157,
+      "grad_norm": 1.2369053120366102,
+      "learning_rate": 5.5508876986444774e-06,
+      "loss": 0.803,
+      "step": 39320
+    },
+    {
+      "epoch": 1.5554984278906048,
+      "grad_norm": 1.2940420396551047,
+      "learning_rate": 5.5486006553660665e-06,
+      "loss": 0.8045,
+      "step": 39330
+    },
+    {
+      "epoch": 1.5558939271094938,
+      "grad_norm": 1.1060473545686946,
+      "learning_rate": 5.546313495903436e-06,
+      "loss": 0.8153,
+      "step": 39340
+    },
+    {
+      "epoch": 1.5562894263283829,
+      "grad_norm": 1.2265893512972266,
+      "learning_rate": 5.544026220740968e-06,
+      "loss": 0.8311,
+      "step": 39350
+    },
+    {
+      "epoch": 1.556684925547272,
+      "grad_norm": 1.6550990508506043,
+      "learning_rate": 5.541738830363065e-06,
+      "loss": 0.8236,
+      "step": 39360
+    },
+    {
+      "epoch": 1.557080424766161,
+      "grad_norm": 1.1458236898666863,
+      "learning_rate": 5.5394513252541594e-06,
+      "loss": 0.8388,
+      "step": 39370
+    },
+    {
+      "epoch": 1.55747592398505,
+      "grad_norm": 1.495186838260878,
+      "learning_rate": 5.5371637058987046e-06,
+      "loss": 0.8086,
+      "step": 39380
+    },
+    {
+      "epoch": 1.557871423203939,
+      "grad_norm": 1.4717040694086956,
+      "learning_rate": 5.534875972781181e-06,
+      "loss": 0.8225,
+      "step": 39390
+    },
+    {
+      "epoch": 1.558266922422828,
+      "grad_norm": 1.2305717977973327,
+      "learning_rate": 5.532588126386088e-06,
+      "loss": 0.8348,
+      "step": 39400
+    },
+    {
+      "epoch": 1.5586624216417173,
+      "grad_norm": 1.4143999990427574,
+      "learning_rate": 5.530300167197955e-06,
+      "loss": 0.8165,
+      "step": 39410
+    },
+    {
+      "epoch": 1.5590579208606063,
+      "grad_norm": 1.443370757375486,
+      "learning_rate": 5.528012095701334e-06,
+      "loss": 0.8036,
+      "step": 39420
+    },
+    {
+      "epoch": 1.5594534200794954,
+      "grad_norm": 1.4501652394376046,
+      "learning_rate": 5.525723912380794e-06,
+      "loss": 0.8091,
+      "step": 39430
+    },
+    {
+      "epoch": 1.5598489192983844,
+      "grad_norm": 1.338755523797807,
+      "learning_rate": 5.523435617720937e-06,
+      "loss": 0.7694,
+      "step": 39440
+    },
+    {
+      "epoch": 1.5602444185172735,
+      "grad_norm": 1.2820511550016698,
+      "learning_rate": 5.521147212206385e-06,
+      "loss": 0.8227,
+      "step": 39450
+    },
+    {
+      "epoch": 1.5606399177361625,
+      "grad_norm": 1.5250704885892234,
+      "learning_rate": 5.518858696321781e-06,
+      "loss": 0.7936,
+      "step": 39460
+    },
+    {
+      "epoch": 1.5610354169550515,
+      "grad_norm": 1.2487698246875318,
+      "learning_rate": 5.516570070551794e-06,
+      "loss": 0.8252,
+      "step": 39470
+    },
+    {
+      "epoch": 1.5614309161739406,
+      "grad_norm": 1.3328166987955157,
+      "learning_rate": 5.514281335381116e-06,
+      "loss": 0.8287,
+      "step": 39480
+    },
+    {
+      "epoch": 1.5618264153928296,
+      "grad_norm": 1.42088727589414,
+      "learning_rate": 5.511992491294462e-06,
+      "loss": 0.8079,
+      "step": 39490
+    },
+    {
+      "epoch": 1.5622219146117187,
+      "grad_norm": 1.4988100766902124,
+      "learning_rate": 5.509703538776571e-06,
+      "loss": 0.8082,
+      "step": 39500
+    },
+    {
+      "epoch": 1.5626174138306077,
+      "grad_norm": 1.3833842461176957,
+      "learning_rate": 5.5074144783122005e-06,
+      "loss": 0.8321,
+      "step": 39510
+    },
+    {
+      "epoch": 1.5630129130494967,
+      "grad_norm": 1.464308309676638,
+      "learning_rate": 5.505125310386138e-06,
+      "loss": 0.8123,
+      "step": 39520
+    },
+    {
+      "epoch": 1.5634084122683858,
+      "grad_norm": 1.4910036244046405,
+      "learning_rate": 5.502836035483189e-06,
+      "loss": 0.8427,
+      "step": 39530
+    },
+    {
+      "epoch": 1.5638039114872748,
+      "grad_norm": 1.4540376600595513,
+      "learning_rate": 5.500546654088184e-06,
+      "loss": 0.8087,
+      "step": 39540
+    },
+    {
+      "epoch": 1.5641994107061639,
+      "grad_norm": 1.1440511797426696,
+      "learning_rate": 5.49825716668597e-06,
+      "loss": 0.8393,
+      "step": 39550
+    },
+    {
+      "epoch": 1.564594909925053,
+      "grad_norm": 1.4075786996648874,
+      "learning_rate": 5.495967573761425e-06,
+      "loss": 0.7981,
+      "step": 39560
+    },
+    {
+      "epoch": 1.564990409143942,
+      "grad_norm": 1.3099595994992026,
+      "learning_rate": 5.493677875799446e-06,
+      "loss": 0.8222,
+      "step": 39570
+    },
+    {
+      "epoch": 1.565385908362831,
+      "grad_norm": 1.2218317511865187,
+      "learning_rate": 5.491388073284951e-06,
+      "loss": 0.8061,
+      "step": 39580
+    },
+    {
+      "epoch": 1.56578140758172,
+      "grad_norm": 1.206393200809857,
+      "learning_rate": 5.48909816670288e-06,
+      "loss": 0.8226,
+      "step": 39590
+    },
+    {
+      "epoch": 1.566176906800609,
+      "grad_norm": 1.3708673103211968,
+      "learning_rate": 5.4868081565381955e-06,
+      "loss": 0.8078,
+      "step": 39600
+    },
+    {
+      "epoch": 1.566572406019498,
+      "grad_norm": 1.4120005717905835,
+      "learning_rate": 5.484518043275885e-06,
+      "loss": 0.8337,
+      "step": 39610
+    },
+    {
+      "epoch": 1.5669679052383871,
+      "grad_norm": 1.4600630693904642,
+      "learning_rate": 5.482227827400953e-06,
+      "loss": 0.8068,
+      "step": 39620
+    },
+    {
+      "epoch": 1.5673634044572762,
+      "grad_norm": 1.670653620704324,
+      "learning_rate": 5.4799375093984285e-06,
+      "loss": 0.8004,
+      "step": 39630
+    },
+    {
+      "epoch": 1.5677589036761652,
+      "grad_norm": 1.1515165217257826,
+      "learning_rate": 5.477647089753363e-06,
+      "loss": 0.8158,
+      "step": 39640
+    },
+    {
+      "epoch": 1.5681544028950543,
+      "grad_norm": 1.300203640787178,
+      "learning_rate": 5.475356568950826e-06,
+      "loss": 0.8155,
+      "step": 39650
+    },
+    {
+      "epoch": 1.5685499021139433,
+      "grad_norm": 1.5027638851013374,
+      "learning_rate": 5.473065947475913e-06,
+      "loss": 0.81,
+      "step": 39660
+    },
+    {
+      "epoch": 1.5689454013328323,
+      "grad_norm": 1.12842357234813,
+      "learning_rate": 5.470775225813736e-06,
+      "loss": 0.8334,
+      "step": 39670
+    },
+    {
+      "epoch": 1.5693409005517214,
+      "grad_norm": 1.3292738681420568,
+      "learning_rate": 5.4684844044494314e-06,
+      "loss": 0.8316,
+      "step": 39680
+    },
+    {
+      "epoch": 1.5697363997706104,
+      "grad_norm": 1.495904681940552,
+      "learning_rate": 5.466193483868155e-06,
+      "loss": 0.827,
+      "step": 39690
+    },
+    {
+      "epoch": 1.5701318989894995,
+      "grad_norm": 1.4214349460061635,
+      "learning_rate": 5.463902464555088e-06,
+      "loss": 0.7888,
+      "step": 39700
+    },
+    {
+      "epoch": 1.5705273982083885,
+      "grad_norm": 1.599978081555621,
+      "learning_rate": 5.4616113469954245e-06,
+      "loss": 0.7827,
+      "step": 39710
+    },
+    {
+      "epoch": 1.5709228974272775,
+      "grad_norm": 1.0861151853314188,
+      "learning_rate": 5.459320131674388e-06,
+      "loss": 0.8268,
+      "step": 39720
+    },
+    {
+      "epoch": 1.5713183966461666,
+      "grad_norm": 1.1588730546634205,
+      "learning_rate": 5.4570288190772156e-06,
+      "loss": 0.8226,
+      "step": 39730
+    },
+    {
+      "epoch": 1.5717138958650558,
+      "grad_norm": 1.3819229872228296,
+      "learning_rate": 5.454737409689169e-06,
+      "loss": 0.7843,
+      "step": 39740
+    },
+    {
+      "epoch": 1.5721093950839449,
+      "grad_norm": 1.2112121186132216,
+      "learning_rate": 5.45244590399553e-06,
+      "loss": 0.8125,
+      "step": 39750
+    },
+    {
+      "epoch": 1.572504894302834,
+      "grad_norm": 1.1913765900022304,
+      "learning_rate": 5.4501543024816005e-06,
+      "loss": 0.8208,
+      "step": 39760
+    },
+    {
+      "epoch": 1.572900393521723,
+      "grad_norm": 1.5073803555928857,
+      "learning_rate": 5.447862605632701e-06,
+      "loss": 0.7918,
+      "step": 39770
+    },
+    {
+      "epoch": 1.573295892740612,
+      "grad_norm": 1.221168009029546,
+      "learning_rate": 5.4455708139341764e-06,
+      "loss": 0.8189,
+      "step": 39780
+    },
+    {
+      "epoch": 1.573691391959501,
+      "grad_norm": 1.307264926017239,
+      "learning_rate": 5.443278927871385e-06,
+      "loss": 0.8012,
+      "step": 39790
+    },
+    {
+      "epoch": 1.57408689117839,
+      "grad_norm": 1.1350453704583348,
+      "learning_rate": 5.440986947929712e-06,
+      "loss": 0.7895,
+      "step": 39800
+    },
+    {
+      "epoch": 1.574482390397279,
+      "grad_norm": 1.3002718909237414,
+      "learning_rate": 5.438694874594558e-06,
+      "loss": 0.8162,
+      "step": 39810
+    },
+    {
+      "epoch": 1.5748778896161681,
+      "grad_norm": 1.5794201658229785,
+      "learning_rate": 5.436402708351346e-06,
+      "loss": 0.7872,
+      "step": 39820
+    },
+    {
+      "epoch": 1.5752733888350572,
+      "grad_norm": 1.2507161420546935,
+      "learning_rate": 5.434110449685517e-06,
+      "loss": 0.8193,
+      "step": 39830
+    },
+    {
+      "epoch": 1.5756688880539462,
+      "grad_norm": 1.1983513610804764,
+      "learning_rate": 5.431818099082532e-06,
+      "loss": 0.7852,
+      "step": 39840
+    },
+    {
+      "epoch": 1.5760643872728353,
+      "grad_norm": 1.4340925872168722,
+      "learning_rate": 5.429525657027871e-06,
+      "loss": 0.8006,
+      "step": 39850
+    },
+    {
+      "epoch": 1.5764598864917243,
+      "grad_norm": 1.4561711851947867,
+      "learning_rate": 5.427233124007034e-06,
+      "loss": 0.7818,
+      "step": 39860
+    },
+    {
+      "epoch": 1.5768553857106133,
+      "grad_norm": 1.3427937770208374,
+      "learning_rate": 5.424940500505541e-06,
+      "loss": 0.8214,
+      "step": 39870
+    },
+    {
+      "epoch": 1.5772508849295024,
+      "grad_norm": 1.424175563943074,
+      "learning_rate": 5.42264778700893e-06,
+      "loss": 0.7879,
+      "step": 39880
+    },
+    {
+      "epoch": 1.5776463841483914,
+      "grad_norm": 1.2015772096184594,
+      "learning_rate": 5.420354984002759e-06,
+      "loss": 0.8103,
+      "step": 39890
+    },
+    {
+      "epoch": 1.5780418833672805,
+      "grad_norm": 1.6775521195256204,
+      "learning_rate": 5.418062091972604e-06,
+      "loss": 0.8057,
+      "step": 39900
+    },
+    {
+      "epoch": 1.5784373825861695,
+      "grad_norm": 1.4559484766066624,
+      "learning_rate": 5.415769111404061e-06,
+      "loss": 0.8121,
+      "step": 39910
+    },
+    {
+      "epoch": 1.5788328818050585,
+      "grad_norm": 1.4466132892633963,
+      "learning_rate": 5.413476042782742e-06,
+      "loss": 0.8239,
+      "step": 39920
+    },
+    {
+      "epoch": 1.5792283810239476,
+      "grad_norm": 1.4088345948193022,
+      "learning_rate": 5.4111828865942825e-06,
+      "loss": 0.7919,
+      "step": 39930
+    },
+    {
+      "epoch": 1.5796238802428366,
+      "grad_norm": 1.3034516680823378,
+      "learning_rate": 5.408889643324331e-06,
+      "loss": 0.824,
+      "step": 39940
+    },
+    {
+      "epoch": 1.5800193794617257,
+      "grad_norm": 1.4245272053754119,
+      "learning_rate": 5.406596313458558e-06,
+      "loss": 0.808,
+      "step": 39950
+    },
+    {
+      "epoch": 1.5804148786806147,
+      "grad_norm": 1.4489805746654498,
+      "learning_rate": 5.404302897482652e-06,
+      "loss": 0.821,
+      "step": 39960
+    },
+    {
+      "epoch": 1.5808103778995037,
+      "grad_norm": 1.2340763438717153,
+      "learning_rate": 5.402009395882319e-06,
+      "loss": 0.8279,
+      "step": 39970
+    },
+    {
+      "epoch": 1.5812058771183928,
+      "grad_norm": 1.252012477117537,
+      "learning_rate": 5.3997158091432835e-06,
+      "loss": 0.8126,
+      "step": 39980
+    },
+    {
+      "epoch": 1.5816013763372818,
+      "grad_norm": 1.3248169842244182,
+      "learning_rate": 5.397422137751287e-06,
+      "loss": 0.8099,
+      "step": 39990
+    },
+    {
+      "epoch": 1.5819968755561709,
+      "grad_norm": 1.3475216947297723,
+      "learning_rate": 5.395128382192091e-06,
+      "loss": 0.8102,
+      "step": 40000
+    },
+    {
+      "epoch": 1.58239237477506,
+      "grad_norm": 1.1762188778251867,
+      "learning_rate": 5.392834542951472e-06,
+      "loss": 0.8142,
+      "step": 40010
+    },
+    {
+      "epoch": 1.582787873993949,
+      "grad_norm": 1.2710464444756748,
+      "learning_rate": 5.390540620515229e-06,
+      "loss": 0.7998,
+      "step": 40020
+    },
+    {
+      "epoch": 1.583183373212838,
+      "grad_norm": 1.3264818674691536,
+      "learning_rate": 5.388246615369171e-06,
+      "loss": 0.8138,
+      "step": 40030
+    },
+    {
+      "epoch": 1.583578872431727,
+      "grad_norm": 1.4441310045795963,
+      "learning_rate": 5.385952527999132e-06,
+      "loss": 0.8053,
+      "step": 40040
+    },
+    {
+      "epoch": 1.583974371650616,
+      "grad_norm": 1.2704095797848778,
+      "learning_rate": 5.3836583588909615e-06,
+      "loss": 0.8165,
+      "step": 40050
+    },
+    {
+      "epoch": 1.584369870869505,
+      "grad_norm": 1.338339622896242,
+      "learning_rate": 5.381364108530523e-06,
+      "loss": 0.799,
+      "step": 40060
+    },
+    {
+      "epoch": 1.5847653700883941,
+      "grad_norm": 1.3619915388159567,
+      "learning_rate": 5.379069777403698e-06,
+      "loss": 0.7947,
+      "step": 40070
+    },
+    {
+      "epoch": 1.5851608693072832,
+      "grad_norm": 1.5434712344785235,
+      "learning_rate": 5.37677536599639e-06,
+      "loss": 0.7803,
+      "step": 40080
+    },
+    {
+      "epoch": 1.5855563685261722,
+      "grad_norm": 1.2760463512195508,
+      "learning_rate": 5.374480874794514e-06,
+      "loss": 0.8089,
+      "step": 40090
+    },
+    {
+      "epoch": 1.5859518677450613,
+      "grad_norm": 1.553780124801252,
+      "learning_rate": 5.372186304284005e-06,
+      "loss": 0.8367,
+      "step": 40100
+    },
+    {
+      "epoch": 1.5863473669639503,
+      "grad_norm": 1.2886073410975674,
+      "learning_rate": 5.369891654950812e-06,
+      "loss": 0.7911,
+      "step": 40110
+    },
+    {
+      "epoch": 1.5867428661828393,
+      "grad_norm": 1.5419804686856209,
+      "learning_rate": 5.367596927280904e-06,
+      "loss": 0.7809,
+      "step": 40120
+    },
+    {
+      "epoch": 1.5871383654017284,
+      "grad_norm": 1.654616091305112,
+      "learning_rate": 5.365302121760264e-06,
+      "loss": 0.8076,
+      "step": 40130
+    },
+    {
+      "epoch": 1.5875338646206174,
+      "grad_norm": 1.2812944189204107,
+      "learning_rate": 5.363007238874895e-06,
+      "loss": 0.8074,
+      "step": 40140
+    },
+    {
+      "epoch": 1.5879293638395064,
+      "grad_norm": 1.3803435979578003,
+      "learning_rate": 5.36071227911081e-06,
+      "loss": 0.8003,
+      "step": 40150
+    },
+    {
+      "epoch": 1.5883248630583955,
+      "grad_norm": 1.5265592135017254,
+      "learning_rate": 5.358417242954045e-06,
+      "loss": 0.7965,
+      "step": 40160
+    },
+    {
+      "epoch": 1.5887203622772845,
+      "grad_norm": 1.3098743695519768,
+      "learning_rate": 5.356122130890647e-06,
+      "loss": 0.8268,
+      "step": 40170
+    },
+    {
+      "epoch": 1.5891158614961736,
+      "grad_norm": 1.2590320720747743,
+      "learning_rate": 5.3538269434066846e-06,
+      "loss": 0.8503,
+      "step": 40180
+    },
+    {
+      "epoch": 1.5895113607150626,
+      "grad_norm": 1.3319176043086327,
+      "learning_rate": 5.351531680988237e-06,
+      "loss": 0.8192,
+      "step": 40190
+    },
+    {
+      "epoch": 1.5899068599339516,
+      "grad_norm": 1.3214017728416376,
+      "learning_rate": 5.349236344121401e-06,
+      "loss": 0.8023,
+      "step": 40200
+    },
+    {
+      "epoch": 1.5903023591528407,
+      "grad_norm": 1.4876331372802387,
+      "learning_rate": 5.346940933292291e-06,
+      "loss": 0.8069,
+      "step": 40210
+    },
+    {
+      "epoch": 1.5906978583717297,
+      "grad_norm": 1.4313982345901055,
+      "learning_rate": 5.344645448987036e-06,
+      "loss": 0.8192,
+      "step": 40220
+    },
+    {
+      "epoch": 1.5910933575906188,
+      "grad_norm": 1.2245650169287001,
+      "learning_rate": 5.342349891691778e-06,
+      "loss": 0.8115,
+      "step": 40230
+    },
+    {
+      "epoch": 1.5914888568095078,
+      "grad_norm": 1.5035978513353114,
+      "learning_rate": 5.3400542618926795e-06,
+      "loss": 0.7973,
+      "step": 40240
+    },
+    {
+      "epoch": 1.5918843560283968,
+      "grad_norm": 1.3267528883074913,
+      "learning_rate": 5.337758560075913e-06,
+      "loss": 0.7738,
+      "step": 40250
+    },
+    {
+      "epoch": 1.5922798552472859,
+      "grad_norm": 1.3623002922820524,
+      "learning_rate": 5.335462786727672e-06,
+      "loss": 0.8245,
+      "step": 40260
+    },
+    {
+      "epoch": 1.592675354466175,
+      "grad_norm": 1.1705826085042157,
+      "learning_rate": 5.3331669423341585e-06,
+      "loss": 0.7871,
+      "step": 40270
+    },
+    {
+      "epoch": 1.593070853685064,
+      "grad_norm": 1.2336945135349124,
+      "learning_rate": 5.330871027381594e-06,
+      "loss": 0.8137,
+      "step": 40280
+    },
+    {
+      "epoch": 1.593466352903953,
+      "grad_norm": 1.2116229779499763,
+      "learning_rate": 5.3285750423562155e-06,
+      "loss": 0.8062,
+      "step": 40290
+    },
+    {
+      "epoch": 1.593861852122842,
+      "grad_norm": 1.7707626236067646,
+      "learning_rate": 5.326278987744272e-06,
+      "loss": 0.8158,
+      "step": 40300
+    },
+    {
+      "epoch": 1.594257351341731,
+      "grad_norm": 1.3341719764154734,
+      "learning_rate": 5.323982864032028e-06,
+      "loss": 0.8073,
+      "step": 40310
+    },
+    {
+      "epoch": 1.5946528505606201,
+      "grad_norm": 1.3027250424657377,
+      "learning_rate": 5.321686671705765e-06,
+      "loss": 0.8092,
+      "step": 40320
+    },
+    {
+      "epoch": 1.5950483497795092,
+      "grad_norm": 1.332325718292871,
+      "learning_rate": 5.319390411251776e-06,
+      "loss": 0.8107,
+      "step": 40330
+    },
+    {
+      "epoch": 1.5954438489983982,
+      "grad_norm": 1.4501848648546483,
+      "learning_rate": 5.317094083156369e-06,
+      "loss": 0.7978,
+      "step": 40340
+    },
+    {
+      "epoch": 1.5958393482172872,
+      "grad_norm": 1.354568540251385,
+      "learning_rate": 5.314797687905868e-06,
+      "loss": 0.8059,
+      "step": 40350
+    },
+    {
+      "epoch": 1.5962348474361763,
+      "grad_norm": 1.4799730802658775,
+      "learning_rate": 5.31250122598661e-06,
+      "loss": 0.7823,
+      "step": 40360
+    },
+    {
+      "epoch": 1.5966303466550653,
+      "grad_norm": 1.3690694929723226,
+      "learning_rate": 5.310204697884945e-06,
+      "loss": 0.8095,
+      "step": 40370
+    },
+    {
+      "epoch": 1.5970258458739544,
+      "grad_norm": 1.3694042457107873,
+      "learning_rate": 5.307908104087241e-06,
+      "loss": 0.8236,
+      "step": 40380
+    },
+    {
+      "epoch": 1.5974213450928434,
+      "grad_norm": 1.2863554246821167,
+      "learning_rate": 5.305611445079875e-06,
+      "loss": 0.8159,
+      "step": 40390
+    },
+    {
+      "epoch": 1.5978168443117324,
+      "grad_norm": 1.418322845417851,
+      "learning_rate": 5.303314721349242e-06,
+      "loss": 0.7964,
+      "step": 40400
+    },
+    {
+      "epoch": 1.5982123435306215,
+      "grad_norm": 1.4567542728239116,
+      "learning_rate": 5.301017933381745e-06,
+      "loss": 0.8127,
+      "step": 40410
+    },
+    {
+      "epoch": 1.5986078427495105,
+      "grad_norm": 1.5296405578181327,
+      "learning_rate": 5.298721081663809e-06,
+      "loss": 0.8236,
+      "step": 40420
+    },
+    {
+      "epoch": 1.5990033419683995,
+      "grad_norm": 1.3380548661247318,
+      "learning_rate": 5.296424166681865e-06,
+      "loss": 0.8047,
+      "step": 40430
+    },
+    {
+      "epoch": 1.5993988411872886,
+      "grad_norm": 1.6403274708926723,
+      "learning_rate": 5.294127188922361e-06,
+      "loss": 0.7651,
+      "step": 40440
+    },
+    {
+      "epoch": 1.5997943404061776,
+      "grad_norm": 1.396002379922597,
+      "learning_rate": 5.291830148871757e-06,
+      "loss": 0.8116,
+      "step": 40450
+    },
+    {
+      "epoch": 1.6001898396250667,
+      "grad_norm": 1.5885888554485912,
+      "learning_rate": 5.289533047016528e-06,
+      "loss": 0.8041,
+      "step": 40460
+    },
+    {
+      "epoch": 1.6005853388439557,
+      "grad_norm": 1.4895143411119116,
+      "learning_rate": 5.287235883843159e-06,
+      "loss": 0.802,
+      "step": 40470
+    },
+    {
+      "epoch": 1.6009808380628447,
+      "grad_norm": 1.4963376550456207,
+      "learning_rate": 5.2849386598381515e-06,
+      "loss": 0.8034,
+      "step": 40480
+    },
+    {
+      "epoch": 1.6013763372817338,
+      "grad_norm": 1.534133783545266,
+      "learning_rate": 5.282641375488018e-06,
+      "loss": 0.8292,
+      "step": 40490
+    },
+    {
+      "epoch": 1.6017718365006228,
+      "grad_norm": 1.3632980225984834,
+      "learning_rate": 5.280344031279282e-06,
+      "loss": 0.8215,
+      "step": 40500
+    },
+    {
+      "epoch": 1.6021673357195119,
+      "grad_norm": 1.4154749144285832,
+      "learning_rate": 5.278046627698483e-06,
+      "loss": 0.8128,
+      "step": 40510
+    },
+    {
+      "epoch": 1.602562834938401,
+      "grad_norm": 1.3333665684610525,
+      "learning_rate": 5.275749165232173e-06,
+      "loss": 0.8039,
+      "step": 40520
+    },
+    {
+      "epoch": 1.60295833415729,
+      "grad_norm": 1.5310751848501931,
+      "learning_rate": 5.273451644366913e-06,
+      "loss": 0.7967,
+      "step": 40530
+    },
+    {
+      "epoch": 1.603353833376179,
+      "grad_norm": 1.6498015241780608,
+      "learning_rate": 5.27115406558928e-06,
+      "loss": 0.8103,
+      "step": 40540
+    },
+    {
+      "epoch": 1.603749332595068,
+      "grad_norm": 1.4229058599630109,
+      "learning_rate": 5.2688564293858615e-06,
+      "loss": 0.8098,
+      "step": 40550
+    },
+    {
+      "epoch": 1.604144831813957,
+      "grad_norm": 1.531581110951035,
+      "learning_rate": 5.266558736243257e-06,
+      "loss": 0.8032,
+      "step": 40560
+    },
+    {
+      "epoch": 1.604540331032846,
+      "grad_norm": 1.4889568355933234,
+      "learning_rate": 5.264260986648079e-06,
+      "loss": 0.8007,
+      "step": 40570
+    },
+    {
+      "epoch": 1.6049358302517351,
+      "grad_norm": 1.309551517550046,
+      "learning_rate": 5.261963181086953e-06,
+      "loss": 0.8256,
+      "step": 40580
+    },
+    {
+      "epoch": 1.6053313294706242,
+      "grad_norm": 1.2697547210999371,
+      "learning_rate": 5.259665320046511e-06,
+      "loss": 0.8112,
+      "step": 40590
+    },
+    {
+      "epoch": 1.6057268286895132,
+      "grad_norm": 1.5415879654948754,
+      "learning_rate": 5.257367404013404e-06,
+      "loss": 0.8025,
+      "step": 40600
+    },
+    {
+      "epoch": 1.6061223279084023,
+      "grad_norm": 1.3342375567267324,
+      "learning_rate": 5.255069433474289e-06,
+      "loss": 0.8105,
+      "step": 40610
+    },
+    {
+      "epoch": 1.6065178271272913,
+      "grad_norm": 1.4440148664344064,
+      "learning_rate": 5.252771408915839e-06,
+      "loss": 0.7837,
+      "step": 40620
+    },
+    {
+      "epoch": 1.6069133263461803,
+      "grad_norm": 1.406281251365491,
+      "learning_rate": 5.2504733308247335e-06,
+      "loss": 0.7984,
+      "step": 40630
+    },
+    {
+      "epoch": 1.6073088255650694,
+      "grad_norm": 1.4063659622937021,
+      "learning_rate": 5.248175199687671e-06,
+      "loss": 0.8093,
+      "step": 40640
+    },
+    {
+      "epoch": 1.6077043247839584,
+      "grad_norm": 1.6771433662018442,
+      "learning_rate": 5.24587701599135e-06,
+      "loss": 0.8092,
+      "step": 40650
+    },
+    {
+      "epoch": 1.6080998240028475,
+      "grad_norm": 1.2120573884464543,
+      "learning_rate": 5.24357878022249e-06,
+      "loss": 0.8054,
+      "step": 40660
+    },
+    {
+      "epoch": 1.6084953232217365,
+      "grad_norm": 1.2195005934195515,
+      "learning_rate": 5.2412804928678175e-06,
+      "loss": 0.7845,
+      "step": 40670
+    },
+    {
+      "epoch": 1.6088908224406255,
+      "grad_norm": 1.4574078451411203,
+      "learning_rate": 5.238982154414071e-06,
+      "loss": 0.8252,
+      "step": 40680
+    },
+    {
+      "epoch": 1.6092863216595146,
+      "grad_norm": 1.3620365469514852,
+      "learning_rate": 5.236683765347997e-06,
+      "loss": 0.7976,
+      "step": 40690
+    },
+    {
+      "epoch": 1.6096818208784036,
+      "grad_norm": 1.3738660603992838,
+      "learning_rate": 5.234385326156358e-06,
+      "loss": 0.8261,
+      "step": 40700
+    },
+    {
+      "epoch": 1.6100773200972927,
+      "grad_norm": 1.3483139459114069,
+      "learning_rate": 5.232086837325921e-06,
+      "loss": 0.7827,
+      "step": 40710
+    },
+    {
+      "epoch": 1.6104728193161817,
+      "grad_norm": 1.3099654534445502,
+      "learning_rate": 5.229788299343469e-06,
+      "loss": 0.8153,
+      "step": 40720
+    },
+    {
+      "epoch": 1.6108683185350707,
+      "grad_norm": 1.333028879245161,
+      "learning_rate": 5.227489712695794e-06,
+      "loss": 0.8052,
+      "step": 40730
+    },
+    {
+      "epoch": 1.6112638177539598,
+      "grad_norm": 1.189247195036392,
+      "learning_rate": 5.225191077869692e-06,
+      "loss": 0.819,
+      "step": 40740
+    },
+    {
+      "epoch": 1.611659316972849,
+      "grad_norm": 1.4282237240003406,
+      "learning_rate": 5.222892395351979e-06,
+      "loss": 0.7846,
+      "step": 40750
+    },
+    {
+      "epoch": 1.612054816191738,
+      "grad_norm": 1.4333835930506296,
+      "learning_rate": 5.220593665629476e-06,
+      "loss": 0.8121,
+      "step": 40760
+    },
+    {
+      "epoch": 1.612450315410627,
+      "grad_norm": 1.2786809764135276,
+      "learning_rate": 5.218294889189012e-06,
+      "loss": 0.8151,
+      "step": 40770
+    },
+    {
+      "epoch": 1.6128458146295162,
+      "grad_norm": 1.46649829050338,
+      "learning_rate": 5.215996066517432e-06,
+      "loss": 0.8015,
+      "step": 40780
+    },
+    {
+      "epoch": 1.6132413138484052,
+      "grad_norm": 1.369463227511982,
+      "learning_rate": 5.213697198101584e-06,
+      "loss": 0.8091,
+      "step": 40790
+    },
+    {
+      "epoch": 1.6136368130672942,
+      "grad_norm": 1.6166295093265266,
+      "learning_rate": 5.211398284428331e-06,
+      "loss": 0.7918,
+      "step": 40800
+    },
+    {
+      "epoch": 1.6140323122861833,
+      "grad_norm": 1.4687774927015027,
+      "learning_rate": 5.2090993259845435e-06,
+      "loss": 0.7835,
+      "step": 40810
+    },
+    {
+      "epoch": 1.6144278115050723,
+      "grad_norm": 1.2612010247542904,
+      "learning_rate": 5.206800323257102e-06,
+      "loss": 0.7975,
+      "step": 40820
+    },
+    {
+      "epoch": 1.6148233107239613,
+      "grad_norm": 1.4430852737062814,
+      "learning_rate": 5.204501276732894e-06,
+      "loss": 0.8064,
+      "step": 40830
+    },
+    {
+      "epoch": 1.6152188099428504,
+      "grad_norm": 1.4229175577473279,
+      "learning_rate": 5.202202186898819e-06,
+      "loss": 0.8177,
+      "step": 40840
+    },
+    {
+      "epoch": 1.6156143091617394,
+      "grad_norm": 1.4340347986726354,
+      "learning_rate": 5.199903054241785e-06,
+      "loss": 0.8082,
+      "step": 40850
+    },
+    {
+      "epoch": 1.6160098083806285,
+      "grad_norm": 1.3192594531785078,
+      "learning_rate": 5.1976038792487086e-06,
+      "loss": 0.7978,
+      "step": 40860
+    },
+    {
+      "epoch": 1.6164053075995175,
+      "grad_norm": 1.4188118970579786,
+      "learning_rate": 5.195304662406516e-06,
+      "loss": 0.7886,
+      "step": 40870
+    },
+    {
+      "epoch": 1.6168008068184065,
+      "grad_norm": 1.3508353889334082,
+      "learning_rate": 5.1930054042021425e-06,
+      "loss": 0.7818,
+      "step": 40880
+    },
+    {
+      "epoch": 1.6171963060372956,
+      "grad_norm": 1.4638681954061876,
+      "learning_rate": 5.1907061051225315e-06,
+      "loss": 0.8128,
+      "step": 40890
+    },
+    {
+      "epoch": 1.6175918052561846,
+      "grad_norm": 1.459273468962672,
+      "learning_rate": 5.188406765654634e-06,
+      "loss": 0.806,
+      "step": 40900
+    },
+    {
+      "epoch": 1.6179873044750737,
+      "grad_norm": 1.44578084287467,
+      "learning_rate": 5.186107386285411e-06,
+      "loss": 0.7833,
+      "step": 40910
+    },
+    {
+      "epoch": 1.6183828036939627,
+      "grad_norm": 1.5753975152670685,
+      "learning_rate": 5.1838079675018315e-06,
+      "loss": 0.8028,
+      "step": 40920
+    },
+    {
+      "epoch": 1.6187783029128517,
+      "grad_norm": 1.422009266154339,
+      "learning_rate": 5.181508509790874e-06,
+      "loss": 0.8227,
+      "step": 40930
+    },
+    {
+      "epoch": 1.6191738021317408,
+      "grad_norm": 1.297346489643188,
+      "learning_rate": 5.179209013639526e-06,
+      "loss": 0.7827,
+      "step": 40940
+    },
+    {
+      "epoch": 1.6195693013506298,
+      "grad_norm": 1.183255862240372,
+      "learning_rate": 5.1769094795347765e-06,
+      "loss": 0.8028,
+      "step": 40950
+    },
+    {
+      "epoch": 1.6199648005695189,
+      "grad_norm": 1.3829413149807757,
+      "learning_rate": 5.174609907963632e-06,
+      "loss": 0.7969,
+      "step": 40960
+    },
+    {
+      "epoch": 1.620360299788408,
+      "grad_norm": 1.3219613632842353,
+      "learning_rate": 5.1723102994130994e-06,
+      "loss": 0.8132,
+      "step": 40970
+    },
+    {
+      "epoch": 1.620755799007297,
+      "grad_norm": 1.3746912111761755,
+      "learning_rate": 5.170010654370197e-06,
+      "loss": 0.8046,
+      "step": 40980
+    },
+    {
+      "epoch": 1.621151298226186,
+      "grad_norm": 1.5284944514785503,
+      "learning_rate": 5.167710973321951e-06,
+      "loss": 0.771,
+      "step": 40990
+    },
+    {
+      "epoch": 1.621546797445075,
+      "grad_norm": 1.1941270662955368,
+      "learning_rate": 5.165411256755394e-06,
+      "loss": 0.8068,
+      "step": 41000
+    },
+    {
+      "epoch": 1.621942296663964,
+      "grad_norm": 1.5626063117347866,
+      "learning_rate": 5.163111505157568e-06,
+      "loss": 0.7796,
+      "step": 41010
+    },
+    {
+      "epoch": 1.622337795882853,
+      "grad_norm": 1.3242677983623634,
+      "learning_rate": 5.160811719015517e-06,
+      "loss": 0.819,
+      "step": 41020
+    },
+    {
+      "epoch": 1.6227332951017421,
+      "grad_norm": 1.4458638941451125,
+      "learning_rate": 5.1585118988163005e-06,
+      "loss": 0.7966,
+      "step": 41030
+    },
+    {
+      "epoch": 1.6231287943206312,
+      "grad_norm": 1.3637171913114798,
+      "learning_rate": 5.15621204504698e-06,
+      "loss": 0.8185,
+      "step": 41040
+    },
+    {
+      "epoch": 1.6235242935395202,
+      "grad_norm": 1.2603522704197314,
+      "learning_rate": 5.153912158194623e-06,
+      "loss": 0.8106,
+      "step": 41050
+    },
+    {
+      "epoch": 1.6239197927584093,
+      "grad_norm": 1.1415050399577653,
+      "learning_rate": 5.1516122387463085e-06,
+      "loss": 0.7883,
+      "step": 41060
+    },
+    {
+      "epoch": 1.6243152919772983,
+      "grad_norm": 1.4815100543142539,
+      "learning_rate": 5.149312287189121e-06,
+      "loss": 0.7984,
+      "step": 41070
+    },
+    {
+      "epoch": 1.6247107911961876,
+      "grad_norm": 1.151794784175854,
+      "learning_rate": 5.147012304010147e-06,
+      "loss": 0.7956,
+      "step": 41080
+    },
+    {
+      "epoch": 1.6251062904150766,
+      "grad_norm": 1.2311399933124467,
+      "learning_rate": 5.1447122896964865e-06,
+      "loss": 0.7991,
+      "step": 41090
+    },
+    {
+      "epoch": 1.6255017896339656,
+      "grad_norm": 1.4993124530336446,
+      "learning_rate": 5.1424122447352424e-06,
+      "loss": 0.8159,
+      "step": 41100
+    },
+    {
+      "epoch": 1.6258972888528547,
+      "grad_norm": 1.2230048244228209,
+      "learning_rate": 5.1401121696135235e-06,
+      "loss": 0.7885,
+      "step": 41110
+    },
+    {
+      "epoch": 1.6262927880717437,
+      "grad_norm": 1.4699198884997282,
+      "learning_rate": 5.137812064818448e-06,
+      "loss": 0.7789,
+      "step": 41120
+    },
+    {
+      "epoch": 1.6266882872906328,
+      "grad_norm": 1.4290286243301284,
+      "learning_rate": 5.13551193083714e-06,
+      "loss": 0.8024,
+      "step": 41130
+    },
+    {
+      "epoch": 1.6270837865095218,
+      "grad_norm": 1.3227082632051894,
+      "learning_rate": 5.133211768156725e-06,
+      "loss": 0.8026,
+      "step": 41140
+    },
+    {
+      "epoch": 1.6274792857284108,
+      "grad_norm": 1.5142721017223066,
+      "learning_rate": 5.130911577264339e-06,
+      "loss": 0.8151,
+      "step": 41150
+    },
+    {
+      "epoch": 1.6278747849472999,
+      "grad_norm": 1.518370424170619,
+      "learning_rate": 5.128611358647125e-06,
+      "loss": 0.8026,
+      "step": 41160
+    },
+    {
+      "epoch": 1.628270284166189,
+      "grad_norm": 1.2265411947058955,
+      "learning_rate": 5.126311112792229e-06,
+      "loss": 0.8043,
+      "step": 41170
+    },
+    {
+      "epoch": 1.628665783385078,
+      "grad_norm": 1.2756620215500682,
+      "learning_rate": 5.124010840186803e-06,
+      "loss": 0.794,
+      "step": 41180
+    },
+    {
+      "epoch": 1.629061282603967,
+      "grad_norm": 1.6144680072120252,
+      "learning_rate": 5.121710541318005e-06,
+      "loss": 0.813,
+      "step": 41190
+    },
+    {
+      "epoch": 1.629456781822856,
+      "grad_norm": 1.1864032667168556,
+      "learning_rate": 5.119410216673e-06,
+      "loss": 0.8348,
+      "step": 41200
+    },
+    {
+      "epoch": 1.629852281041745,
+      "grad_norm": 1.2891299525580995,
+      "learning_rate": 5.117109866738956e-06,
+      "loss": 0.8015,
+      "step": 41210
+    },
+    {
+      "epoch": 1.630247780260634,
+      "grad_norm": 1.4370258261290074,
+      "learning_rate": 5.11480949200305e-06,
+      "loss": 0.7943,
+      "step": 41220
+    },
+    {
+      "epoch": 1.6306432794795231,
+      "grad_norm": 1.2958430551134368,
+      "learning_rate": 5.112509092952459e-06,
+      "loss": 0.7808,
+      "step": 41230
+    },
+    {
+      "epoch": 1.6310387786984122,
+      "grad_norm": 1.4127537081316506,
+      "learning_rate": 5.1102086700743705e-06,
+      "loss": 0.8101,
+      "step": 41240
+    },
+    {
+      "epoch": 1.6314342779173012,
+      "grad_norm": 1.2759522436620703,
+      "learning_rate": 5.107908223855974e-06,
+      "loss": 0.8085,
+      "step": 41250
+    },
+    {
+      "epoch": 1.6318297771361903,
+      "grad_norm": 1.335467702209931,
+      "learning_rate": 5.105607754784464e-06,
+      "loss": 0.7899,
+      "step": 41260
+    },
+    {
+      "epoch": 1.6322252763550793,
+      "grad_norm": 1.4066176742411196,
+      "learning_rate": 5.103307263347042e-06,
+      "loss": 0.7988,
+      "step": 41270
+    },
+    {
+      "epoch": 1.6326207755739683,
+      "grad_norm": 1.7410660983956376,
+      "learning_rate": 5.101006750030909e-06,
+      "loss": 0.8114,
+      "step": 41280
+    },
+    {
+      "epoch": 1.6330162747928574,
+      "grad_norm": 1.1572830853662208,
+      "learning_rate": 5.098706215323278e-06,
+      "loss": 0.7978,
+      "step": 41290
+    },
+    {
+      "epoch": 1.6334117740117464,
+      "grad_norm": 2.038659243784275,
+      "learning_rate": 5.096405659711362e-06,
+      "loss": 0.7752,
+      "step": 41300
+    },
+    {
+      "epoch": 1.6338072732306355,
+      "grad_norm": 1.1956590106167422,
+      "learning_rate": 5.09410508368238e-06,
+      "loss": 0.8269,
+      "step": 41310
+    },
+    {
+      "epoch": 1.6342027724495245,
+      "grad_norm": 1.4121952198194214,
+      "learning_rate": 5.091804487723552e-06,
+      "loss": 0.8079,
+      "step": 41320
+    },
+    {
+      "epoch": 1.6345982716684135,
+      "grad_norm": 1.484749633932209,
+      "learning_rate": 5.089503872322106e-06,
+      "loss": 0.8186,
+      "step": 41330
+    },
+    {
+      "epoch": 1.6349937708873026,
+      "grad_norm": 1.2017763707371538,
+      "learning_rate": 5.087203237965274e-06,
+      "loss": 0.7991,
+      "step": 41340
+    },
+    {
+      "epoch": 1.6353892701061916,
+      "grad_norm": 1.3291992319748185,
+      "learning_rate": 5.08490258514029e-06,
+      "loss": 0.8186,
+      "step": 41350
+    },
+    {
+      "epoch": 1.6357847693250807,
+      "grad_norm": 1.3216493155035252,
+      "learning_rate": 5.082601914334392e-06,
+      "loss": 0.7812,
+      "step": 41360
+    },
+    {
+      "epoch": 1.6361802685439697,
+      "grad_norm": 1.2400719889180203,
+      "learning_rate": 5.080301226034826e-06,
+      "loss": 0.7867,
+      "step": 41370
+    },
+    {
+      "epoch": 1.6365757677628587,
+      "grad_norm": 1.5642923134596862,
+      "learning_rate": 5.078000520728835e-06,
+      "loss": 0.8107,
+      "step": 41380
+    },
+    {
+      "epoch": 1.6369712669817478,
+      "grad_norm": 1.3374933689334152,
+      "learning_rate": 5.07569979890367e-06,
+      "loss": 0.8052,
+      "step": 41390
+    },
+    {
+      "epoch": 1.6373667662006368,
+      "grad_norm": 1.1892639563490934,
+      "learning_rate": 5.073399061046584e-06,
+      "loss": 0.792,
+      "step": 41400
+    },
+    {
+      "epoch": 1.6377622654195259,
+      "grad_norm": 1.6774346530220643,
+      "learning_rate": 5.071098307644835e-06,
+      "loss": 0.7915,
+      "step": 41410
+    },
+    {
+      "epoch": 1.638157764638415,
+      "grad_norm": 1.3469751914426114,
+      "learning_rate": 5.0687975391856825e-06,
+      "loss": 0.7943,
+      "step": 41420
+    },
+    {
+      "epoch": 1.638553263857304,
+      "grad_norm": 1.3360890070486067,
+      "learning_rate": 5.06649675615639e-06,
+      "loss": 0.8082,
+      "step": 41430
+    },
+    {
+      "epoch": 1.638948763076193,
+      "grad_norm": 1.4637269478159602,
+      "learning_rate": 5.064195959044224e-06,
+      "loss": 0.8404,
+      "step": 41440
+    },
+    {
+      "epoch": 1.639344262295082,
+      "grad_norm": 1.5566142242572265,
+      "learning_rate": 5.061895148336452e-06,
+      "loss": 0.8174,
+      "step": 41450
+    },
+    {
+      "epoch": 1.639739761513971,
+      "grad_norm": 1.4233634252833747,
+      "learning_rate": 5.059594324520348e-06,
+      "loss": 0.8062,
+      "step": 41460
+    },
+    {
+      "epoch": 1.64013526073286,
+      "grad_norm": 1.5718870081810636,
+      "learning_rate": 5.057293488083188e-06,
+      "loss": 0.8108,
+      "step": 41470
+    },
+    {
+      "epoch": 1.6405307599517491,
+      "grad_norm": 1.3328920880261494,
+      "learning_rate": 5.05499263951225e-06,
+      "loss": 0.7941,
+      "step": 41480
+    },
+    {
+      "epoch": 1.6409262591706382,
+      "grad_norm": 1.5310802655594322,
+      "learning_rate": 5.0526917792948105e-06,
+      "loss": 0.8004,
+      "step": 41490
+    },
+    {
+      "epoch": 1.6413217583895272,
+      "grad_norm": 1.3720082816217016,
+      "learning_rate": 5.0503909079181576e-06,
+      "loss": 0.8185,
+      "step": 41500
+    },
+    {
+      "epoch": 1.6417172576084162,
+      "grad_norm": 1.3001261214761737,
+      "learning_rate": 5.048090025869572e-06,
+      "loss": 0.7769,
+      "step": 41510
+    },
+    {
+      "epoch": 1.6421127568273053,
+      "grad_norm": 1.459594603957872,
+      "learning_rate": 5.045789133636343e-06,
+      "loss": 0.8207,
+      "step": 41520
+    },
+    {
+      "epoch": 1.6425082560461943,
+      "grad_norm": 1.528547742547802,
+      "learning_rate": 5.04348823170576e-06,
+      "loss": 0.8178,
+      "step": 41530
+    },
+    {
+      "epoch": 1.6429037552650834,
+      "grad_norm": 1.6085092021760932,
+      "learning_rate": 5.041187320565115e-06,
+      "loss": 0.8093,
+      "step": 41540
+    },
+    {
+      "epoch": 1.6432992544839724,
+      "grad_norm": 1.5470087310200578,
+      "learning_rate": 5.0388864007017e-06,
+      "loss": 0.7922,
+      "step": 41550
+    },
+    {
+      "epoch": 1.6436947537028614,
+      "grad_norm": 1.6178933920682441,
+      "learning_rate": 5.036585472602814e-06,
+      "loss": 0.7933,
+      "step": 41560
+    },
+    {
+      "epoch": 1.6440902529217505,
+      "grad_norm": 1.2731993922742746,
+      "learning_rate": 5.03428453675575e-06,
+      "loss": 0.8038,
+      "step": 41570
+    },
+    {
+      "epoch": 1.6444857521406395,
+      "grad_norm": 1.274779109720121,
+      "learning_rate": 5.031983593647808e-06,
+      "loss": 0.8127,
+      "step": 41580
+    },
+    {
+      "epoch": 1.6448812513595286,
+      "grad_norm": 1.4217855981866225,
+      "learning_rate": 5.02968264376629e-06,
+      "loss": 0.8063,
+      "step": 41590
+    },
+    {
+      "epoch": 1.6452767505784176,
+      "grad_norm": 1.1475518373650064,
+      "learning_rate": 5.0273816875984945e-06,
+      "loss": 0.8292,
+      "step": 41600
+    },
+    {
+      "epoch": 1.6456722497973066,
+      "grad_norm": 1.4813545809653703,
+      "learning_rate": 5.0250807256317305e-06,
+      "loss": 0.8053,
+      "step": 41610
+    },
+    {
+      "epoch": 1.6460677490161957,
+      "grad_norm": 1.5924575905158627,
+      "learning_rate": 5.022779758353296e-06,
+      "loss": 0.787,
+      "step": 41620
+    },
+    {
+      "epoch": 1.6464632482350847,
+      "grad_norm": 1.6631214353540575,
+      "learning_rate": 5.020478786250498e-06,
+      "loss": 0.7841,
+      "step": 41630
+    },
+    {
+      "epoch": 1.6468587474539738,
+      "grad_norm": 1.361685365546339,
+      "learning_rate": 5.018177809810646e-06,
+      "loss": 0.7766,
+      "step": 41640
+    },
+    {
+      "epoch": 1.6472542466728628,
+      "grad_norm": 1.51754234697285,
+      "learning_rate": 5.015876829521045e-06,
+      "loss": 0.792,
+      "step": 41650
+    },
+    {
+      "epoch": 1.6476497458917518,
+      "grad_norm": 1.2264930998008845,
+      "learning_rate": 5.013575845869002e-06,
+      "loss": 0.8055,
+      "step": 41660
+    },
+    {
+      "epoch": 1.6480452451106409,
+      "grad_norm": 1.3978468973402076,
+      "learning_rate": 5.011274859341828e-06,
+      "loss": 0.7912,
+      "step": 41670
+    },
+    {
+      "epoch": 1.64844074432953,
+      "grad_norm": 1.307692278637964,
+      "learning_rate": 5.0089738704268336e-06,
+      "loss": 0.7927,
+      "step": 41680
+    },
+    {
+      "epoch": 1.648836243548419,
+      "grad_norm": 1.3138834727499307,
+      "learning_rate": 5.006672879611325e-06,
+      "loss": 0.8048,
+      "step": 41690
+    },
+    {
+      "epoch": 1.649231742767308,
+      "grad_norm": 1.4437893592989504,
+      "learning_rate": 5.004371887382615e-06,
+      "loss": 0.7721,
+      "step": 41700
+    },
+    {
+      "epoch": 1.649627241986197,
+      "grad_norm": 1.3997530085345107,
+      "learning_rate": 5.002070894228015e-06,
+      "loss": 0.8021,
+      "step": 41710
+    },
+    {
+      "epoch": 1.650022741205086,
+      "grad_norm": 1.4303570839058444,
+      "learning_rate": 4.999769900634835e-06,
+      "loss": 0.8109,
+      "step": 41720
+    },
+    {
+      "epoch": 1.6504182404239751,
+      "grad_norm": 1.3448357092680352,
+      "learning_rate": 4.997468907090385e-06,
+      "loss": 0.7735,
+      "step": 41730
+    },
+    {
+      "epoch": 1.6508137396428642,
+      "grad_norm": 1.3548187282963036,
+      "learning_rate": 4.995167914081979e-06,
+      "loss": 0.8075,
+      "step": 41740
+    },
+    {
+      "epoch": 1.6512092388617532,
+      "grad_norm": 1.4015841055562521,
+      "learning_rate": 4.9928669220969245e-06,
+      "loss": 0.7894,
+      "step": 41750
+    },
+    {
+      "epoch": 1.6516047380806422,
+      "grad_norm": 1.4101541375930886,
+      "learning_rate": 4.990565931622534e-06,
+      "loss": 0.8233,
+      "step": 41760
+    },
+    {
+      "epoch": 1.6520002372995313,
+      "grad_norm": 1.4053939714558688,
+      "learning_rate": 4.98826494314612e-06,
+      "loss": 0.8151,
+      "step": 41770
+    },
+    {
+      "epoch": 1.6523957365184203,
+      "grad_norm": 1.5853674425593267,
+      "learning_rate": 4.985963957154988e-06,
+      "loss": 0.8015,
+      "step": 41780
+    },
+    {
+      "epoch": 1.6527912357373093,
+      "grad_norm": 1.279673576255129,
+      "learning_rate": 4.9836629741364485e-06,
+      "loss": 0.7916,
+      "step": 41790
+    },
+    {
+      "epoch": 1.6531867349561984,
+      "grad_norm": 1.6211416265648426,
+      "learning_rate": 4.981361994577812e-06,
+      "loss": 0.7682,
+      "step": 41800
+    },
+    {
+      "epoch": 1.6535822341750874,
+      "grad_norm": 1.2474906969955124,
+      "learning_rate": 4.979061018966385e-06,
+      "loss": 0.7821,
+      "step": 41810
+    },
+    {
+      "epoch": 1.6539777333939765,
+      "grad_norm": 1.4355222449291372,
+      "learning_rate": 4.976760047789476e-06,
+      "loss": 0.7989,
+      "step": 41820
+    },
+    {
+      "epoch": 1.6543732326128655,
+      "grad_norm": 1.335955536813259,
+      "learning_rate": 4.974459081534391e-06,
+      "loss": 0.806,
+      "step": 41830
+    },
+    {
+      "epoch": 1.6547687318317545,
+      "grad_norm": 1.2633356330201375,
+      "learning_rate": 4.972158120688435e-06,
+      "loss": 0.8147,
+      "step": 41840
+    },
+    {
+      "epoch": 1.6551642310506436,
+      "grad_norm": 1.3893167873568655,
+      "learning_rate": 4.9698571657389126e-06,
+      "loss": 0.8093,
+      "step": 41850
+    },
+    {
+      "epoch": 1.6555597302695326,
+      "grad_norm": 1.1974974826775149,
+      "learning_rate": 4.9675562171731255e-06,
+      "loss": 0.8065,
+      "step": 41860
+    },
+    {
+      "epoch": 1.6559552294884217,
+      "grad_norm": 1.459057806211302,
+      "learning_rate": 4.965255275478375e-06,
+      "loss": 0.7989,
+      "step": 41870
+    },
+    {
+      "epoch": 1.6563507287073107,
+      "grad_norm": 1.3100690740574463,
+      "learning_rate": 4.962954341141962e-06,
+      "loss": 0.7968,
+      "step": 41880
+    },
+    {
+      "epoch": 1.6567462279261997,
+      "grad_norm": 1.2798098844745645,
+      "learning_rate": 4.960653414651185e-06,
+      "loss": 0.8228,
+      "step": 41890
+    },
+    {
+      "epoch": 1.6571417271450888,
+      "grad_norm": 1.284988533619987,
+      "learning_rate": 4.958352496493344e-06,
+      "loss": 0.8234,
+      "step": 41900
+    },
+    {
+      "epoch": 1.6575372263639778,
+      "grad_norm": 1.6586809908788955,
+      "learning_rate": 4.9560515871557275e-06,
+      "loss": 0.8053,
+      "step": 41910
+    },
+    {
+      "epoch": 1.6579327255828669,
+      "grad_norm": 1.3544940087804016,
+      "learning_rate": 4.953750687125632e-06,
+      "loss": 0.8153,
+      "step": 41920
+    },
+    {
+      "epoch": 1.658328224801756,
+      "grad_norm": 1.3540182816989457,
+      "learning_rate": 4.951449796890349e-06,
+      "loss": 0.8046,
+      "step": 41930
+    },
+    {
+      "epoch": 1.658723724020645,
+      "grad_norm": 1.2846223969690125,
+      "learning_rate": 4.949148916937166e-06,
+      "loss": 0.7978,
+      "step": 41940
+    },
+    {
+      "epoch": 1.659119223239534,
+      "grad_norm": 1.1978172717837627,
+      "learning_rate": 4.946848047753372e-06,
+      "loss": 0.7793,
+      "step": 41950
+    },
+    {
+      "epoch": 1.659514722458423,
+      "grad_norm": 1.6702083592952615,
+      "learning_rate": 4.944547189826252e-06,
+      "loss": 0.791,
+      "step": 41960
+    },
+    {
+      "epoch": 1.659910221677312,
+      "grad_norm": 1.3200634483638447,
+      "learning_rate": 4.942246343643086e-06,
+      "loss": 0.7974,
+      "step": 41970
+    },
+    {
+      "epoch": 1.660305720896201,
+      "grad_norm": 1.3251496709118895,
+      "learning_rate": 4.939945509691155e-06,
+      "loss": 0.7787,
+      "step": 41980
+    },
+    {
+      "epoch": 1.6607012201150901,
+      "grad_norm": 1.4667522010162886,
+      "learning_rate": 4.937644688457735e-06,
+      "loss": 0.8195,
+      "step": 41990
+    },
+    {
+      "epoch": 1.6610967193339792,
+      "grad_norm": 1.572741584792104,
+      "learning_rate": 4.935343880430104e-06,
+      "loss": 0.788,
+      "step": 42000
+    },
+    {
+      "epoch": 1.6614922185528682,
+      "grad_norm": 1.5311403619254402,
+      "learning_rate": 4.93304308609553e-06,
+      "loss": 0.7796,
+      "step": 42010
+    },
+    {
+      "epoch": 1.6618877177717573,
+      "grad_norm": 1.1239757199185472,
+      "learning_rate": 4.930742305941286e-06,
+      "loss": 0.8199,
+      "step": 42020
+    },
+    {
+      "epoch": 1.6622832169906463,
+      "grad_norm": 1.4821709652376787,
+      "learning_rate": 4.928441540454633e-06,
+      "loss": 0.8008,
+      "step": 42030
+    },
+    {
+      "epoch": 1.6626787162095353,
+      "grad_norm": 1.3214292561972814,
+      "learning_rate": 4.926140790122835e-06,
+      "loss": 0.7966,
+      "step": 42040
+    },
+    {
+      "epoch": 1.6630742154284244,
+      "grad_norm": 1.3409218031743169,
+      "learning_rate": 4.923840055433153e-06,
+      "loss": 0.7605,
+      "step": 42050
+    },
+    {
+      "epoch": 1.6634697146473134,
+      "grad_norm": 1.470378145593192,
+      "learning_rate": 4.921539336872843e-06,
+      "loss": 0.8095,
+      "step": 42060
+    },
+    {
+      "epoch": 1.6638652138662025,
+      "grad_norm": 1.211922381443777,
+      "learning_rate": 4.919238634929156e-06,
+      "loss": 0.7845,
+      "step": 42070
+    },
+    {
+      "epoch": 1.6642607130850915,
+      "grad_norm": 1.4309073000503374,
+      "learning_rate": 4.9169379500893435e-06,
+      "loss": 0.8018,
+      "step": 42080
+    },
+    {
+      "epoch": 1.6646562123039808,
+      "grad_norm": 1.2459282904481337,
+      "learning_rate": 4.91463728284065e-06,
+      "loss": 0.8244,
+      "step": 42090
+    },
+    {
+      "epoch": 1.6650517115228698,
+      "grad_norm": 1.300560462748635,
+      "learning_rate": 4.912336633670317e-06,
+      "loss": 0.8191,
+      "step": 42100
+    },
+    {
+      "epoch": 1.6654472107417588,
+      "grad_norm": 1.5664068497204677,
+      "learning_rate": 4.910036003065584e-06,
+      "loss": 0.7726,
+      "step": 42110
+    },
+    {
+      "epoch": 1.6658427099606479,
+      "grad_norm": 1.4343503619714235,
+      "learning_rate": 4.907735391513683e-06,
+      "loss": 0.8152,
+      "step": 42120
+    },
+    {
+      "epoch": 1.666238209179537,
+      "grad_norm": 1.3538463598837878,
+      "learning_rate": 4.905434799501846e-06,
+      "loss": 0.8081,
+      "step": 42130
+    },
+    {
+      "epoch": 1.666633708398426,
+      "grad_norm": 1.4546245671485367,
+      "learning_rate": 4.9031342275173e-06,
+      "loss": 0.7889,
+      "step": 42140
+    },
+    {
+      "epoch": 1.667029207617315,
+      "grad_norm": 1.460979398214131,
+      "learning_rate": 4.900833676047264e-06,
+      "loss": 0.816,
+      "step": 42150
+    },
+    {
+      "epoch": 1.667424706836204,
+      "grad_norm": 1.4005379869488497,
+      "learning_rate": 4.8985331455789555e-06,
+      "loss": 0.8093,
+      "step": 42160
+    },
+    {
+      "epoch": 1.667820206055093,
+      "grad_norm": 1.461123299834793,
+      "learning_rate": 4.896232636599589e-06,
+      "loss": 0.8049,
+      "step": 42170
+    },
+    {
+      "epoch": 1.668215705273982,
+      "grad_norm": 1.498751814254091,
+      "learning_rate": 4.8939321495963725e-06,
+      "loss": 0.8109,
+      "step": 42180
+    },
+    {
+      "epoch": 1.6686112044928711,
+      "grad_norm": 1.3214552818503533,
+      "learning_rate": 4.8916316850565085e-06,
+      "loss": 0.7971,
+      "step": 42190
+    },
+    {
+      "epoch": 1.6690067037117602,
+      "grad_norm": 1.2904600340977768,
+      "learning_rate": 4.889331243467198e-06,
+      "loss": 0.8199,
+      "step": 42200
+    },
+    {
+      "epoch": 1.6694022029306492,
+      "grad_norm": 1.8066773438161514,
+      "learning_rate": 4.887030825315634e-06,
+      "loss": 0.793,
+      "step": 42210
+    },
+    {
+      "epoch": 1.6697977021495383,
+      "grad_norm": 1.4727567463500029,
+      "learning_rate": 4.884730431089005e-06,
+      "loss": 0.8071,
+      "step": 42220
+    },
+    {
+      "epoch": 1.6701932013684273,
+      "grad_norm": 1.328195378699885,
+      "learning_rate": 4.882430061274497e-06,
+      "loss": 0.7843,
+      "step": 42230
+    },
+    {
+      "epoch": 1.6705887005873163,
+      "grad_norm": 1.418817685555987,
+      "learning_rate": 4.880129716359287e-06,
+      "loss": 0.7891,
+      "step": 42240
+    },
+    {
+      "epoch": 1.6709841998062054,
+      "grad_norm": 1.3605698534039914,
+      "learning_rate": 4.87782939683055e-06,
+      "loss": 0.8008,
+      "step": 42250
+    },
+    {
+      "epoch": 1.6713796990250944,
+      "grad_norm": 1.7000954090105216,
+      "learning_rate": 4.875529103175456e-06,
+      "loss": 0.7688,
+      "step": 42260
+    },
+    {
+      "epoch": 1.6717751982439835,
+      "grad_norm": 1.4566491742786827,
+      "learning_rate": 4.873228835881162e-06,
+      "loss": 0.7561,
+      "step": 42270
+    },
+    {
+      "epoch": 1.6721706974628725,
+      "grad_norm": 1.3160028803144155,
+      "learning_rate": 4.87092859543483e-06,
+      "loss": 0.7863,
+      "step": 42280
+    },
+    {
+      "epoch": 1.6725661966817615,
+      "grad_norm": 1.2901877704331015,
+      "learning_rate": 4.86862838232361e-06,
+      "loss": 0.797,
+      "step": 42290
+    },
+    {
+      "epoch": 1.6729616959006506,
+      "grad_norm": 1.2041622522828366,
+      "learning_rate": 4.866328197034648e-06,
+      "loss": 0.8062,
+      "step": 42300
+    },
+    {
+      "epoch": 1.6733571951195396,
+      "grad_norm": 1.467281123581637,
+      "learning_rate": 4.864028040055083e-06,
+      "loss": 0.7804,
+      "step": 42310
+    },
+    {
+      "epoch": 1.6737526943384287,
+      "grad_norm": 1.5623391677438745,
+      "learning_rate": 4.8617279118720514e-06,
+      "loss": 0.7746,
+      "step": 42320
+    },
+    {
+      "epoch": 1.6741481935573177,
+      "grad_norm": 1.2405064822533243,
+      "learning_rate": 4.859427812972678e-06,
+      "loss": 0.8185,
+      "step": 42330
+    },
+    {
+      "epoch": 1.6745436927762067,
+      "grad_norm": 1.427040683576127,
+      "learning_rate": 4.857127743844085e-06,
+      "loss": 0.7971,
+      "step": 42340
+    },
+    {
+      "epoch": 1.6749391919950958,
+      "grad_norm": 1.3788531581757955,
+      "learning_rate": 4.8548277049733885e-06,
+      "loss": 0.7875,
+      "step": 42350
+    },
+    {
+      "epoch": 1.6753346912139848,
+      "grad_norm": 1.3577203505759141,
+      "learning_rate": 4.852527696847697e-06,
+      "loss": 0.7904,
+      "step": 42360
+    },
+    {
+      "epoch": 1.6757301904328739,
+      "grad_norm": 1.3937128443814712,
+      "learning_rate": 4.850227719954113e-06,
+      "loss": 0.8197,
+      "step": 42370
+    },
+    {
+      "epoch": 1.676125689651763,
+      "grad_norm": 1.501564988224279,
+      "learning_rate": 4.847927774779732e-06,
+      "loss": 0.786,
+      "step": 42380
+    },
+    {
+      "epoch": 1.676521188870652,
+      "grad_norm": 1.4957662122876973,
+      "learning_rate": 4.845627861811645e-06,
+      "loss": 0.7608,
+      "step": 42390
+    },
+    {
+      "epoch": 1.676916688089541,
+      "grad_norm": 1.4240772072539798,
+      "learning_rate": 4.8433279815369296e-06,
+      "loss": 0.7822,
+      "step": 42400
+    },
+    {
+      "epoch": 1.67731218730843,
+      "grad_norm": 1.2154685029738814,
+      "learning_rate": 4.841028134442664e-06,
+      "loss": 0.7841,
+      "step": 42410
+    },
+    {
+      "epoch": 1.677707686527319,
+      "grad_norm": 1.3227397609642766,
+      "learning_rate": 4.838728321015916e-06,
+      "loss": 0.7734,
+      "step": 42420
+    },
+    {
+      "epoch": 1.6781031857462083,
+      "grad_norm": 1.266335843013026,
+      "learning_rate": 4.836428541743746e-06,
+      "loss": 0.7721,
+      "step": 42430
+    },
+    {
+      "epoch": 1.6784986849650974,
+      "grad_norm": 1.443548428620844,
+      "learning_rate": 4.834128797113209e-06,
+      "loss": 0.7606,
+      "step": 42440
+    },
+    {
+      "epoch": 1.6788941841839864,
+      "grad_norm": 1.5556808427805768,
+      "learning_rate": 4.831829087611351e-06,
+      "loss": 0.7987,
+      "step": 42450
+    },
+    {
+      "epoch": 1.6792896834028754,
+      "grad_norm": 1.2562177980775426,
+      "learning_rate": 4.829529413725211e-06,
+      "loss": 0.7999,
+      "step": 42460
+    },
+    {
+      "epoch": 1.6796851826217645,
+      "grad_norm": 1.437104020335207,
+      "learning_rate": 4.827229775941821e-06,
+      "loss": 0.7979,
+      "step": 42470
+    },
+    {
+      "epoch": 1.6800806818406535,
+      "grad_norm": 1.4806773656333854,
+      "learning_rate": 4.824930174748205e-06,
+      "loss": 0.797,
+      "step": 42480
+    },
+    {
+      "epoch": 1.6804761810595426,
+      "grad_norm": 1.362515207104312,
+      "learning_rate": 4.822630610631378e-06,
+      "loss": 0.7733,
+      "step": 42490
+    },
+    {
+      "epoch": 1.6808716802784316,
+      "grad_norm": 1.405912697625109,
+      "learning_rate": 4.82033108407835e-06,
+      "loss": 0.7945,
+      "step": 42500
+    },
+    {
+      "epoch": 1.6812671794973206,
+      "grad_norm": 1.2248690592419256,
+      "learning_rate": 4.8180315955761194e-06,
+      "loss": 0.8054,
+      "step": 42510
+    },
+    {
+      "epoch": 1.6816626787162097,
+      "grad_norm": 1.3441265232365305,
+      "learning_rate": 4.815732145611679e-06,
+      "loss": 0.8001,
+      "step": 42520
+    },
+    {
+      "epoch": 1.6820581779350987,
+      "grad_norm": 1.4068201423342177,
+      "learning_rate": 4.813432734672014e-06,
+      "loss": 0.8026,
+      "step": 42530
+    },
+    {
+      "epoch": 1.6824536771539877,
+      "grad_norm": 1.6335093188431398,
+      "learning_rate": 4.811133363244098e-06,
+      "loss": 0.7829,
+      "step": 42540
+    },
+    {
+      "epoch": 1.6828491763728768,
+      "grad_norm": 1.6361973567873165,
+      "learning_rate": 4.808834031814903e-06,
+      "loss": 0.7963,
+      "step": 42550
+    },
+    {
+      "epoch": 1.6832446755917658,
+      "grad_norm": 1.6063928983817761,
+      "learning_rate": 4.8065347408713825e-06,
+      "loss": 0.8293,
+      "step": 42560
+    },
+    {
+      "epoch": 1.6836401748106549,
+      "grad_norm": 1.189586260288003,
+      "learning_rate": 4.80423549090049e-06,
+      "loss": 0.8113,
+      "step": 42570
+    },
+    {
+      "epoch": 1.684035674029544,
+      "grad_norm": 1.4326924850284528,
+      "learning_rate": 4.8019362823891666e-06,
+      "loss": 0.803,
+      "step": 42580
+    },
+    {
+      "epoch": 1.684431173248433,
+      "grad_norm": 1.3667483940799752,
+      "learning_rate": 4.799637115824345e-06,
+      "loss": 0.798,
+      "step": 42590
+    },
+    {
+      "epoch": 1.684826672467322,
+      "grad_norm": 1.5016465200063276,
+      "learning_rate": 4.797337991692949e-06,
+      "loss": 0.7738,
+      "step": 42600
+    },
+    {
+      "epoch": 1.685222171686211,
+      "grad_norm": 1.6018033297555405,
+      "learning_rate": 4.795038910481895e-06,
+      "loss": 0.8036,
+      "step": 42610
+    },
+    {
+      "epoch": 1.6856176709051,
+      "grad_norm": 1.471060850050138,
+      "learning_rate": 4.7927398726780885e-06,
+      "loss": 0.8051,
+      "step": 42620
+    },
+    {
+      "epoch": 1.686013170123989,
+      "grad_norm": 1.5916982701656708,
+      "learning_rate": 4.7904408787684285e-06,
+      "loss": 0.7977,
+      "step": 42630
+    },
+    {
+      "epoch": 1.6864086693428781,
+      "grad_norm": 1.3876508928192102,
+      "learning_rate": 4.788141929239798e-06,
+      "loss": 0.8016,
+      "step": 42640
+    },
+    {
+      "epoch": 1.6868041685617672,
+      "grad_norm": 1.6229604411871494,
+      "learning_rate": 4.785843024579077e-06,
+      "loss": 0.7928,
+      "step": 42650
+    },
+    {
+      "epoch": 1.6871996677806562,
+      "grad_norm": 1.2848397455570024,
+      "learning_rate": 4.783544165273134e-06,
+      "loss": 0.7872,
+      "step": 42660
+    },
+    {
+      "epoch": 1.6875951669995453,
+      "grad_norm": 1.8445795440823296,
+      "learning_rate": 4.781245351808829e-06,
+      "loss": 0.7803,
+      "step": 42670
+    },
+    {
+      "epoch": 1.6879906662184343,
+      "grad_norm": 1.5040229146897743,
+      "learning_rate": 4.7789465846730106e-06,
+      "loss": 0.7975,
+      "step": 42680
+    },
+    {
+      "epoch": 1.6883861654373233,
+      "grad_norm": 1.2624642923813387,
+      "learning_rate": 4.776647864352518e-06,
+      "loss": 0.8116,
+      "step": 42690
+    },
+    {
+      "epoch": 1.6887816646562124,
+      "grad_norm": 1.4422981083863158,
+      "learning_rate": 4.774349191334182e-06,
+      "loss": 0.8156,
+      "step": 42700
+    },
+    {
+      "epoch": 1.6891771638751014,
+      "grad_norm": 1.2807413259462608,
+      "learning_rate": 4.772050566104821e-06,
+      "loss": 0.8165,
+      "step": 42710
+    },
+    {
+      "epoch": 1.6895726630939905,
+      "grad_norm": 1.4337090275460487,
+      "learning_rate": 4.769751989151244e-06,
+      "loss": 0.7953,
+      "step": 42720
+    },
+    {
+      "epoch": 1.6899681623128795,
+      "grad_norm": 1.4730448060629187,
+      "learning_rate": 4.767453460960253e-06,
+      "loss": 0.7904,
+      "step": 42730
+    },
+    {
+      "epoch": 1.6903636615317685,
+      "grad_norm": 1.3122743484906407,
+      "learning_rate": 4.765154982018634e-06,
+      "loss": 0.7579,
+      "step": 42740
+    },
+    {
+      "epoch": 1.6907591607506576,
+      "grad_norm": 1.464506022989358,
+      "learning_rate": 4.762856552813167e-06,
+      "loss": 0.7927,
+      "step": 42750
+    },
+    {
+      "epoch": 1.6911546599695466,
+      "grad_norm": 1.2175133867313277,
+      "learning_rate": 4.7605581738306196e-06,
+      "loss": 0.7908,
+      "step": 42760
+    },
+    {
+      "epoch": 1.6915501591884357,
+      "grad_norm": 1.304249314924589,
+      "learning_rate": 4.758259845557748e-06,
+      "loss": 0.7788,
+      "step": 42770
+    },
+    {
+      "epoch": 1.6919456584073247,
+      "grad_norm": 1.5389999111391783,
+      "learning_rate": 4.755961568481299e-06,
+      "loss": 0.7893,
+      "step": 42780
+    },
+    {
+      "epoch": 1.6923411576262137,
+      "grad_norm": 1.283603829244678,
+      "learning_rate": 4.7536633430880106e-06,
+      "loss": 0.8001,
+      "step": 42790
+    },
+    {
+      "epoch": 1.6927366568451028,
+      "grad_norm": 1.533734508752668,
+      "learning_rate": 4.751365169864604e-06,
+      "loss": 0.7931,
+      "step": 42800
+    },
+    {
+      "epoch": 1.6931321560639918,
+      "grad_norm": 1.3646039861787496,
+      "learning_rate": 4.749067049297795e-06,
+      "loss": 0.7672,
+      "step": 42810
+    },
+    {
+      "epoch": 1.6935276552828809,
+      "grad_norm": 1.4328315251127925,
+      "learning_rate": 4.746768981874286e-06,
+      "loss": 0.7696,
+      "step": 42820
+    },
+    {
+      "epoch": 1.69392315450177,
+      "grad_norm": 1.3241302899423866,
+      "learning_rate": 4.744470968080769e-06,
+      "loss": 0.7965,
+      "step": 42830
+    },
+    {
+      "epoch": 1.694318653720659,
+      "grad_norm": 1.2411850828009556,
+      "learning_rate": 4.7421730084039225e-06,
+      "loss": 0.8008,
+      "step": 42840
+    },
+    {
+      "epoch": 1.694714152939548,
+      "grad_norm": 1.1850419218779245,
+      "learning_rate": 4.739875103330416e-06,
+      "loss": 0.7838,
+      "step": 42850
+    },
+    {
+      "epoch": 1.695109652158437,
+      "grad_norm": 1.477033790672088,
+      "learning_rate": 4.7375772533469055e-06,
+      "loss": 0.8107,
+      "step": 42860
+    },
+    {
+      "epoch": 1.695505151377326,
+      "grad_norm": 1.2723717474790426,
+      "learning_rate": 4.735279458940037e-06,
+      "loss": 0.7758,
+      "step": 42870
+    },
+    {
+      "epoch": 1.695900650596215,
+      "grad_norm": 1.5846393257758138,
+      "learning_rate": 4.732981720596447e-06,
+      "loss": 0.7926,
+      "step": 42880
+    },
+    {
+      "epoch": 1.6962961498151041,
+      "grad_norm": 1.2630931814888893,
+      "learning_rate": 4.73068403880275e-06,
+      "loss": 0.78,
+      "step": 42890
+    },
+    {
+      "epoch": 1.6966916490339932,
+      "grad_norm": 1.5360216483570945,
+      "learning_rate": 4.728386414045561e-06,
+      "loss": 0.7923,
+      "step": 42900
+    },
+    {
+      "epoch": 1.6970871482528822,
+      "grad_norm": 1.4776253090034164,
+      "learning_rate": 4.726088846811476e-06,
+      "loss": 0.7844,
+      "step": 42910
+    },
+    {
+      "epoch": 1.6974826474717712,
+      "grad_norm": 1.1139686598064495,
+      "learning_rate": 4.72379133758708e-06,
+      "loss": 0.8108,
+      "step": 42920
+    },
+    {
+      "epoch": 1.6978781466906603,
+      "grad_norm": 1.3942163650179535,
+      "learning_rate": 4.721493886858947e-06,
+      "loss": 0.7758,
+      "step": 42930
+    },
+    {
+      "epoch": 1.6982736459095493,
+      "grad_norm": 1.233318492107555,
+      "learning_rate": 4.719196495113637e-06,
+      "loss": 0.7929,
+      "step": 42940
+    },
+    {
+      "epoch": 1.6986691451284384,
+      "grad_norm": 1.5815785738343369,
+      "learning_rate": 4.7168991628377e-06,
+      "loss": 0.7814,
+      "step": 42950
+    },
+    {
+      "epoch": 1.6990646443473274,
+      "grad_norm": 1.2812676881165932,
+      "learning_rate": 4.714601890517669e-06,
+      "loss": 0.7782,
+      "step": 42960
+    },
+    {
+      "epoch": 1.6994601435662164,
+      "grad_norm": 1.65062343339854,
+      "learning_rate": 4.712304678640069e-06,
+      "loss": 0.8127,
+      "step": 42970
+    },
+    {
+      "epoch": 1.6998556427851055,
+      "grad_norm": 1.2545488390976887,
+      "learning_rate": 4.710007527691409e-06,
+      "loss": 0.8155,
+      "step": 42980
+    },
+    {
+      "epoch": 1.7002511420039945,
+      "grad_norm": 1.331153033309248,
+      "learning_rate": 4.707710438158185e-06,
+      "loss": 0.791,
+      "step": 42990
+    },
+    {
+      "epoch": 1.7006466412228836,
+      "grad_norm": 1.45002074119932,
+      "learning_rate": 4.705413410526885e-06,
+      "loss": 0.8064,
+      "step": 43000
+    },
+    {
+      "epoch": 1.7010421404417726,
+      "grad_norm": 1.4229274728152188,
+      "learning_rate": 4.7031164452839764e-06,
+      "loss": 0.7802,
+      "step": 43010
+    },
+    {
+      "epoch": 1.7014376396606616,
+      "grad_norm": 1.5195023053378005,
+      "learning_rate": 4.700819542915919e-06,
+      "loss": 0.7878,
+      "step": 43020
+    },
+    {
+      "epoch": 1.7018331388795507,
+      "grad_norm": 1.1286717055297824,
+      "learning_rate": 4.698522703909156e-06,
+      "loss": 0.7924,
+      "step": 43030
+    },
+    {
+      "epoch": 1.7022286380984397,
+      "grad_norm": 1.3208356068146982,
+      "learning_rate": 4.69622592875012e-06,
+      "loss": 0.7788,
+      "step": 43040
+    },
+    {
+      "epoch": 1.7026241373173288,
+      "grad_norm": 1.4236047452016296,
+      "learning_rate": 4.6939292179252264e-06,
+      "loss": 0.7834,
+      "step": 43050
+    },
+    {
+      "epoch": 1.7030196365362178,
+      "grad_norm": 1.4213020799472975,
+      "learning_rate": 4.691632571920882e-06,
+      "loss": 0.7594,
+      "step": 43060
+    },
+    {
+      "epoch": 1.7034151357551068,
+      "grad_norm": 1.2990758698231542,
+      "learning_rate": 4.689335991223475e-06,
+      "loss": 0.7946,
+      "step": 43070
+    },
+    {
+      "epoch": 1.7038106349739959,
+      "grad_norm": 1.079892919526166,
+      "learning_rate": 4.687039476319384e-06,
+      "loss": 0.7934,
+      "step": 43080
+    },
+    {
+      "epoch": 1.704206134192885,
+      "grad_norm": 1.368650688183977,
+      "learning_rate": 4.68474302769497e-06,
+      "loss": 0.7947,
+      "step": 43090
+    },
+    {
+      "epoch": 1.704601633411774,
+      "grad_norm": 1.1723317519407415,
+      "learning_rate": 4.6824466458365805e-06,
+      "loss": 0.7928,
+      "step": 43100
+    },
+    {
+      "epoch": 1.704997132630663,
+      "grad_norm": 1.3812258058757154,
+      "learning_rate": 4.680150331230552e-06,
+      "loss": 0.7954,
+      "step": 43110
+    },
+    {
+      "epoch": 1.705392631849552,
+      "grad_norm": 1.461383247629719,
+      "learning_rate": 4.677854084363206e-06,
+      "loss": 0.7851,
+      "step": 43120
+    },
+    {
+      "epoch": 1.705788131068441,
+      "grad_norm": 1.3002946754519833,
+      "learning_rate": 4.675557905720842e-06,
+      "loss": 0.804,
+      "step": 43130
+    },
+    {
+      "epoch": 1.70618363028733,
+      "grad_norm": 1.6365299241906293,
+      "learning_rate": 4.673261795789757e-06,
+      "loss": 0.7856,
+      "step": 43140
+    },
+    {
+      "epoch": 1.7065791295062192,
+      "grad_norm": 1.2647620869391314,
+      "learning_rate": 4.6709657550562254e-06,
+      "loss": 0.7752,
+      "step": 43150
+    },
+    {
+      "epoch": 1.7069746287251082,
+      "grad_norm": 1.3916516294522465,
+      "learning_rate": 4.668669784006509e-06,
+      "loss": 0.7706,
+      "step": 43160
+    },
+    {
+      "epoch": 1.7073701279439972,
+      "grad_norm": 1.4425852227050018,
+      "learning_rate": 4.666373883126857e-06,
+      "loss": 0.7674,
+      "step": 43170
+    },
+    {
+      "epoch": 1.7077656271628863,
+      "grad_norm": 1.247225327649516,
+      "learning_rate": 4.664078052903501e-06,
+      "loss": 0.7791,
+      "step": 43180
+    },
+    {
+      "epoch": 1.7081611263817753,
+      "grad_norm": 1.5542956589994794,
+      "learning_rate": 4.661782293822657e-06,
+      "loss": 0.7868,
+      "step": 43190
+    },
+    {
+      "epoch": 1.7085566256006643,
+      "grad_norm": 1.262597111656027,
+      "learning_rate": 4.659486606370531e-06,
+      "loss": 0.8086,
+      "step": 43200
+    },
+    {
+      "epoch": 1.7089521248195534,
+      "grad_norm": 1.562777807705374,
+      "learning_rate": 4.657190991033306e-06,
+      "loss": 0.8009,
+      "step": 43210
+    },
+    {
+      "epoch": 1.7093476240384424,
+      "grad_norm": 1.4206171746031013,
+      "learning_rate": 4.654895448297157e-06,
+      "loss": 0.814,
+      "step": 43220
+    },
+    {
+      "epoch": 1.7097431232573315,
+      "grad_norm": 1.3357640746080472,
+      "learning_rate": 4.652599978648239e-06,
+      "loss": 0.7768,
+      "step": 43230
+    },
+    {
+      "epoch": 1.7101386224762205,
+      "grad_norm": 1.4507705043982013,
+      "learning_rate": 4.650304582572696e-06,
+      "loss": 0.7918,
+      "step": 43240
+    },
+    {
+      "epoch": 1.7105341216951095,
+      "grad_norm": 1.555565836525127,
+      "learning_rate": 4.648009260556648e-06,
+      "loss": 0.7431,
+      "step": 43250
+    },
+    {
+      "epoch": 1.7109296209139986,
+      "grad_norm": 1.4793011784054226,
+      "learning_rate": 4.645714013086212e-06,
+      "loss": 0.7626,
+      "step": 43260
+    },
+    {
+      "epoch": 1.7113251201328876,
+      "grad_norm": 1.3249260777705645,
+      "learning_rate": 4.643418840647475e-06,
+      "loss": 0.7873,
+      "step": 43270
+    },
+    {
+      "epoch": 1.7117206193517767,
+      "grad_norm": 1.141516036502993,
+      "learning_rate": 4.64112374372652e-06,
+      "loss": 0.7695,
+      "step": 43280
+    },
+    {
+      "epoch": 1.7121161185706657,
+      "grad_norm": 1.2682986606771651,
+      "learning_rate": 4.638828722809407e-06,
+      "loss": 0.8331,
+      "step": 43290
+    },
+    {
+      "epoch": 1.7125116177895547,
+      "grad_norm": 1.7800288765457992,
+      "learning_rate": 4.636533778382183e-06,
+      "loss": 0.7892,
+      "step": 43300
+    },
+    {
+      "epoch": 1.7129071170084438,
+      "grad_norm": 1.1573003314148935,
+      "learning_rate": 4.634238910930878e-06,
+      "loss": 0.7974,
+      "step": 43310
+    },
+    {
+      "epoch": 1.7133026162273328,
+      "grad_norm": 1.3140613896474784,
+      "learning_rate": 4.631944120941505e-06,
+      "loss": 0.768,
+      "step": 43320
+    },
+    {
+      "epoch": 1.7136981154462219,
+      "grad_norm": 1.242059835084353,
+      "learning_rate": 4.6296494089000625e-06,
+      "loss": 0.7981,
+      "step": 43330
+    },
+    {
+      "epoch": 1.714093614665111,
+      "grad_norm": 1.3520625891250195,
+      "learning_rate": 4.627354775292529e-06,
+      "loss": 0.8124,
+      "step": 43340
+    },
+    {
+      "epoch": 1.714489113884,
+      "grad_norm": 1.3938212345846235,
+      "learning_rate": 4.6250602206048706e-06,
+      "loss": 0.7992,
+      "step": 43350
+    },
+    {
+      "epoch": 1.714884613102889,
+      "grad_norm": 1.540168213658307,
+      "learning_rate": 4.6227657453230335e-06,
+      "loss": 0.7787,
+      "step": 43360
+    },
+    {
+      "epoch": 1.715280112321778,
+      "grad_norm": 1.5187184457938918,
+      "learning_rate": 4.620471349932951e-06,
+      "loss": 0.7827,
+      "step": 43370
+    },
+    {
+      "epoch": 1.715675611540667,
+      "grad_norm": 1.3365967903633527,
+      "learning_rate": 4.618177034920533e-06,
+      "loss": 0.784,
+      "step": 43380
+    },
+    {
+      "epoch": 1.716071110759556,
+      "grad_norm": 1.2613601465845692,
+      "learning_rate": 4.615882800771676e-06,
+      "loss": 0.7984,
+      "step": 43390
+    },
+    {
+      "epoch": 1.7164666099784451,
+      "grad_norm": 1.5110117544441157,
+      "learning_rate": 4.613588647972263e-06,
+      "loss": 0.8014,
+      "step": 43400
+    },
+    {
+      "epoch": 1.7168621091973342,
+      "grad_norm": 1.4515443369380563,
+      "learning_rate": 4.611294577008153e-06,
+      "loss": 0.7983,
+      "step": 43410
+    },
+    {
+      "epoch": 1.7172576084162232,
+      "grad_norm": 1.3116019499031484,
+      "learning_rate": 4.609000588365193e-06,
+      "loss": 0.7955,
+      "step": 43420
+    },
+    {
+      "epoch": 1.7176531076351125,
+      "grad_norm": 1.5715744245793175,
+      "learning_rate": 4.606706682529209e-06,
+      "loss": 0.8038,
+      "step": 43430
+    },
+    {
+      "epoch": 1.7180486068540015,
+      "grad_norm": 1.4233011934503232,
+      "learning_rate": 4.604412859986013e-06,
+      "loss": 0.8005,
+      "step": 43440
+    },
+    {
+      "epoch": 1.7184441060728906,
+      "grad_norm": 1.5406127604454116,
+      "learning_rate": 4.602119121221395e-06,
+      "loss": 0.7839,
+      "step": 43450
+    },
+    {
+      "epoch": 1.7188396052917796,
+      "grad_norm": 1.2969103620457323,
+      "learning_rate": 4.59982546672113e-06,
+      "loss": 0.8304,
+      "step": 43460
+    },
+    {
+      "epoch": 1.7192351045106686,
+      "grad_norm": 1.2168489023920228,
+      "learning_rate": 4.597531896970975e-06,
+      "loss": 0.8121,
+      "step": 43470
+    },
+    {
+      "epoch": 1.7196306037295577,
+      "grad_norm": 1.5503270882288969,
+      "learning_rate": 4.59523841245667e-06,
+      "loss": 0.7793,
+      "step": 43480
+    },
+    {
+      "epoch": 1.7200261029484467,
+      "grad_norm": 1.3386067126107042,
+      "learning_rate": 4.592945013663937e-06,
+      "loss": 0.7911,
+      "step": 43490
+    },
+    {
+      "epoch": 1.7204216021673358,
+      "grad_norm": 1.648293225110738,
+      "learning_rate": 4.590651701078474e-06,
+      "loss": 0.8038,
+      "step": 43500
+    },
+    {
+      "epoch": 1.7208171013862248,
+      "grad_norm": 1.411665150837201,
+      "learning_rate": 4.588358475185968e-06,
+      "loss": 0.7898,
+      "step": 43510
+    },
+    {
+      "epoch": 1.7212126006051138,
+      "grad_norm": 1.3745587267809554,
+      "learning_rate": 4.586065336472083e-06,
+      "loss": 0.7736,
+      "step": 43520
+    },
+    {
+      "epoch": 1.7216080998240029,
+      "grad_norm": 1.2600581481452702,
+      "learning_rate": 4.58377228542247e-06,
+      "loss": 0.773,
+      "step": 43530
+    },
+    {
+      "epoch": 1.722003599042892,
+      "grad_norm": 1.3379460524623126,
+      "learning_rate": 4.581479322522755e-06,
+      "loss": 0.8021,
+      "step": 43540
+    },
+    {
+      "epoch": 1.722399098261781,
+      "grad_norm": 1.5224938196495796,
+      "learning_rate": 4.57918644825855e-06,
+      "loss": 0.8054,
+      "step": 43550
+    },
+    {
+      "epoch": 1.72279459748067,
+      "grad_norm": 1.3272093623988706,
+      "learning_rate": 4.576893663115445e-06,
+      "loss": 0.7815,
+      "step": 43560
+    },
+    {
+      "epoch": 1.723190096699559,
+      "grad_norm": 1.189088259580658,
+      "learning_rate": 4.574600967579014e-06,
+      "loss": 0.7719,
+      "step": 43570
+    },
+    {
+      "epoch": 1.723585595918448,
+      "grad_norm": 1.804509097110049,
+      "learning_rate": 4.57230836213481e-06,
+      "loss": 0.7912,
+      "step": 43580
+    },
+    {
+      "epoch": 1.723981095137337,
+      "grad_norm": 1.3525131534672215,
+      "learning_rate": 4.570015847268368e-06,
+      "loss": 0.7626,
+      "step": 43590
+    },
+    {
+      "epoch": 1.7243765943562261,
+      "grad_norm": 1.5868380595371219,
+      "learning_rate": 4.567723423465203e-06,
+      "loss": 0.7899,
+      "step": 43600
+    },
+    {
+      "epoch": 1.7247720935751152,
+      "grad_norm": 1.1700013146166923,
+      "learning_rate": 4.565431091210813e-06,
+      "loss": 0.7999,
+      "step": 43610
+    },
+    {
+      "epoch": 1.7251675927940042,
+      "grad_norm": 1.5595369260555179,
+      "learning_rate": 4.56313885099067e-06,
+      "loss": 0.8083,
+      "step": 43620
+    },
+    {
+      "epoch": 1.7255630920128933,
+      "grad_norm": 1.3654884814429953,
+      "learning_rate": 4.560846703290234e-06,
+      "loss": 0.7943,
+      "step": 43630
+    },
+    {
+      "epoch": 1.7259585912317823,
+      "grad_norm": 1.264563670134767,
+      "learning_rate": 4.558554648594943e-06,
+      "loss": 0.7834,
+      "step": 43640
+    },
+    {
+      "epoch": 1.7263540904506713,
+      "grad_norm": 1.4524405450710334,
+      "learning_rate": 4.556262687390214e-06,
+      "loss": 0.7846,
+      "step": 43650
+    },
+    {
+      "epoch": 1.7267495896695604,
+      "grad_norm": 1.2135123442114277,
+      "learning_rate": 4.5539708201614466e-06,
+      "loss": 0.7732,
+      "step": 43660
+    },
+    {
+      "epoch": 1.7271450888884494,
+      "grad_norm": 1.3724194130354423,
+      "learning_rate": 4.5516790473940184e-06,
+      "loss": 0.799,
+      "step": 43670
+    },
+    {
+      "epoch": 1.7275405881073385,
+      "grad_norm": 1.2973309513581168,
+      "learning_rate": 4.5493873695732866e-06,
+      "loss": 0.8104,
+      "step": 43680
+    },
+    {
+      "epoch": 1.7279360873262275,
+      "grad_norm": 1.6248574289962425,
+      "learning_rate": 4.54709578718459e-06,
+      "loss": 0.7739,
+      "step": 43690
+    },
+    {
+      "epoch": 1.7283315865451165,
+      "grad_norm": 1.4590613801731587,
+      "learning_rate": 4.544804300713246e-06,
+      "loss": 0.7579,
+      "step": 43700
+    },
+    {
+      "epoch": 1.7287270857640056,
+      "grad_norm": 1.5067340878685358,
+      "learning_rate": 4.542512910644553e-06,
+      "loss": 0.7976,
+      "step": 43710
+    },
+    {
+      "epoch": 1.7291225849828946,
+      "grad_norm": 1.6010654889579239,
+      "learning_rate": 4.540221617463787e-06,
+      "loss": 0.7899,
+      "step": 43720
+    },
+    {
+      "epoch": 1.7295180842017837,
+      "grad_norm": 1.3715262231363263,
+      "learning_rate": 4.537930421656208e-06,
+      "loss": 0.8217,
+      "step": 43730
+    },
+    {
+      "epoch": 1.7299135834206727,
+      "grad_norm": 1.3163594482149317,
+      "learning_rate": 4.535639323707047e-06,
+      "loss": 0.7716,
+      "step": 43740
+    },
+    {
+      "epoch": 1.7303090826395617,
+      "grad_norm": 1.3055859024999616,
+      "learning_rate": 4.533348324101523e-06,
+      "loss": 0.7978,
+      "step": 43750
+    },
+    {
+      "epoch": 1.7307045818584508,
+      "grad_norm": 1.3722285625144957,
+      "learning_rate": 4.531057423324828e-06,
+      "loss": 0.7846,
+      "step": 43760
+    },
+    {
+      "epoch": 1.73110008107734,
+      "grad_norm": 1.2916459077755589,
+      "learning_rate": 4.528766621862137e-06,
+      "loss": 0.7907,
+      "step": 43770
+    },
+    {
+      "epoch": 1.731495580296229,
+      "grad_norm": 1.14407030889974,
+      "learning_rate": 4.526475920198602e-06,
+      "loss": 0.7888,
+      "step": 43780
+    },
+    {
+      "epoch": 1.7318910795151181,
+      "grad_norm": 1.2394495756314408,
+      "learning_rate": 4.524185318819355e-06,
+      "loss": 0.8048,
+      "step": 43790
+    },
+    {
+      "epoch": 1.7322865787340072,
+      "grad_norm": 1.3053063977768904,
+      "learning_rate": 4.5218948182095055e-06,
+      "loss": 0.7887,
+      "step": 43800
+    },
+    {
+      "epoch": 1.7326820779528962,
+      "grad_norm": 1.713932736485177,
+      "learning_rate": 4.5196044188541426e-06,
+      "loss": 0.7732,
+      "step": 43810
+    },
+    {
+      "epoch": 1.7330775771717852,
+      "grad_norm": 1.4351614371987402,
+      "learning_rate": 4.517314121238333e-06,
+      "loss": 0.7694,
+      "step": 43820
+    },
+    {
+      "epoch": 1.7334730763906743,
+      "grad_norm": 1.5428394603274558,
+      "learning_rate": 4.515023925847124e-06,
+      "loss": 0.7642,
+      "step": 43830
+    },
+    {
+      "epoch": 1.7338685756095633,
+      "grad_norm": 1.3262905067355322,
+      "learning_rate": 4.512733833165538e-06,
+      "loss": 0.8094,
+      "step": 43840
+    },
+    {
+      "epoch": 1.7342640748284524,
+      "grad_norm": 1.0615067206404898,
+      "learning_rate": 4.510443843678578e-06,
+      "loss": 0.7727,
+      "step": 43850
+    },
+    {
+      "epoch": 1.7346595740473414,
+      "grad_norm": 1.253974618679905,
+      "learning_rate": 4.508153957871228e-06,
+      "loss": 0.772,
+      "step": 43860
+    },
+    {
+      "epoch": 1.7350550732662304,
+      "grad_norm": 1.363532089707463,
+      "learning_rate": 4.5058641762284405e-06,
+      "loss": 0.7883,
+      "step": 43870
+    },
+    {
+      "epoch": 1.7354505724851195,
+      "grad_norm": 1.4190352078639084,
+      "learning_rate": 4.503574499235155e-06,
+      "loss": 0.7971,
+      "step": 43880
+    },
+    {
+      "epoch": 1.7358460717040085,
+      "grad_norm": 1.282005254832603,
+      "learning_rate": 4.5012849273762856e-06,
+      "loss": 0.7702,
+      "step": 43890
+    },
+    {
+      "epoch": 1.7362415709228975,
+      "grad_norm": 1.5165821503367798,
+      "learning_rate": 4.498995461136725e-06,
+      "loss": 0.7646,
+      "step": 43900
+    },
+    {
+      "epoch": 1.7366370701417866,
+      "grad_norm": 1.4966807550285086,
+      "learning_rate": 4.496706101001343e-06,
+      "loss": 0.7793,
+      "step": 43910
+    },
+    {
+      "epoch": 1.7370325693606756,
+      "grad_norm": 1.3873163639745776,
+      "learning_rate": 4.494416847454986e-06,
+      "loss": 0.7793,
+      "step": 43920
+    },
+    {
+      "epoch": 1.7374280685795647,
+      "grad_norm": 1.368106939601308,
+      "learning_rate": 4.492127700982477e-06,
+      "loss": 0.7856,
+      "step": 43930
+    },
+    {
+      "epoch": 1.7378235677984537,
+      "grad_norm": 1.3820321702043639,
+      "learning_rate": 4.489838662068622e-06,
+      "loss": 0.7859,
+      "step": 43940
+    },
+    {
+      "epoch": 1.7382190670173427,
+      "grad_norm": 1.3247565738913807,
+      "learning_rate": 4.487549731198197e-06,
+      "loss": 0.7712,
+      "step": 43950
+    },
+    {
+      "epoch": 1.7386145662362318,
+      "grad_norm": 1.60642127770446,
+      "learning_rate": 4.48526090885596e-06,
+      "loss": 0.7964,
+      "step": 43960
+    },
+    {
+      "epoch": 1.7390100654551208,
+      "grad_norm": 1.519598684598891,
+      "learning_rate": 4.482972195526644e-06,
+      "loss": 0.7534,
+      "step": 43970
+    },
+    {
+      "epoch": 1.7394055646740099,
+      "grad_norm": 1.3630472960650235,
+      "learning_rate": 4.480683591694961e-06,
+      "loss": 0.7667,
+      "step": 43980
+    },
+    {
+      "epoch": 1.739801063892899,
+      "grad_norm": 1.6283119330185185,
+      "learning_rate": 4.478395097845594e-06,
+      "loss": 0.7796,
+      "step": 43990
+    },
+    {
+      "epoch": 1.740196563111788,
+      "grad_norm": 1.1730907853344723,
+      "learning_rate": 4.47610671446321e-06,
+      "loss": 0.8033,
+      "step": 44000
+    },
+    {
+      "epoch": 1.740592062330677,
+      "grad_norm": 1.3834211017168112,
+      "learning_rate": 4.473818442032447e-06,
+      "loss": 0.8094,
+      "step": 44010
+    },
+    {
+      "epoch": 1.740987561549566,
+      "grad_norm": 1.5383465385946293,
+      "learning_rate": 4.4715302810379246e-06,
+      "loss": 0.8084,
+      "step": 44020
+    },
+    {
+      "epoch": 1.741383060768455,
+      "grad_norm": 1.5515370420499288,
+      "learning_rate": 4.469242231964234e-06,
+      "loss": 0.7819,
+      "step": 44030
+    },
+    {
+      "epoch": 1.741778559987344,
+      "grad_norm": 1.3522124000769695,
+      "learning_rate": 4.466954295295946e-06,
+      "loss": 0.7947,
+      "step": 44040
+    },
+    {
+      "epoch": 1.7421740592062331,
+      "grad_norm": 1.477866666677567,
+      "learning_rate": 4.4646664715176056e-06,
+      "loss": 0.8017,
+      "step": 44050
+    },
+    {
+      "epoch": 1.7425695584251222,
+      "grad_norm": 1.2171213913733678,
+      "learning_rate": 4.4623787611137355e-06,
+      "loss": 0.7637,
+      "step": 44060
+    },
+    {
+      "epoch": 1.7429650576440112,
+      "grad_norm": 1.4214546469646214,
+      "learning_rate": 4.460091164568833e-06,
+      "loss": 0.789,
+      "step": 44070
+    },
+    {
+      "epoch": 1.7433605568629003,
+      "grad_norm": 1.5340306748071701,
+      "learning_rate": 4.457803682367373e-06,
+      "loss": 0.7907,
+      "step": 44080
+    },
+    {
+      "epoch": 1.7437560560817893,
+      "grad_norm": 1.6173509563536357,
+      "learning_rate": 4.455516314993804e-06,
+      "loss": 0.7548,
+      "step": 44090
+    },
+    {
+      "epoch": 1.7441515553006783,
+      "grad_norm": 1.5526486560273582,
+      "learning_rate": 4.453229062932552e-06,
+      "loss": 0.758,
+      "step": 44100
+    },
+    {
+      "epoch": 1.7445470545195674,
+      "grad_norm": 1.3342344683731149,
+      "learning_rate": 4.450941926668015e-06,
+      "loss": 0.8055,
+      "step": 44110
+    },
+    {
+      "epoch": 1.7449425537384564,
+      "grad_norm": 1.4338919841988385,
+      "learning_rate": 4.448654906684572e-06,
+      "loss": 0.7829,
+      "step": 44120
+    },
+    {
+      "epoch": 1.7453380529573455,
+      "grad_norm": 1.547178784383168,
+      "learning_rate": 4.446368003466574e-06,
+      "loss": 0.7892,
+      "step": 44130
+    },
+    {
+      "epoch": 1.7457335521762345,
+      "grad_norm": 1.6062509247795729,
+      "learning_rate": 4.444081217498349e-06,
+      "loss": 0.8044,
+      "step": 44140
+    },
+    {
+      "epoch": 1.7461290513951235,
+      "grad_norm": 1.4141914559416404,
+      "learning_rate": 4.441794549264196e-06,
+      "loss": 0.7774,
+      "step": 44150
+    },
+    {
+      "epoch": 1.7465245506140126,
+      "grad_norm": 1.392607812214566,
+      "learning_rate": 4.439507999248396e-06,
+      "loss": 0.7773,
+      "step": 44160
+    },
+    {
+      "epoch": 1.7469200498329016,
+      "grad_norm": 1.443839498310404,
+      "learning_rate": 4.4372215679351985e-06,
+      "loss": 0.8011,
+      "step": 44170
+    },
+    {
+      "epoch": 1.7473155490517907,
+      "grad_norm": 1.6681270048610946,
+      "learning_rate": 4.434935255808831e-06,
+      "loss": 0.7649,
+      "step": 44180
+    },
+    {
+      "epoch": 1.7477110482706797,
+      "grad_norm": 1.3821515146973142,
+      "learning_rate": 4.432649063353496e-06,
+      "loss": 0.7853,
+      "step": 44190
+    },
+    {
+      "epoch": 1.7481065474895687,
+      "grad_norm": 1.2105415917711317,
+      "learning_rate": 4.430362991053369e-06,
+      "loss": 0.788,
+      "step": 44200
+    },
+    {
+      "epoch": 1.7485020467084578,
+      "grad_norm": 1.3506672361945162,
+      "learning_rate": 4.428077039392602e-06,
+      "loss": 0.7769,
+      "step": 44210
+    },
+    {
+      "epoch": 1.7488975459273468,
+      "grad_norm": 1.3125742732995098,
+      "learning_rate": 4.4257912088553215e-06,
+      "loss": 0.8036,
+      "step": 44220
+    },
+    {
+      "epoch": 1.7492930451462358,
+      "grad_norm": 1.5724996332375645,
+      "learning_rate": 4.423505499925623e-06,
+      "loss": 0.7918,
+      "step": 44230
+    },
+    {
+      "epoch": 1.7496885443651249,
+      "grad_norm": 1.3996903588212173,
+      "learning_rate": 4.421219913087583e-06,
+      "loss": 0.8216,
+      "step": 44240
+    },
+    {
+      "epoch": 1.750084043584014,
+      "grad_norm": 1.5267959490567566,
+      "learning_rate": 4.41893444882525e-06,
+      "loss": 0.7713,
+      "step": 44250
+    },
+    {
+      "epoch": 1.750479542802903,
+      "grad_norm": 1.4311818426161296,
+      "learning_rate": 4.416649107622646e-06,
+      "loss": 0.7898,
+      "step": 44260
+    },
+    {
+      "epoch": 1.750875042021792,
+      "grad_norm": 1.3332990181789384,
+      "learning_rate": 4.414363889963766e-06,
+      "loss": 0.8003,
+      "step": 44270
+    },
+    {
+      "epoch": 1.751270541240681,
+      "grad_norm": 1.4080000551555123,
+      "learning_rate": 4.412078796332582e-06,
+      "loss": 0.8085,
+      "step": 44280
+    },
+    {
+      "epoch": 1.75166604045957,
+      "grad_norm": 1.4249913072448293,
+      "learning_rate": 4.409793827213036e-06,
+      "loss": 0.8044,
+      "step": 44290
+    },
+    {
+      "epoch": 1.7520615396784591,
+      "grad_norm": 1.4895033989886426,
+      "learning_rate": 4.407508983089046e-06,
+      "loss": 0.7982,
+      "step": 44300
+    },
+    {
+      "epoch": 1.7524570388973482,
+      "grad_norm": 1.2921987129638182,
+      "learning_rate": 4.405224264444502e-06,
+      "loss": 0.8082,
+      "step": 44310
+    },
+    {
+      "epoch": 1.7528525381162372,
+      "grad_norm": 1.444011915566668,
+      "learning_rate": 4.40293967176327e-06,
+      "loss": 0.7632,
+      "step": 44320
+    },
+    {
+      "epoch": 1.7532480373351262,
+      "grad_norm": 1.2908306617089407,
+      "learning_rate": 4.400655205529187e-06,
+      "loss": 0.8017,
+      "step": 44330
+    },
+    {
+      "epoch": 1.7536435365540153,
+      "grad_norm": 1.3096535754533938,
+      "learning_rate": 4.398370866226065e-06,
+      "loss": 0.7632,
+      "step": 44340
+    },
+    {
+      "epoch": 1.7540390357729043,
+      "grad_norm": 1.3461796811902251,
+      "learning_rate": 4.3960866543376835e-06,
+      "loss": 0.7872,
+      "step": 44350
+    },
+    {
+      "epoch": 1.7544345349917934,
+      "grad_norm": 1.2807883025604567,
+      "learning_rate": 4.393802570347803e-06,
+      "loss": 0.7953,
+      "step": 44360
+    },
+    {
+      "epoch": 1.7548300342106824,
+      "grad_norm": 1.6433221941781169,
+      "learning_rate": 4.391518614740152e-06,
+      "loss": 0.7721,
+      "step": 44370
+    },
+    {
+      "epoch": 1.7552255334295714,
+      "grad_norm": 1.408821212382581,
+      "learning_rate": 4.3892347879984345e-06,
+      "loss": 0.7848,
+      "step": 44380
+    },
+    {
+      "epoch": 1.7556210326484605,
+      "grad_norm": 1.3192214630163677,
+      "learning_rate": 4.386951090606325e-06,
+      "loss": 0.7818,
+      "step": 44390
+    },
+    {
+      "epoch": 1.7560165318673495,
+      "grad_norm": 1.4823118378180073,
+      "learning_rate": 4.384667523047472e-06,
+      "loss": 0.7867,
+      "step": 44400
+    },
+    {
+      "epoch": 1.7564120310862386,
+      "grad_norm": 1.6090004865722174,
+      "learning_rate": 4.382384085805495e-06,
+      "loss": 0.7893,
+      "step": 44410
+    },
+    {
+      "epoch": 1.7568075303051276,
+      "grad_norm": 1.3019361148439694,
+      "learning_rate": 4.380100779363987e-06,
+      "loss": 0.773,
+      "step": 44420
+    },
+    {
+      "epoch": 1.7572030295240166,
+      "grad_norm": 1.3781399188672865,
+      "learning_rate": 4.377817604206514e-06,
+      "loss": 0.8034,
+      "step": 44430
+    },
+    {
+      "epoch": 1.7575985287429057,
+      "grad_norm": 1.1935781056824448,
+      "learning_rate": 4.375534560816613e-06,
+      "loss": 0.7914,
+      "step": 44440
+    },
+    {
+      "epoch": 1.7579940279617947,
+      "grad_norm": 1.421751383170964,
+      "learning_rate": 4.3732516496777945e-06,
+      "loss": 0.7645,
+      "step": 44450
+    },
+    {
+      "epoch": 1.7583895271806838,
+      "grad_norm": 1.4071668704830689,
+      "learning_rate": 4.370968871273538e-06,
+      "loss": 0.7902,
+      "step": 44460
+    },
+    {
+      "epoch": 1.7587850263995728,
+      "grad_norm": 1.3532918776688694,
+      "learning_rate": 4.368686226087301e-06,
+      "loss": 0.8074,
+      "step": 44470
+    },
+    {
+      "epoch": 1.7591805256184618,
+      "grad_norm": 1.2007198128694068,
+      "learning_rate": 4.366403714602502e-06,
+      "loss": 0.7635,
+      "step": 44480
+    },
+    {
+      "epoch": 1.7595760248373509,
+      "grad_norm": 1.3450244739720503,
+      "learning_rate": 4.3641213373025425e-06,
+      "loss": 0.7836,
+      "step": 44490
+    },
+    {
+      "epoch": 1.75997152405624,
+      "grad_norm": 1.5882583966104258,
+      "learning_rate": 4.361839094670789e-06,
+      "loss": 0.7967,
+      "step": 44500
+    },
+    {
+      "epoch": 1.760367023275129,
+      "grad_norm": 1.2176379769436472,
+      "learning_rate": 4.359556987190583e-06,
+      "loss": 0.7777,
+      "step": 44510
+    },
+    {
+      "epoch": 1.760762522494018,
+      "grad_norm": 1.3298017266261861,
+      "learning_rate": 4.357275015345235e-06,
+      "loss": 0.7946,
+      "step": 44520
+    },
+    {
+      "epoch": 1.761158021712907,
+      "grad_norm": 1.4489843147698926,
+      "learning_rate": 4.354993179618026e-06,
+      "loss": 0.779,
+      "step": 44530
+    },
+    {
+      "epoch": 1.761553520931796,
+      "grad_norm": 1.5955900570171213,
+      "learning_rate": 4.3527114804922125e-06,
+      "loss": 0.7654,
+      "step": 44540
+    },
+    {
+      "epoch": 1.761949020150685,
+      "grad_norm": 1.3389699944648392,
+      "learning_rate": 4.350429918451018e-06,
+      "loss": 0.7955,
+      "step": 44550
+    },
+    {
+      "epoch": 1.7623445193695741,
+      "grad_norm": 1.64525477102138,
+      "learning_rate": 4.348148493977638e-06,
+      "loss": 0.7649,
+      "step": 44560
+    },
+    {
+      "epoch": 1.7627400185884632,
+      "grad_norm": 1.556791328967586,
+      "learning_rate": 4.34586720755524e-06,
+      "loss": 0.7423,
+      "step": 44570
+    },
+    {
+      "epoch": 1.7631355178073522,
+      "grad_norm": 1.6112608140121207,
+      "learning_rate": 4.343586059666959e-06,
+      "loss": 0.7672,
+      "step": 44580
+    },
+    {
+      "epoch": 1.7635310170262413,
+      "grad_norm": 1.4049003769288613,
+      "learning_rate": 4.341305050795907e-06,
+      "loss": 0.797,
+      "step": 44590
+    },
+    {
+      "epoch": 1.7639265162451303,
+      "grad_norm": 1.4151072328398027,
+      "learning_rate": 4.339024181425159e-06,
+      "loss": 0.7824,
+      "step": 44600
+    },
+    {
+      "epoch": 1.7643220154640193,
+      "grad_norm": 1.4105712582019052,
+      "learning_rate": 4.336743452037767e-06,
+      "loss": 0.7787,
+      "step": 44610
+    },
+    {
+      "epoch": 1.7647175146829084,
+      "grad_norm": 1.3403168130991987,
+      "learning_rate": 4.334462863116747e-06,
+      "loss": 0.7756,
+      "step": 44620
+    },
+    {
+      "epoch": 1.7651130139017974,
+      "grad_norm": 1.4747284421853857,
+      "learning_rate": 4.33218241514509e-06,
+      "loss": 0.7773,
+      "step": 44630
+    },
+    {
+      "epoch": 1.7655085131206865,
+      "grad_norm": 1.4345108874032895,
+      "learning_rate": 4.329902108605758e-06,
+      "loss": 0.7693,
+      "step": 44640
+    },
+    {
+      "epoch": 1.7659040123395755,
+      "grad_norm": 1.6038288444856101,
+      "learning_rate": 4.327621943981678e-06,
+      "loss": 0.8043,
+      "step": 44650
+    },
+    {
+      "epoch": 1.7662995115584645,
+      "grad_norm": 1.4731335139835002,
+      "learning_rate": 4.3253419217557506e-06,
+      "loss": 0.796,
+      "step": 44660
+    },
+    {
+      "epoch": 1.7666950107773536,
+      "grad_norm": 1.613875442187249,
+      "learning_rate": 4.323062042410846e-06,
+      "loss": 0.7654,
+      "step": 44670
+    },
+    {
+      "epoch": 1.7670905099962426,
+      "grad_norm": 1.4677278660609412,
+      "learning_rate": 4.320782306429804e-06,
+      "loss": 0.7775,
+      "step": 44680
+    },
+    {
+      "epoch": 1.7674860092151317,
+      "grad_norm": 1.7639145973226702,
+      "learning_rate": 4.318502714295433e-06,
+      "loss": 0.8005,
+      "step": 44690
+    },
+    {
+      "epoch": 1.7678815084340207,
+      "grad_norm": 1.6021415871617222,
+      "learning_rate": 4.316223266490511e-06,
+      "loss": 0.7929,
+      "step": 44700
+    },
+    {
+      "epoch": 1.7682770076529097,
+      "grad_norm": 1.362740997156845,
+      "learning_rate": 4.313943963497788e-06,
+      "loss": 0.7936,
+      "step": 44710
+    },
+    {
+      "epoch": 1.7686725068717988,
+      "grad_norm": 1.292511416699964,
+      "learning_rate": 4.31166480579998e-06,
+      "loss": 0.7669,
+      "step": 44720
+    },
+    {
+      "epoch": 1.7690680060906878,
+      "grad_norm": 1.3835456305885425,
+      "learning_rate": 4.309385793879772e-06,
+      "loss": 0.7744,
+      "step": 44730
+    },
+    {
+      "epoch": 1.7694635053095769,
+      "grad_norm": 1.819094092993542,
+      "learning_rate": 4.307106928219821e-06,
+      "loss": 0.7483,
+      "step": 44740
+    },
+    {
+      "epoch": 1.769859004528466,
+      "grad_norm": 1.452203152532573,
+      "learning_rate": 4.3048282093027524e-06,
+      "loss": 0.7768,
+      "step": 44750
+    },
+    {
+      "epoch": 1.770254503747355,
+      "grad_norm": 1.363704627632019,
+      "learning_rate": 4.302549637611158e-06,
+      "loss": 0.7643,
+      "step": 44760
+    },
+    {
+      "epoch": 1.7706500029662442,
+      "grad_norm": 1.4401428913720895,
+      "learning_rate": 4.300271213627603e-06,
+      "loss": 0.782,
+      "step": 44770
+    },
+    {
+      "epoch": 1.7710455021851332,
+      "grad_norm": 1.35899039954038,
+      "learning_rate": 4.297992937834617e-06,
+      "loss": 0.7813,
+      "step": 44780
+    },
+    {
+      "epoch": 1.7714410014040223,
+      "grad_norm": 1.3956822551876829,
+      "learning_rate": 4.295714810714699e-06,
+      "loss": 0.7504,
+      "step": 44790
+    },
+    {
+      "epoch": 1.7718365006229113,
+      "grad_norm": 1.4914086598701293,
+      "learning_rate": 4.29343683275032e-06,
+      "loss": 0.7586,
+      "step": 44800
+    },
+    {
+      "epoch": 1.7722319998418004,
+      "grad_norm": 1.623809994117821,
+      "learning_rate": 4.2911590044239125e-06,
+      "loss": 0.808,
+      "step": 44810
+    },
+    {
+      "epoch": 1.7726274990606894,
+      "grad_norm": 1.6039147208215316,
+      "learning_rate": 4.288881326217886e-06,
+      "loss": 0.7624,
+      "step": 44820
+    },
+    {
+      "epoch": 1.7730229982795784,
+      "grad_norm": 1.5259344298884712,
+      "learning_rate": 4.286603798614611e-06,
+      "loss": 0.7704,
+      "step": 44830
+    },
+    {
+      "epoch": 1.7734184974984675,
+      "grad_norm": 1.3225292504284039,
+      "learning_rate": 4.284326422096428e-06,
+      "loss": 0.7863,
+      "step": 44840
+    },
+    {
+      "epoch": 1.7738139967173565,
+      "grad_norm": 1.408858905221673,
+      "learning_rate": 4.28204919714565e-06,
+      "loss": 0.7612,
+      "step": 44850
+    },
+    {
+      "epoch": 1.7742094959362456,
+      "grad_norm": 1.4230568037531317,
+      "learning_rate": 4.27977212424455e-06,
+      "loss": 0.7788,
+      "step": 44860
+    },
+    {
+      "epoch": 1.7746049951551346,
+      "grad_norm": 1.2483437728124192,
+      "learning_rate": 4.277495203875376e-06,
+      "loss": 0.7886,
+      "step": 44870
+    },
+    {
+      "epoch": 1.7750004943740236,
+      "grad_norm": 1.4672133290171747,
+      "learning_rate": 4.275218436520339e-06,
+      "loss": 0.7784,
+      "step": 44880
+    },
+    {
+      "epoch": 1.7753959935929127,
+      "grad_norm": 1.393363646562001,
+      "learning_rate": 4.2729418226616205e-06,
+      "loss": 0.7832,
+      "step": 44890
+    },
+    {
+      "epoch": 1.7757914928118017,
+      "grad_norm": 1.478880340926508,
+      "learning_rate": 4.2706653627813675e-06,
+      "loss": 0.7852,
+      "step": 44900
+    },
+    {
+      "epoch": 1.7761869920306907,
+      "grad_norm": 1.568129819601717,
+      "learning_rate": 4.268389057361695e-06,
+      "loss": 0.7781,
+      "step": 44910
+    },
+    {
+      "epoch": 1.7765824912495798,
+      "grad_norm": 1.4193852075004723,
+      "learning_rate": 4.2661129068846875e-06,
+      "loss": 0.7893,
+      "step": 44920
+    },
+    {
+      "epoch": 1.7769779904684688,
+      "grad_norm": 1.5068973474620326,
+      "learning_rate": 4.263836911832392e-06,
+      "loss": 0.7533,
+      "step": 44930
+    },
+    {
+      "epoch": 1.7773734896873579,
+      "grad_norm": 1.3691205031676723,
+      "learning_rate": 4.261561072686827e-06,
+      "loss": 0.7648,
+      "step": 44940
+    },
+    {
+      "epoch": 1.777768988906247,
+      "grad_norm": 1.468170859456608,
+      "learning_rate": 4.2592853899299755e-06,
+      "loss": 0.7985,
+      "step": 44950
+    },
+    {
+      "epoch": 1.778164488125136,
+      "grad_norm": 1.450527616166147,
+      "learning_rate": 4.257009864043791e-06,
+      "loss": 0.7933,
+      "step": 44960
+    },
+    {
+      "epoch": 1.778559987344025,
+      "grad_norm": 1.461212450557013,
+      "learning_rate": 4.254734495510185e-06,
+      "loss": 0.7813,
+      "step": 44970
+    },
+    {
+      "epoch": 1.778955486562914,
+      "grad_norm": 1.4166972287102884,
+      "learning_rate": 4.252459284811046e-06,
+      "loss": 0.7764,
+      "step": 44980
+    },
+    {
+      "epoch": 1.779350985781803,
+      "grad_norm": 1.0924556310868954,
+      "learning_rate": 4.250184232428223e-06,
+      "loss": 0.803,
+      "step": 44990
+    },
+    {
+      "epoch": 1.779746485000692,
+      "grad_norm": 1.2439774206128422,
+      "learning_rate": 4.247909338843534e-06,
+      "loss": 0.7875,
+      "step": 45000
+    },
+    {
+      "epoch": 1.7801419842195811,
+      "grad_norm": 1.827258645312475,
+      "learning_rate": 4.2456346045387615e-06,
+      "loss": 0.7703,
+      "step": 45010
+    },
+    {
+      "epoch": 1.7805374834384702,
+      "grad_norm": 1.1506913522004274,
+      "learning_rate": 4.243360029995656e-06,
+      "loss": 0.7682,
+      "step": 45020
+    },
+    {
+      "epoch": 1.7809329826573592,
+      "grad_norm": 1.4542294363594457,
+      "learning_rate": 4.241085615695935e-06,
+      "loss": 0.7699,
+      "step": 45030
+    },
+    {
+      "epoch": 1.7813284818762483,
+      "grad_norm": 1.3816438258849228,
+      "learning_rate": 4.238811362121277e-06,
+      "loss": 0.7842,
+      "step": 45040
+    },
+    {
+      "epoch": 1.7817239810951373,
+      "grad_norm": 1.7282756274413493,
+      "learning_rate": 4.236537269753331e-06,
+      "loss": 0.7568,
+      "step": 45050
+    },
+    {
+      "epoch": 1.7821194803140263,
+      "grad_norm": 1.494841111415543,
+      "learning_rate": 4.2342633390737126e-06,
+      "loss": 0.775,
+      "step": 45060
+    },
+    {
+      "epoch": 1.7825149795329154,
+      "grad_norm": 1.5946477635242504,
+      "learning_rate": 4.231989570564e-06,
+      "loss": 0.7559,
+      "step": 45070
+    },
+    {
+      "epoch": 1.7829104787518044,
+      "grad_norm": 1.2593815456816662,
+      "learning_rate": 4.2297159647057405e-06,
+      "loss": 0.7781,
+      "step": 45080
+    },
+    {
+      "epoch": 1.7833059779706935,
+      "grad_norm": 1.2234962134948866,
+      "learning_rate": 4.227442521980441e-06,
+      "loss": 0.7753,
+      "step": 45090
+    },
+    {
+      "epoch": 1.7837014771895825,
+      "grad_norm": 1.6497901789470792,
+      "learning_rate": 4.22516924286958e-06,
+      "loss": 0.7817,
+      "step": 45100
+    },
+    {
+      "epoch": 1.7840969764084718,
+      "grad_norm": 1.3971012132818765,
+      "learning_rate": 4.222896127854598e-06,
+      "loss": 0.7688,
+      "step": 45110
+    },
+    {
+      "epoch": 1.7844924756273608,
+      "grad_norm": 1.6813738699880654,
+      "learning_rate": 4.220623177416903e-06,
+      "loss": 0.7732,
+      "step": 45120
+    },
+    {
+      "epoch": 1.7848879748462498,
+      "grad_norm": 1.630597845864978,
+      "learning_rate": 4.218350392037866e-06,
+      "loss": 0.733,
+      "step": 45130
+    },
+    {
+      "epoch": 1.7852834740651389,
+      "grad_norm": 1.157999637773178,
+      "learning_rate": 4.216077772198826e-06,
+      "loss": 0.7734,
+      "step": 45140
+    },
+    {
+      "epoch": 1.785678973284028,
+      "grad_norm": 1.215136790667082,
+      "learning_rate": 4.213805318381084e-06,
+      "loss": 0.7977,
+      "step": 45150
+    },
+    {
+      "epoch": 1.786074472502917,
+      "grad_norm": 1.6018416144309322,
+      "learning_rate": 4.2115330310659046e-06,
+      "loss": 0.7538,
+      "step": 45160
+    },
+    {
+      "epoch": 1.786469971721806,
+      "grad_norm": 1.4296328880052969,
+      "learning_rate": 4.209260910734522e-06,
+      "loss": 0.7673,
+      "step": 45170
+    },
+    {
+      "epoch": 1.786865470940695,
+      "grad_norm": 1.289735164772647,
+      "learning_rate": 4.20698895786813e-06,
+      "loss": 0.795,
+      "step": 45180
+    },
+    {
+      "epoch": 1.787260970159584,
+      "grad_norm": 1.4003928626152649,
+      "learning_rate": 4.204717172947892e-06,
+      "loss": 0.7675,
+      "step": 45190
+    },
+    {
+      "epoch": 1.7876564693784731,
+      "grad_norm": 1.435218394120719,
+      "learning_rate": 4.202445556454934e-06,
+      "loss": 0.7873,
+      "step": 45200
+    },
+    {
+      "epoch": 1.7880519685973622,
+      "grad_norm": 1.6299015380454278,
+      "learning_rate": 4.200174108870341e-06,
+      "loss": 0.7339,
+      "step": 45210
+    },
+    {
+      "epoch": 1.7884474678162512,
+      "grad_norm": 1.5999887640432664,
+      "learning_rate": 4.197902830675169e-06,
+      "loss": 0.787,
+      "step": 45220
+    },
+    {
+      "epoch": 1.7888429670351402,
+      "grad_norm": 1.1896054718627778,
+      "learning_rate": 4.195631722350436e-06,
+      "loss": 0.7945,
+      "step": 45230
+    },
+    {
+      "epoch": 1.7892384662540293,
+      "grad_norm": 1.3360387219716665,
+      "learning_rate": 4.193360784377123e-06,
+      "loss": 0.7504,
+      "step": 45240
+    },
+    {
+      "epoch": 1.7896339654729183,
+      "grad_norm": 1.461943732480947,
+      "learning_rate": 4.191090017236177e-06,
+      "loss": 0.7715,
+      "step": 45250
+    },
+    {
+      "epoch": 1.7900294646918073,
+      "grad_norm": 1.5371448898378919,
+      "learning_rate": 4.188819421408507e-06,
+      "loss": 0.7535,
+      "step": 45260
+    },
+    {
+      "epoch": 1.7904249639106964,
+      "grad_norm": 1.4208965498047244,
+      "learning_rate": 4.186548997374986e-06,
+      "loss": 0.7697,
+      "step": 45270
+    },
+    {
+      "epoch": 1.7908204631295854,
+      "grad_norm": 1.4778972438745934,
+      "learning_rate": 4.184278745616451e-06,
+      "loss": 0.7745,
+      "step": 45280
+    },
+    {
+      "epoch": 1.7912159623484745,
+      "grad_norm": 1.4165928337881941,
+      "learning_rate": 4.182008666613703e-06,
+      "loss": 0.7566,
+      "step": 45290
+    },
+    {
+      "epoch": 1.7916114615673635,
+      "grad_norm": 1.7572655586233457,
+      "learning_rate": 4.179738760847505e-06,
+      "loss": 0.7856,
+      "step": 45300
+    },
+    {
+      "epoch": 1.7920069607862525,
+      "grad_norm": 1.6129481366580611,
+      "learning_rate": 4.1774690287985845e-06,
+      "loss": 0.7325,
+      "step": 45310
+    },
+    {
+      "epoch": 1.7924024600051416,
+      "grad_norm": 1.2045356151529507,
+      "learning_rate": 4.1751994709476345e-06,
+      "loss": 0.7645,
+      "step": 45320
+    },
+    {
+      "epoch": 1.7927979592240306,
+      "grad_norm": 1.468682648123263,
+      "learning_rate": 4.1729300877753035e-06,
+      "loss": 0.771,
+      "step": 45330
+    },
+    {
+      "epoch": 1.7931934584429197,
+      "grad_norm": 1.5377179201316538,
+      "learning_rate": 4.170660879762211e-06,
+      "loss": 0.7721,
+      "step": 45340
+    },
+    {
+      "epoch": 1.7935889576618087,
+      "grad_norm": 1.8154945244252436,
+      "learning_rate": 4.168391847388934e-06,
+      "loss": 0.7695,
+      "step": 45350
+    },
+    {
+      "epoch": 1.7939844568806977,
+      "grad_norm": 1.3895031709559718,
+      "learning_rate": 4.166122991136018e-06,
+      "loss": 0.781,
+      "step": 45360
+    },
+    {
+      "epoch": 1.7943799560995868,
+      "grad_norm": 1.239612160982945,
+      "learning_rate": 4.163854311483966e-06,
+      "loss": 0.8003,
+      "step": 45370
+    },
+    {
+      "epoch": 1.7947754553184758,
+      "grad_norm": 1.5197095804328187,
+      "learning_rate": 4.161585808913246e-06,
+      "loss": 0.7787,
+      "step": 45380
+    },
+    {
+      "epoch": 1.7951709545373649,
+      "grad_norm": 1.5401817467103172,
+      "learning_rate": 4.1593174839042874e-06,
+      "loss": 0.7719,
+      "step": 45390
+    },
+    {
+      "epoch": 1.795566453756254,
+      "grad_norm": 1.349350293611061,
+      "learning_rate": 4.157049336937483e-06,
+      "loss": 0.759,
+      "step": 45400
+    },
+    {
+      "epoch": 1.795961952975143,
+      "grad_norm": 1.3723117613601592,
+      "learning_rate": 4.154781368493187e-06,
+      "loss": 0.7763,
+      "step": 45410
+    },
+    {
+      "epoch": 1.796357452194032,
+      "grad_norm": 1.545437380981378,
+      "learning_rate": 4.152513579051718e-06,
+      "loss": 0.7718,
+      "step": 45420
+    },
+    {
+      "epoch": 1.796752951412921,
+      "grad_norm": 1.583606779428353,
+      "learning_rate": 4.150245969093353e-06,
+      "loss": 0.7611,
+      "step": 45430
+    },
+    {
+      "epoch": 1.79714845063181,
+      "grad_norm": 1.476474120051364,
+      "learning_rate": 4.147978539098334e-06,
+      "loss": 0.752,
+      "step": 45440
+    },
+    {
+      "epoch": 1.797543949850699,
+      "grad_norm": 1.5210462138085645,
+      "learning_rate": 4.1457112895468645e-06,
+      "loss": 0.7761,
+      "step": 45450
+    },
+    {
+      "epoch": 1.7979394490695881,
+      "grad_norm": 1.633826341873089,
+      "learning_rate": 4.143444220919107e-06,
+      "loss": 0.7721,
+      "step": 45460
+    },
+    {
+      "epoch": 1.7983349482884772,
+      "grad_norm": 1.7503841990183295,
+      "learning_rate": 4.141177333695188e-06,
+      "loss": 0.7897,
+      "step": 45470
+    },
+    {
+      "epoch": 1.7987304475073662,
+      "grad_norm": 1.2619864241037637,
+      "learning_rate": 4.138910628355197e-06,
+      "loss": 0.8,
+      "step": 45480
+    },
+    {
+      "epoch": 1.7991259467262553,
+      "grad_norm": 1.471301430485854,
+      "learning_rate": 4.136644105379182e-06,
+      "loss": 0.8015,
+      "step": 45490
+    },
+    {
+      "epoch": 1.7995214459451443,
+      "grad_norm": 1.4421259886120517,
+      "learning_rate": 4.134377765247155e-06,
+      "loss": 0.7605,
+      "step": 45500
+    },
+    {
+      "epoch": 1.7999169451640333,
+      "grad_norm": 1.3516067775719383,
+      "learning_rate": 4.132111608439087e-06,
+      "loss": 0.7841,
+      "step": 45510
+    },
+    {
+      "epoch": 1.8003124443829224,
+      "grad_norm": 1.2441439433322172,
+      "learning_rate": 4.129845635434911e-06,
+      "loss": 0.7508,
+      "step": 45520
+    },
+    {
+      "epoch": 1.8007079436018114,
+      "grad_norm": 1.4889357719088259,
+      "learning_rate": 4.127579846714522e-06,
+      "loss": 0.7765,
+      "step": 45530
+    },
+    {
+      "epoch": 1.8011034428207005,
+      "grad_norm": 1.557395453210787,
+      "learning_rate": 4.125314242757775e-06,
+      "loss": 0.757,
+      "step": 45540
+    },
+    {
+      "epoch": 1.8014989420395895,
+      "grad_norm": 1.3010898195447373,
+      "learning_rate": 4.123048824044486e-06,
+      "loss": 0.7742,
+      "step": 45550
+    },
+    {
+      "epoch": 1.8018944412584785,
+      "grad_norm": 1.1998803660136321,
+      "learning_rate": 4.120783591054433e-06,
+      "loss": 0.7582,
+      "step": 45560
+    },
+    {
+      "epoch": 1.8022899404773676,
+      "grad_norm": 1.5045054749468374,
+      "learning_rate": 4.118518544267353e-06,
+      "loss": 0.7387,
+      "step": 45570
+    },
+    {
+      "epoch": 1.8026854396962566,
+      "grad_norm": 1.387991288840058,
+      "learning_rate": 4.116253684162943e-06,
+      "loss": 0.7709,
+      "step": 45580
+    },
+    {
+      "epoch": 1.8030809389151456,
+      "grad_norm": 1.5643526758483042,
+      "learning_rate": 4.11398901122086e-06,
+      "loss": 0.7819,
+      "step": 45590
+    },
+    {
+      "epoch": 1.8034764381340347,
+      "grad_norm": 1.3172101138170522,
+      "learning_rate": 4.111724525920727e-06,
+      "loss": 0.765,
+      "step": 45600
+    },
+    {
+      "epoch": 1.8038719373529237,
+      "grad_norm": 1.4102617081286002,
+      "learning_rate": 4.1094602287421215e-06,
+      "loss": 0.7881,
+      "step": 45610
+    },
+    {
+      "epoch": 1.8042674365718128,
+      "grad_norm": 1.3142229000056214,
+      "learning_rate": 4.107196120164582e-06,
+      "loss": 0.7694,
+      "step": 45620
+    },
+    {
+      "epoch": 1.8046629357907018,
+      "grad_norm": 1.416473754856598,
+      "learning_rate": 4.104932200667609e-06,
+      "loss": 0.7653,
+      "step": 45630
+    },
+    {
+      "epoch": 1.8050584350095908,
+      "grad_norm": 1.2073096088392639,
+      "learning_rate": 4.10266847073066e-06,
+      "loss": 0.7834,
+      "step": 45640
+    },
+    {
+      "epoch": 1.8054539342284799,
+      "grad_norm": 1.8836674488829814,
+      "learning_rate": 4.1004049308331565e-06,
+      "loss": 0.7594,
+      "step": 45650
+    },
+    {
+      "epoch": 1.805849433447369,
+      "grad_norm": 1.3055936757301028,
+      "learning_rate": 4.098141581454477e-06,
+      "loss": 0.7665,
+      "step": 45660
+    },
+    {
+      "epoch": 1.806244932666258,
+      "grad_norm": 1.5364421268664765,
+      "learning_rate": 4.09587842307396e-06,
+      "loss": 0.7677,
+      "step": 45670
+    },
+    {
+      "epoch": 1.806640431885147,
+      "grad_norm": 1.4259537098748811,
+      "learning_rate": 4.0936154561709035e-06,
+      "loss": 0.78,
+      "step": 45680
+    },
+    {
+      "epoch": 1.807035931104036,
+      "grad_norm": 1.2407664826893434,
+      "learning_rate": 4.0913526812245655e-06,
+      "loss": 0.7984,
+      "step": 45690
+    },
+    {
+      "epoch": 1.807431430322925,
+      "grad_norm": 1.3636778782955388,
+      "learning_rate": 4.089090098714161e-06,
+      "loss": 0.8002,
+      "step": 45700
+    },
+    {
+      "epoch": 1.8078269295418141,
+      "grad_norm": 1.4363421582210136,
+      "learning_rate": 4.086827709118868e-06,
+      "loss": 0.7473,
+      "step": 45710
+    },
+    {
+      "epoch": 1.8082224287607032,
+      "grad_norm": 1.9586507531462394,
+      "learning_rate": 4.084565512917822e-06,
+      "loss": 0.765,
+      "step": 45720
+    },
+    {
+      "epoch": 1.8086179279795922,
+      "grad_norm": 1.5234385476542143,
+      "learning_rate": 4.082303510590117e-06,
+      "loss": 0.7729,
+      "step": 45730
+    },
+    {
+      "epoch": 1.8090134271984812,
+      "grad_norm": 1.5199325513116924,
+      "learning_rate": 4.080041702614807e-06,
+      "loss": 0.7838,
+      "step": 45740
+    },
+    {
+      "epoch": 1.8094089264173703,
+      "grad_norm": 1.3517545894513459,
+      "learning_rate": 4.077780089470902e-06,
+      "loss": 0.7764,
+      "step": 45750
+    },
+    {
+      "epoch": 1.8098044256362593,
+      "grad_norm": 1.5972210696439342,
+      "learning_rate": 4.075518671637375e-06,
+      "loss": 0.7623,
+      "step": 45760
+    },
+    {
+      "epoch": 1.8101999248551484,
+      "grad_norm": 1.6592142475372433,
+      "learning_rate": 4.073257449593156e-06,
+      "loss": 0.7642,
+      "step": 45770
+    },
+    {
+      "epoch": 1.8105954240740374,
+      "grad_norm": 1.269483600304672,
+      "learning_rate": 4.07099642381713e-06,
+      "loss": 0.7943,
+      "step": 45780
+    },
+    {
+      "epoch": 1.8109909232929264,
+      "grad_norm": 1.389191623328749,
+      "learning_rate": 4.068735594788146e-06,
+      "loss": 0.7757,
+      "step": 45790
+    },
+    {
+      "epoch": 1.8113864225118155,
+      "grad_norm": 1.5224690858989347,
+      "learning_rate": 4.066474962985009e-06,
+      "loss": 0.7543,
+      "step": 45800
+    },
+    {
+      "epoch": 1.8117819217307045,
+      "grad_norm": 1.5179014185853363,
+      "learning_rate": 4.064214528886484e-06,
+      "loss": 0.7494,
+      "step": 45810
+    },
+    {
+      "epoch": 1.8121774209495936,
+      "grad_norm": 1.5700708861766726,
+      "learning_rate": 4.061954292971287e-06,
+      "loss": 0.7741,
+      "step": 45820
+    },
+    {
+      "epoch": 1.8125729201684826,
+      "grad_norm": 1.2761813531353026,
+      "learning_rate": 4.0596942557181004e-06,
+      "loss": 0.7651,
+      "step": 45830
+    },
+    {
+      "epoch": 1.8129684193873716,
+      "grad_norm": 1.3097193073534474,
+      "learning_rate": 4.05743441760556e-06,
+      "loss": 0.7838,
+      "step": 45840
+    },
+    {
+      "epoch": 1.8133639186062607,
+      "grad_norm": 1.5033990406958209,
+      "learning_rate": 4.055174779112262e-06,
+      "loss": 0.7689,
+      "step": 45850
+    },
+    {
+      "epoch": 1.8137594178251497,
+      "grad_norm": 1.496448182208677,
+      "learning_rate": 4.05291534071676e-06,
+      "loss": 0.7744,
+      "step": 45860
+    },
+    {
+      "epoch": 1.8141549170440388,
+      "grad_norm": 1.619399835825619,
+      "learning_rate": 4.050656102897562e-06,
+      "loss": 0.7584,
+      "step": 45870
+    },
+    {
+      "epoch": 1.8145504162629278,
+      "grad_norm": 1.4993273938849707,
+      "learning_rate": 4.048397066133138e-06,
+      "loss": 0.8094,
+      "step": 45880
+    },
+    {
+      "epoch": 1.8149459154818168,
+      "grad_norm": 1.521287738985428,
+      "learning_rate": 4.0461382309019114e-06,
+      "loss": 0.7569,
+      "step": 45890
+    },
+    {
+      "epoch": 1.8153414147007059,
+      "grad_norm": 1.5487038465547813,
+      "learning_rate": 4.043879597682266e-06,
+      "loss": 0.7746,
+      "step": 45900
+    },
+    {
+      "epoch": 1.815736913919595,
+      "grad_norm": 1.3562951761395383,
+      "learning_rate": 4.041621166952542e-06,
+      "loss": 0.7766,
+      "step": 45910
+    },
+    {
+      "epoch": 1.816132413138484,
+      "grad_norm": 1.412088105457243,
+      "learning_rate": 4.039362939191036e-06,
+      "loss": 0.7628,
+      "step": 45920
+    },
+    {
+      "epoch": 1.816527912357373,
+      "grad_norm": 1.2410648810785314,
+      "learning_rate": 4.037104914876001e-06,
+      "loss": 0.7808,
+      "step": 45930
+    },
+    {
+      "epoch": 1.816923411576262,
+      "grad_norm": 1.38406206806233,
+      "learning_rate": 4.0348470944856496e-06,
+      "loss": 0.7722,
+      "step": 45940
+    },
+    {
+      "epoch": 1.817318910795151,
+      "grad_norm": 1.474588730130419,
+      "learning_rate": 4.032589478498147e-06,
+      "loss": 0.7692,
+      "step": 45950
+    },
+    {
+      "epoch": 1.81771441001404,
+      "grad_norm": 1.4136768043549202,
+      "learning_rate": 4.0303320673916195e-06,
+      "loss": 0.7991,
+      "step": 45960
+    },
+    {
+      "epoch": 1.8181099092329291,
+      "grad_norm": 1.4874282697046477,
+      "learning_rate": 4.028074861644149e-06,
+      "loss": 0.7372,
+      "step": 45970
+    },
+    {
+      "epoch": 1.8185054084518182,
+      "grad_norm": 1.3734093413076005,
+      "learning_rate": 4.025817861733769e-06,
+      "loss": 0.7671,
+      "step": 45980
+    },
+    {
+      "epoch": 1.8189009076707072,
+      "grad_norm": 1.2770383275104966,
+      "learning_rate": 4.023561068138478e-06,
+      "loss": 0.7741,
+      "step": 45990
+    },
+    {
+      "epoch": 1.8192964068895963,
+      "grad_norm": 1.5258026474440733,
+      "learning_rate": 4.0213044813362225e-06,
+      "loss": 0.7617,
+      "step": 46000
+    },
+    {
+      "epoch": 1.8196919061084853,
+      "grad_norm": 1.4470186437379458,
+      "learning_rate": 4.0190481018049116e-06,
+      "loss": 0.8073,
+      "step": 46010
+    },
+    {
+      "epoch": 1.8200874053273743,
+      "grad_norm": 1.4227686776566977,
+      "learning_rate": 4.016791930022407e-06,
+      "loss": 0.7746,
+      "step": 46020
+    },
+    {
+      "epoch": 1.8204829045462634,
+      "grad_norm": 1.536187426490688,
+      "learning_rate": 4.014535966466526e-06,
+      "loss": 0.7605,
+      "step": 46030
+    },
+    {
+      "epoch": 1.8208784037651524,
+      "grad_norm": 1.2772171551252087,
+      "learning_rate": 4.012280211615046e-06,
+      "loss": 0.774,
+      "step": 46040
+    },
+    {
+      "epoch": 1.8212739029840415,
+      "grad_norm": 1.3208669900956689,
+      "learning_rate": 4.010024665945693e-06,
+      "loss": 0.765,
+      "step": 46050
+    },
+    {
+      "epoch": 1.8216694022029305,
+      "grad_norm": 1.6056383013665025,
+      "learning_rate": 4.0077693299361594e-06,
+      "loss": 0.7331,
+      "step": 46060
+    },
+    {
+      "epoch": 1.8220649014218195,
+      "grad_norm": 1.773901275603768,
+      "learning_rate": 4.00551420406408e-06,
+      "loss": 0.7712,
+      "step": 46070
+    },
+    {
+      "epoch": 1.8224604006407086,
+      "grad_norm": 1.298633913311332,
+      "learning_rate": 4.003259288807055e-06,
+      "loss": 0.7781,
+      "step": 46080
+    },
+    {
+      "epoch": 1.8228558998595976,
+      "grad_norm": 1.3544488286149823,
+      "learning_rate": 4.001004584642635e-06,
+      "loss": 0.7984,
+      "step": 46090
+    },
+    {
+      "epoch": 1.8232513990784867,
+      "grad_norm": 1.3814133108976563,
+      "learning_rate": 3.998750092048329e-06,
+      "loss": 0.7691,
+      "step": 46100
+    },
+    {
+      "epoch": 1.823646898297376,
+      "grad_norm": 1.4582808095956161,
+      "learning_rate": 3.996495811501601e-06,
+      "loss": 0.7608,
+      "step": 46110
+    },
+    {
+      "epoch": 1.824042397516265,
+      "grad_norm": 1.5000079919750775,
+      "learning_rate": 3.994241743479867e-06,
+      "loss": 0.7661,
+      "step": 46120
+    },
+    {
+      "epoch": 1.824437896735154,
+      "grad_norm": 1.6103539859038203,
+      "learning_rate": 3.9919878884605015e-06,
+      "loss": 0.7783,
+      "step": 46130
+    },
+    {
+      "epoch": 1.824833395954043,
+      "grad_norm": 1.3782951038814135,
+      "learning_rate": 3.989734246920831e-06,
+      "loss": 0.7523,
+      "step": 46140
+    },
+    {
+      "epoch": 1.825228895172932,
+      "grad_norm": 1.6617487897801164,
+      "learning_rate": 3.987480819338141e-06,
+      "loss": 0.753,
+      "step": 46150
+    },
+    {
+      "epoch": 1.8256243943918211,
+      "grad_norm": 1.2456127558127321,
+      "learning_rate": 3.985227606189665e-06,
+      "loss": 0.7711,
+      "step": 46160
+    },
+    {
+      "epoch": 1.8260198936107102,
+      "grad_norm": 1.4248525267395948,
+      "learning_rate": 3.9829746079525975e-06,
+      "loss": 0.786,
+      "step": 46170
+    },
+    {
+      "epoch": 1.8264153928295992,
+      "grad_norm": 1.5509402017837948,
+      "learning_rate": 3.980721825104085e-06,
+      "loss": 0.7928,
+      "step": 46180
+    },
+    {
+      "epoch": 1.8268108920484882,
+      "grad_norm": 1.2819042373879894,
+      "learning_rate": 3.978469258121225e-06,
+      "loss": 0.7556,
+      "step": 46190
+    },
+    {
+      "epoch": 1.8272063912673773,
+      "grad_norm": 1.5321893336095616,
+      "learning_rate": 3.976216907481076e-06,
+      "loss": 0.7541,
+      "step": 46200
+    },
+    {
+      "epoch": 1.8276018904862663,
+      "grad_norm": 1.4560866364015026,
+      "learning_rate": 3.973964773660649e-06,
+      "loss": 0.777,
+      "step": 46210
+    },
+    {
+      "epoch": 1.8279973897051554,
+      "grad_norm": 1.4638563114993728,
+      "learning_rate": 3.971712857136902e-06,
+      "loss": 0.7808,
+      "step": 46220
+    },
+    {
+      "epoch": 1.8283928889240444,
+      "grad_norm": 1.27753349151783,
+      "learning_rate": 3.969461158386755e-06,
+      "loss": 0.7855,
+      "step": 46230
+    },
+    {
+      "epoch": 1.8287883881429334,
+      "grad_norm": 1.2601120346826225,
+      "learning_rate": 3.967209677887079e-06,
+      "loss": 0.8116,
+      "step": 46240
+    },
+    {
+      "epoch": 1.8291838873618225,
+      "grad_norm": 1.3532146278018007,
+      "learning_rate": 3.9649584161147e-06,
+      "loss": 0.8207,
+      "step": 46250
+    },
+    {
+      "epoch": 1.8295793865807115,
+      "grad_norm": 1.7483432492532731,
+      "learning_rate": 3.962707373546396e-06,
+      "loss": 0.7348,
+      "step": 46260
+    },
+    {
+      "epoch": 1.8299748857996005,
+      "grad_norm": 1.2177485797370824,
+      "learning_rate": 3.960456550658899e-06,
+      "loss": 0.7622,
+      "step": 46270
+    },
+    {
+      "epoch": 1.8303703850184896,
+      "grad_norm": 1.542469264649917,
+      "learning_rate": 3.958205947928895e-06,
+      "loss": 0.7644,
+      "step": 46280
+    },
+    {
+      "epoch": 1.8307658842373786,
+      "grad_norm": 1.5596861154038129,
+      "learning_rate": 3.9559555658330226e-06,
+      "loss": 0.7581,
+      "step": 46290
+    },
+    {
+      "epoch": 1.8311613834562677,
+      "grad_norm": 1.4021852589195147,
+      "learning_rate": 3.953705404847877e-06,
+      "loss": 0.7863,
+      "step": 46300
+    },
+    {
+      "epoch": 1.8315568826751567,
+      "grad_norm": 1.237541100184159,
+      "learning_rate": 3.951455465449999e-06,
+      "loss": 0.7796,
+      "step": 46310
+    },
+    {
+      "epoch": 1.8319523818940457,
+      "grad_norm": 1.291996208592542,
+      "learning_rate": 3.9492057481158905e-06,
+      "loss": 0.7742,
+      "step": 46320
+    },
+    {
+      "epoch": 1.8323478811129348,
+      "grad_norm": 1.705638717812554,
+      "learning_rate": 3.946956253322001e-06,
+      "loss": 0.7696,
+      "step": 46330
+    },
+    {
+      "epoch": 1.8327433803318238,
+      "grad_norm": 1.3042719117786041,
+      "learning_rate": 3.9447069815447365e-06,
+      "loss": 0.785,
+      "step": 46340
+    },
+    {
+      "epoch": 1.8331388795507129,
+      "grad_norm": 1.6320920785234616,
+      "learning_rate": 3.942457933260454e-06,
+      "loss": 0.7681,
+      "step": 46350
+    },
+    {
+      "epoch": 1.833534378769602,
+      "grad_norm": 1.5755890690491232,
+      "learning_rate": 3.940209108945463e-06,
+      "loss": 0.7621,
+      "step": 46360
+    },
+    {
+      "epoch": 1.833929877988491,
+      "grad_norm": 1.3850454525456801,
+      "learning_rate": 3.937960509076026e-06,
+      "loss": 0.7994,
+      "step": 46370
+    },
+    {
+      "epoch": 1.83432537720738,
+      "grad_norm": 1.2858976057023412,
+      "learning_rate": 3.935712134128359e-06,
+      "loss": 0.7849,
+      "step": 46380
+    },
+    {
+      "epoch": 1.834720876426269,
+      "grad_norm": 1.2661530575212854,
+      "learning_rate": 3.933463984578629e-06,
+      "loss": 0.7605,
+      "step": 46390
+    },
+    {
+      "epoch": 1.835116375645158,
+      "grad_norm": 1.5757550643874016,
+      "learning_rate": 3.931216060902953e-06,
+      "loss": 0.7743,
+      "step": 46400
+    },
+    {
+      "epoch": 1.835511874864047,
+      "grad_norm": 1.3706600697486169,
+      "learning_rate": 3.928968363577406e-06,
+      "loss": 0.7776,
+      "step": 46410
+    },
+    {
+      "epoch": 1.8359073740829361,
+      "grad_norm": 1.2600157671615326,
+      "learning_rate": 3.9267208930780095e-06,
+      "loss": 0.7691,
+      "step": 46420
+    },
+    {
+      "epoch": 1.8363028733018252,
+      "grad_norm": 1.297255437228757,
+      "learning_rate": 3.924473649880742e-06,
+      "loss": 0.7731,
+      "step": 46430
+    },
+    {
+      "epoch": 1.8366983725207142,
+      "grad_norm": 1.649512152772169,
+      "learning_rate": 3.922226634461529e-06,
+      "loss": 0.7482,
+      "step": 46440
+    },
+    {
+      "epoch": 1.8370938717396035,
+      "grad_norm": 1.547920602594299,
+      "learning_rate": 3.919979847296249e-06,
+      "loss": 0.7548,
+      "step": 46450
+    },
+    {
+      "epoch": 1.8374893709584925,
+      "grad_norm": 1.5427777863591599,
+      "learning_rate": 3.917733288860735e-06,
+      "loss": 0.7704,
+      "step": 46460
+    },
+    {
+      "epoch": 1.8378848701773816,
+      "grad_norm": 1.603914690257776,
+      "learning_rate": 3.9154869596307675e-06,
+      "loss": 0.7555,
+      "step": 46470
+    },
+    {
+      "epoch": 1.8382803693962706,
+      "grad_norm": 1.2338832587372544,
+      "learning_rate": 3.913240860082083e-06,
+      "loss": 0.745,
+      "step": 46480
+    },
+    {
+      "epoch": 1.8386758686151596,
+      "grad_norm": 1.3747284062494922,
+      "learning_rate": 3.910994990690366e-06,
+      "loss": 0.7533,
+      "step": 46490
+    },
+    {
+      "epoch": 1.8390713678340487,
+      "grad_norm": 1.5156644275398272,
+      "learning_rate": 3.908749351931251e-06,
+      "loss": 0.7866,
+      "step": 46500
+    },
+    {
+      "epoch": 1.8394668670529377,
+      "grad_norm": 1.3922074450804318,
+      "learning_rate": 3.9065039442803295e-06,
+      "loss": 0.781,
+      "step": 46510
+    },
+    {
+      "epoch": 1.8398623662718268,
+      "grad_norm": 1.4450779243915337,
+      "learning_rate": 3.9042587682131385e-06,
+      "loss": 0.7567,
+      "step": 46520
+    },
+    {
+      "epoch": 1.8402578654907158,
+      "grad_norm": 1.3973409135576254,
+      "learning_rate": 3.902013824205168e-06,
+      "loss": 0.7841,
+      "step": 46530
+    },
+    {
+      "epoch": 1.8406533647096048,
+      "grad_norm": 1.5687146319956817,
+      "learning_rate": 3.899769112731858e-06,
+      "loss": 0.7724,
+      "step": 46540
+    },
+    {
+      "epoch": 1.8410488639284939,
+      "grad_norm": 1.3325344548524891,
+      "learning_rate": 3.897524634268603e-06,
+      "loss": 0.7871,
+      "step": 46550
+    },
+    {
+      "epoch": 1.841444363147383,
+      "grad_norm": 1.4403765630798755,
+      "learning_rate": 3.89528038929074e-06,
+      "loss": 0.7841,
+      "step": 46560
+    },
+    {
+      "epoch": 1.841839862366272,
+      "grad_norm": 1.4242968179904358,
+      "learning_rate": 3.893036378273565e-06,
+      "loss": 0.7734,
+      "step": 46570
+    },
+    {
+      "epoch": 1.842235361585161,
+      "grad_norm": 1.490809391329131,
+      "learning_rate": 3.8907926016923205e-06,
+      "loss": 0.7643,
+      "step": 46580
+    },
+    {
+      "epoch": 1.84263086080405,
+      "grad_norm": 1.3295000751714796,
+      "learning_rate": 3.888549060022199e-06,
+      "loss": 0.7707,
+      "step": 46590
+    },
+    {
+      "epoch": 1.843026360022939,
+      "grad_norm": 1.1283752203481403,
+      "learning_rate": 3.8863057537383455e-06,
+      "loss": 0.7929,
+      "step": 46600
+    },
+    {
+      "epoch": 1.843421859241828,
+      "grad_norm": 1.2135571881303013,
+      "learning_rate": 3.8840626833158536e-06,
+      "loss": 0.7924,
+      "step": 46610
+    },
+    {
+      "epoch": 1.8438173584607171,
+      "grad_norm": 1.3588187301583048,
+      "learning_rate": 3.881819849229767e-06,
+      "loss": 0.788,
+      "step": 46620
+    },
+    {
+      "epoch": 1.8442128576796062,
+      "grad_norm": 1.5012704043770022,
+      "learning_rate": 3.879577251955079e-06,
+      "loss": 0.7538,
+      "step": 46630
+    },
+    {
+      "epoch": 1.8446083568984952,
+      "grad_norm": 1.2883755124542555,
+      "learning_rate": 3.8773348919667345e-06,
+      "loss": 0.7734,
+      "step": 46640
+    },
+    {
+      "epoch": 1.8450038561173843,
+      "grad_norm": 1.3574950782424573,
+      "learning_rate": 3.875092769739625e-06,
+      "loss": 0.7502,
+      "step": 46650
+    },
+    {
+      "epoch": 1.8453993553362733,
+      "grad_norm": 1.2512945178200172,
+      "learning_rate": 3.872850885748595e-06,
+      "loss": 0.7719,
+      "step": 46660
+    },
+    {
+      "epoch": 1.8457948545551623,
+      "grad_norm": 1.3725857682025175,
+      "learning_rate": 3.870609240468438e-06,
+      "loss": 0.7577,
+      "step": 46670
+    },
+    {
+      "epoch": 1.8461903537740514,
+      "grad_norm": 1.476246384778804,
+      "learning_rate": 3.868367834373895e-06,
+      "loss": 0.7703,
+      "step": 46680
+    },
+    {
+      "epoch": 1.8465858529929404,
+      "grad_norm": 1.249788109236149,
+      "learning_rate": 3.866126667939657e-06,
+      "loss": 0.7842,
+      "step": 46690
+    },
+    {
+      "epoch": 1.8469813522118295,
+      "grad_norm": 1.3699958036186077,
+      "learning_rate": 3.863885741640364e-06,
+      "loss": 0.7629,
+      "step": 46700
+    },
+    {
+      "epoch": 1.8473768514307185,
+      "grad_norm": 1.2344044725237173,
+      "learning_rate": 3.8616450559506065e-06,
+      "loss": 0.792,
+      "step": 46710
+    },
+    {
+      "epoch": 1.8477723506496075,
+      "grad_norm": 1.600299718541322,
+      "learning_rate": 3.859404611344925e-06,
+      "loss": 0.7401,
+      "step": 46720
+    },
+    {
+      "epoch": 1.8481678498684966,
+      "grad_norm": 1.3469177550692348,
+      "learning_rate": 3.8571644082978055e-06,
+      "loss": 0.8,
+      "step": 46730
+    },
+    {
+      "epoch": 1.8485633490873856,
+      "grad_norm": 1.822731952582551,
+      "learning_rate": 3.8549244472836845e-06,
+      "loss": 0.7625,
+      "step": 46740
+    },
+    {
+      "epoch": 1.8489588483062747,
+      "grad_norm": 1.4941439596833723,
+      "learning_rate": 3.852684728776948e-06,
+      "loss": 0.7868,
+      "step": 46750
+    },
+    {
+      "epoch": 1.8493543475251637,
+      "grad_norm": 1.2735700977569417,
+      "learning_rate": 3.85044525325193e-06,
+      "loss": 0.7802,
+      "step": 46760
+    },
+    {
+      "epoch": 1.8497498467440527,
+      "grad_norm": 1.6673522163069612,
+      "learning_rate": 3.848206021182913e-06,
+      "loss": 0.755,
+      "step": 46770
+    },
+    {
+      "epoch": 1.8501453459629418,
+      "grad_norm": 1.5837285639524838,
+      "learning_rate": 3.845967033044128e-06,
+      "loss": 0.7403,
+      "step": 46780
+    },
+    {
+      "epoch": 1.8505408451818308,
+      "grad_norm": 1.3251871856610935,
+      "learning_rate": 3.843728289309756e-06,
+      "loss": 0.7474,
+      "step": 46790
+    },
+    {
+      "epoch": 1.8509363444007199,
+      "grad_norm": 1.876480145570574,
+      "learning_rate": 3.8414897904539216e-06,
+      "loss": 0.7367,
+      "step": 46800
+    },
+    {
+      "epoch": 1.851331843619609,
+      "grad_norm": 1.4835877326385254,
+      "learning_rate": 3.8392515369507015e-06,
+      "loss": 0.7512,
+      "step": 46810
+    },
+    {
+      "epoch": 1.851727342838498,
+      "grad_norm": 1.6262707068358329,
+      "learning_rate": 3.8370135292741195e-06,
+      "loss": 0.7529,
+      "step": 46820
+    },
+    {
+      "epoch": 1.852122842057387,
+      "grad_norm": 1.4842564967867224,
+      "learning_rate": 3.834775767898148e-06,
+      "loss": 0.7428,
+      "step": 46830
+    },
+    {
+      "epoch": 1.852518341276276,
+      "grad_norm": 1.2023274939663307,
+      "learning_rate": 3.8325382532967045e-06,
+      "loss": 0.7732,
+      "step": 46840
+    },
+    {
+      "epoch": 1.852913840495165,
+      "grad_norm": 1.6824630189874736,
+      "learning_rate": 3.830300985943659e-06,
+      "loss": 0.7824,
+      "step": 46850
+    },
+    {
+      "epoch": 1.853309339714054,
+      "grad_norm": 1.3907747321127248,
+      "learning_rate": 3.828063966312827e-06,
+      "loss": 0.7694,
+      "step": 46860
+    },
+    {
+      "epoch": 1.8537048389329431,
+      "grad_norm": 1.3473232480616073,
+      "learning_rate": 3.825827194877967e-06,
+      "loss": 0.766,
+      "step": 46870
+    },
+    {
+      "epoch": 1.8541003381518322,
+      "grad_norm": 1.2997042914062478,
+      "learning_rate": 3.823590672112791e-06,
+      "loss": 0.7632,
+      "step": 46880
+    },
+    {
+      "epoch": 1.8544958373707212,
+      "grad_norm": 1.5027026063239883,
+      "learning_rate": 3.821354398490956e-06,
+      "loss": 0.7734,
+      "step": 46890
+    },
+    {
+      "epoch": 1.8548913365896103,
+      "grad_norm": 1.3219458775293302,
+      "learning_rate": 3.819118374486067e-06,
+      "loss": 0.7431,
+      "step": 46900
+    },
+    {
+      "epoch": 1.8552868358084993,
+      "grad_norm": 1.693861508471388,
+      "learning_rate": 3.816882600571675e-06,
+      "loss": 0.7784,
+      "step": 46910
+    },
+    {
+      "epoch": 1.8556823350273883,
+      "grad_norm": 1.4560999861874828,
+      "learning_rate": 3.814647077221281e-06,
+      "loss": 0.762,
+      "step": 46920
+    },
+    {
+      "epoch": 1.8560778342462774,
+      "grad_norm": 1.2969110838173135,
+      "learning_rate": 3.8124118049083257e-06,
+      "loss": 0.7684,
+      "step": 46930
+    },
+    {
+      "epoch": 1.8564733334651664,
+      "grad_norm": 1.3325445869781007,
+      "learning_rate": 3.810176784106205e-06,
+      "loss": 0.7598,
+      "step": 46940
+    },
+    {
+      "epoch": 1.8568688326840554,
+      "grad_norm": 1.441763564000196,
+      "learning_rate": 3.807942015288257e-06,
+      "loss": 0.7453,
+      "step": 46950
+    },
+    {
+      "epoch": 1.8572643319029445,
+      "grad_norm": 1.3562392166698485,
+      "learning_rate": 3.8057074989277676e-06,
+      "loss": 0.7985,
+      "step": 46960
+    },
+    {
+      "epoch": 1.8576598311218335,
+      "grad_norm": 1.3740741191197638,
+      "learning_rate": 3.8034732354979686e-06,
+      "loss": 0.7922,
+      "step": 46970
+    },
+    {
+      "epoch": 1.8580553303407226,
+      "grad_norm": 1.4661039861626741,
+      "learning_rate": 3.801239225472039e-06,
+      "loss": 0.7389,
+      "step": 46980
+    },
+    {
+      "epoch": 1.8584508295596116,
+      "grad_norm": 1.551889864743315,
+      "learning_rate": 3.7990054693231047e-06,
+      "loss": 0.7849,
+      "step": 46990
+    },
+    {
+      "epoch": 1.8588463287785006,
+      "grad_norm": 1.5807503772397271,
+      "learning_rate": 3.7967719675242366e-06,
+      "loss": 0.7573,
+      "step": 47000
+    },
+    {
+      "epoch": 1.8592418279973897,
+      "grad_norm": 1.569689675277944,
+      "learning_rate": 3.7945387205484514e-06,
+      "loss": 0.7649,
+      "step": 47010
+    },
+    {
+      "epoch": 1.8596373272162787,
+      "grad_norm": 1.654889404618668,
+      "learning_rate": 3.7923057288687125e-06,
+      "loss": 0.7642,
+      "step": 47020
+    },
+    {
+      "epoch": 1.8600328264351678,
+      "grad_norm": 1.5507912584625825,
+      "learning_rate": 3.7900729929579305e-06,
+      "loss": 0.7437,
+      "step": 47030
+    },
+    {
+      "epoch": 1.8604283256540568,
+      "grad_norm": 1.3285720838303194,
+      "learning_rate": 3.7878405132889618e-06,
+      "loss": 0.7667,
+      "step": 47040
+    },
+    {
+      "epoch": 1.8608238248729458,
+      "grad_norm": 1.5652573592522234,
+      "learning_rate": 3.7856082903346034e-06,
+      "loss": 0.7577,
+      "step": 47050
+    },
+    {
+      "epoch": 1.8612193240918349,
+      "grad_norm": 1.33809875754651,
+      "learning_rate": 3.7833763245676037e-06,
+      "loss": 0.7662,
+      "step": 47060
+    },
+    {
+      "epoch": 1.861614823310724,
+      "grad_norm": 1.5871729801949934,
+      "learning_rate": 3.7811446164606552e-06,
+      "loss": 0.7732,
+      "step": 47070
+    },
+    {
+      "epoch": 1.862010322529613,
+      "grad_norm": 1.242156028538668,
+      "learning_rate": 3.7789131664863956e-06,
+      "loss": 0.7835,
+      "step": 47080
+    },
+    {
+      "epoch": 1.862405821748502,
+      "grad_norm": 1.41531552062675,
+      "learning_rate": 3.776681975117408e-06,
+      "loss": 0.7642,
+      "step": 47090
+    },
+    {
+      "epoch": 1.862801320967391,
+      "grad_norm": 1.6408357012460044,
+      "learning_rate": 3.7744510428262193e-06,
+      "loss": 0.756,
+      "step": 47100
+    },
+    {
+      "epoch": 1.86319682018628,
+      "grad_norm": 1.3605624997799788,
+      "learning_rate": 3.7722203700853026e-06,
+      "loss": 0.7624,
+      "step": 47110
+    },
+    {
+      "epoch": 1.8635923194051691,
+      "grad_norm": 1.3389985496842347,
+      "learning_rate": 3.769989957367078e-06,
+      "loss": 0.753,
+      "step": 47120
+    },
+    {
+      "epoch": 1.8639878186240582,
+      "grad_norm": 1.2212417677663336,
+      "learning_rate": 3.767759805143907e-06,
+      "loss": 0.7501,
+      "step": 47130
+    },
+    {
+      "epoch": 1.8643833178429472,
+      "grad_norm": 1.767624420110444,
+      "learning_rate": 3.7655299138880986e-06,
+      "loss": 0.7823,
+      "step": 47140
+    },
+    {
+      "epoch": 1.8647788170618362,
+      "grad_norm": 1.61737452316488,
+      "learning_rate": 3.7633002840719044e-06,
+      "loss": 0.7735,
+      "step": 47150
+    },
+    {
+      "epoch": 1.8651743162807253,
+      "grad_norm": 1.4173678641166108,
+      "learning_rate": 3.7610709161675264e-06,
+      "loss": 0.7577,
+      "step": 47160
+    },
+    {
+      "epoch": 1.8655698154996143,
+      "grad_norm": 1.7781541040998885,
+      "learning_rate": 3.758841810647099e-06,
+      "loss": 0.7678,
+      "step": 47170
+    },
+    {
+      "epoch": 1.8659653147185034,
+      "grad_norm": 1.3133033382571648,
+      "learning_rate": 3.7566129679827135e-06,
+      "loss": 0.7571,
+      "step": 47180
+    },
+    {
+      "epoch": 1.8663608139373924,
+      "grad_norm": 1.8589502526553208,
+      "learning_rate": 3.7543843886463993e-06,
+      "loss": 0.7625,
+      "step": 47190
+    },
+    {
+      "epoch": 1.8667563131562814,
+      "grad_norm": 1.5112430406836406,
+      "learning_rate": 3.752156073110131e-06,
+      "loss": 0.7553,
+      "step": 47200
+    },
+    {
+      "epoch": 1.8671518123751705,
+      "grad_norm": 1.5033706968451628,
+      "learning_rate": 3.7499280218458282e-06,
+      "loss": 0.7532,
+      "step": 47210
+    },
+    {
+      "epoch": 1.8675473115940595,
+      "grad_norm": 1.4677868973736172,
+      "learning_rate": 3.7477002353253545e-06,
+      "loss": 0.7656,
+      "step": 47220
+    },
+    {
+      "epoch": 1.8679428108129486,
+      "grad_norm": 1.5521075463754603,
+      "learning_rate": 3.7454727140205154e-06,
+      "loss": 0.7451,
+      "step": 47230
+    },
+    {
+      "epoch": 1.8683383100318376,
+      "grad_norm": 1.450933463574836,
+      "learning_rate": 3.743245458403063e-06,
+      "loss": 0.7338,
+      "step": 47240
+    },
+    {
+      "epoch": 1.8687338092507266,
+      "grad_norm": 1.2575614759195324,
+      "learning_rate": 3.741018468944692e-06,
+      "loss": 0.7627,
+      "step": 47250
+    },
+    {
+      "epoch": 1.8691293084696157,
+      "grad_norm": 1.602739832028912,
+      "learning_rate": 3.7387917461170396e-06,
+      "loss": 0.7489,
+      "step": 47260
+    },
+    {
+      "epoch": 1.8695248076885047,
+      "grad_norm": 1.4371462164398505,
+      "learning_rate": 3.7365652903916892e-06,
+      "loss": 0.7492,
+      "step": 47270
+    },
+    {
+      "epoch": 1.8699203069073937,
+      "grad_norm": 1.466234772809852,
+      "learning_rate": 3.7343391022401653e-06,
+      "loss": 0.774,
+      "step": 47280
+    },
+    {
+      "epoch": 1.8703158061262828,
+      "grad_norm": 1.3825117242074576,
+      "learning_rate": 3.732113182133935e-06,
+      "loss": 0.7547,
+      "step": 47290
+    },
+    {
+      "epoch": 1.8707113053451718,
+      "grad_norm": 1.378978237589993,
+      "learning_rate": 3.729887530544411e-06,
+      "loss": 0.7558,
+      "step": 47300
+    },
+    {
+      "epoch": 1.8711068045640609,
+      "grad_norm": 1.3327701163729486,
+      "learning_rate": 3.7276621479429475e-06,
+      "loss": 0.7589,
+      "step": 47310
+    },
+    {
+      "epoch": 1.87150230378295,
+      "grad_norm": 1.5149515703076581,
+      "learning_rate": 3.725437034800844e-06,
+      "loss": 0.7439,
+      "step": 47320
+    },
+    {
+      "epoch": 1.871897803001839,
+      "grad_norm": 1.5615731571460778,
+      "learning_rate": 3.7232121915893414e-06,
+      "loss": 0.7734,
+      "step": 47330
+    },
+    {
+      "epoch": 1.872293302220728,
+      "grad_norm": 1.4618045624031561,
+      "learning_rate": 3.720987618779621e-06,
+      "loss": 0.7632,
+      "step": 47340
+    },
+    {
+      "epoch": 1.872688801439617,
+      "grad_norm": 1.5872922801959648,
+      "learning_rate": 3.718763316842811e-06,
+      "loss": 0.7646,
+      "step": 47350
+    },
+    {
+      "epoch": 1.873084300658506,
+      "grad_norm": 1.2933564937170532,
+      "learning_rate": 3.71653928624998e-06,
+      "loss": 0.7731,
+      "step": 47360
+    },
+    {
+      "epoch": 1.873479799877395,
+      "grad_norm": 1.334385568275373,
+      "learning_rate": 3.71431552747214e-06,
+      "loss": 0.7692,
+      "step": 47370
+    },
+    {
+      "epoch": 1.8738752990962841,
+      "grad_norm": 1.2515286619272854,
+      "learning_rate": 3.712092040980244e-06,
+      "loss": 0.7519,
+      "step": 47380
+    },
+    {
+      "epoch": 1.8742707983151732,
+      "grad_norm": 1.4741879738673807,
+      "learning_rate": 3.7098688272451893e-06,
+      "loss": 0.7568,
+      "step": 47390
+    },
+    {
+      "epoch": 1.8746662975340622,
+      "grad_norm": 1.4057151664095175,
+      "learning_rate": 3.707645886737814e-06,
+      "loss": 0.7701,
+      "step": 47400
+    },
+    {
+      "epoch": 1.8750617967529513,
+      "grad_norm": 1.3707203368565195,
+      "learning_rate": 3.705423219928902e-06,
+      "loss": 0.784,
+      "step": 47410
+    },
+    {
+      "epoch": 1.8754572959718403,
+      "grad_norm": 1.2574671352759665,
+      "learning_rate": 3.70320082728917e-06,
+      "loss": 0.7908,
+      "step": 47420
+    },
+    {
+      "epoch": 1.8758527951907293,
+      "grad_norm": 1.5146159927892748,
+      "learning_rate": 3.7009787092892863e-06,
+      "loss": 0.7476,
+      "step": 47430
+    },
+    {
+      "epoch": 1.8762482944096184,
+      "grad_norm": 1.6680939106558197,
+      "learning_rate": 3.698756866399857e-06,
+      "loss": 0.7459,
+      "step": 47440
+    },
+    {
+      "epoch": 1.8766437936285074,
+      "grad_norm": 1.434283660416888,
+      "learning_rate": 3.6965352990914295e-06,
+      "loss": 0.7574,
+      "step": 47450
+    },
+    {
+      "epoch": 1.8770392928473967,
+      "grad_norm": 1.3387187797510527,
+      "learning_rate": 3.694314007834495e-06,
+      "loss": 0.75,
+      "step": 47460
+    },
+    {
+      "epoch": 1.8774347920662857,
+      "grad_norm": 1.5593558973189203,
+      "learning_rate": 3.692092993099484e-06,
+      "loss": 0.7484,
+      "step": 47470
+    },
+    {
+      "epoch": 1.8778302912851748,
+      "grad_norm": 1.3467444091179797,
+      "learning_rate": 3.6898722553567706e-06,
+      "loss": 0.761,
+      "step": 47480
+    },
+    {
+      "epoch": 1.8782257905040638,
+      "grad_norm": 1.2569072830455288,
+      "learning_rate": 3.6876517950766675e-06,
+      "loss": 0.7695,
+      "step": 47490
+    },
+    {
+      "epoch": 1.8786212897229528,
+      "grad_norm": 1.1683779265078604,
+      "learning_rate": 3.685431612729431e-06,
+      "loss": 0.7866,
+      "step": 47500
+    },
+    {
+      "epoch": 1.8790167889418419,
+      "grad_norm": 1.534253366707865,
+      "learning_rate": 3.6832117087852587e-06,
+      "loss": 0.7629,
+      "step": 47510
+    },
+    {
+      "epoch": 1.879412288160731,
+      "grad_norm": 1.6231268002130992,
+      "learning_rate": 3.6809920837142853e-06,
+      "loss": 0.7495,
+      "step": 47520
+    },
+    {
+      "epoch": 1.87980778737962,
+      "grad_norm": 1.5771848540727516,
+      "learning_rate": 3.6787727379865934e-06,
+      "loss": 0.7445,
+      "step": 47530
+    },
+    {
+      "epoch": 1.880203286598509,
+      "grad_norm": 1.7399216172378273,
+      "learning_rate": 3.676553672072198e-06,
+      "loss": 0.7316,
+      "step": 47540
+    },
+    {
+      "epoch": 1.880598785817398,
+      "grad_norm": 1.676484051376917,
+      "learning_rate": 3.674334886441061e-06,
+      "loss": 0.7674,
+      "step": 47550
+    },
+    {
+      "epoch": 1.880994285036287,
+      "grad_norm": 1.311049970554493,
+      "learning_rate": 3.6721163815630855e-06,
+      "loss": 0.7757,
+      "step": 47560
+    },
+    {
+      "epoch": 1.8813897842551761,
+      "grad_norm": 1.4625221614169013,
+      "learning_rate": 3.6698981579081093e-06,
+      "loss": 0.7537,
+      "step": 47570
+    },
+    {
+      "epoch": 1.8817852834740652,
+      "grad_norm": 1.4499266913354638,
+      "learning_rate": 3.6676802159459155e-06,
+      "loss": 0.7529,
+      "step": 47580
+    },
+    {
+      "epoch": 1.8821807826929542,
+      "grad_norm": 1.522334245925788,
+      "learning_rate": 3.665462556146227e-06,
+      "loss": 0.7661,
+      "step": 47590
+    },
+    {
+      "epoch": 1.8825762819118432,
+      "grad_norm": 1.4144920461254835,
+      "learning_rate": 3.6632451789787056e-06,
+      "loss": 0.7611,
+      "step": 47600
+    },
+    {
+      "epoch": 1.8829717811307323,
+      "grad_norm": 1.4214050480361953,
+      "learning_rate": 3.6610280849129533e-06,
+      "loss": 0.7473,
+      "step": 47610
+    },
+    {
+      "epoch": 1.8833672803496213,
+      "grad_norm": 1.338640980951445,
+      "learning_rate": 3.6588112744185135e-06,
+      "loss": 0.7688,
+      "step": 47620
+    },
+    {
+      "epoch": 1.8837627795685103,
+      "grad_norm": 1.709520237250599,
+      "learning_rate": 3.656594747964868e-06,
+      "loss": 0.7356,
+      "step": 47630
+    },
+    {
+      "epoch": 1.8841582787873994,
+      "grad_norm": 1.311865762541882,
+      "learning_rate": 3.6543785060214387e-06,
+      "loss": 0.775,
+      "step": 47640
+    },
+    {
+      "epoch": 1.8845537780062884,
+      "grad_norm": 1.5860889959222024,
+      "learning_rate": 3.652162549057592e-06,
+      "loss": 0.7499,
+      "step": 47650
+    },
+    {
+      "epoch": 1.8849492772251775,
+      "grad_norm": 1.4018758600151087,
+      "learning_rate": 3.649946877542623e-06,
+      "loss": 0.7857,
+      "step": 47660
+    },
+    {
+      "epoch": 1.8853447764440665,
+      "grad_norm": 1.596874494146977,
+      "learning_rate": 3.647731491945775e-06,
+      "loss": 0.7574,
+      "step": 47670
+    },
+    {
+      "epoch": 1.8857402756629555,
+      "grad_norm": 1.265950007452047,
+      "learning_rate": 3.6455163927362315e-06,
+      "loss": 0.7764,
+      "step": 47680
+    },
+    {
+      "epoch": 1.8861357748818446,
+      "grad_norm": 1.3941556923120133,
+      "learning_rate": 3.6433015803831098e-06,
+      "loss": 0.7685,
+      "step": 47690
+    },
+    {
+      "epoch": 1.8865312741007336,
+      "grad_norm": 1.5619352056022604,
+      "learning_rate": 3.64108705535547e-06,
+      "loss": 0.7476,
+      "step": 47700
+    },
+    {
+      "epoch": 1.8869267733196227,
+      "grad_norm": 1.3659212859114231,
+      "learning_rate": 3.638872818122311e-06,
+      "loss": 0.7805,
+      "step": 47710
+    },
+    {
+      "epoch": 1.8873222725385117,
+      "grad_norm": 1.3161352415324021,
+      "learning_rate": 3.6366588691525706e-06,
+      "loss": 0.7765,
+      "step": 47720
+    },
+    {
+      "epoch": 1.8877177717574007,
+      "grad_norm": 1.522333657829966,
+      "learning_rate": 3.6344452089151238e-06,
+      "loss": 0.7485,
+      "step": 47730
+    },
+    {
+      "epoch": 1.8881132709762898,
+      "grad_norm": 1.5097266628952783,
+      "learning_rate": 3.6322318378787885e-06,
+      "loss": 0.7492,
+      "step": 47740
+    },
+    {
+      "epoch": 1.8885087701951788,
+      "grad_norm": 1.5333860312914256,
+      "learning_rate": 3.630018756512316e-06,
+      "loss": 0.7455,
+      "step": 47750
+    },
+    {
+      "epoch": 1.8889042694140679,
+      "grad_norm": 1.3129360080000902,
+      "learning_rate": 3.6278059652843995e-06,
+      "loss": 0.7635,
+      "step": 47760
+    },
+    {
+      "epoch": 1.889299768632957,
+      "grad_norm": 1.68370343450865,
+      "learning_rate": 3.6255934646636724e-06,
+      "loss": 0.7396,
+      "step": 47770
+    },
+    {
+      "epoch": 1.889695267851846,
+      "grad_norm": 1.4258204400074574,
+      "learning_rate": 3.623381255118702e-06,
+      "loss": 0.771,
+      "step": 47780
+    },
+    {
+      "epoch": 1.8900907670707352,
+      "grad_norm": 1.5769144004212399,
+      "learning_rate": 3.621169337117997e-06,
+      "loss": 0.7437,
+      "step": 47790
+    },
+    {
+      "epoch": 1.8904862662896242,
+      "grad_norm": 1.3636321368100723,
+      "learning_rate": 3.6189577111300043e-06,
+      "loss": 0.8037,
+      "step": 47800
+    },
+    {
+      "epoch": 1.8908817655085133,
+      "grad_norm": 1.6077616835747133,
+      "learning_rate": 3.6167463776231084e-06,
+      "loss": 0.7538,
+      "step": 47810
+    },
+    {
+      "epoch": 1.8912772647274023,
+      "grad_norm": 1.5906856982618143,
+      "learning_rate": 3.614535337065631e-06,
+      "loss": 0.7948,
+      "step": 47820
+    },
+    {
+      "epoch": 1.8916727639462914,
+      "grad_norm": 1.47615168405027,
+      "learning_rate": 3.612324589925833e-06,
+      "loss": 0.7553,
+      "step": 47830
+    },
+    {
+      "epoch": 1.8920682631651804,
+      "grad_norm": 1.6017589507133478,
+      "learning_rate": 3.6101141366719127e-06,
+      "loss": 0.7433,
+      "step": 47840
+    },
+    {
+      "epoch": 1.8924637623840694,
+      "grad_norm": 1.361011087813422,
+      "learning_rate": 3.607903977772007e-06,
+      "loss": 0.7581,
+      "step": 47850
+    },
+    {
+      "epoch": 1.8928592616029585,
+      "grad_norm": 1.3921189746976228,
+      "learning_rate": 3.605694113694189e-06,
+      "loss": 0.7779,
+      "step": 47860
+    },
+    {
+      "epoch": 1.8932547608218475,
+      "grad_norm": 1.769656467299996,
+      "learning_rate": 3.6034845449064702e-06,
+      "loss": 0.7508,
+      "step": 47870
+    },
+    {
+      "epoch": 1.8936502600407366,
+      "grad_norm": 1.4526432721423574,
+      "learning_rate": 3.6012752718767997e-06,
+      "loss": 0.7695,
+      "step": 47880
+    },
+    {
+      "epoch": 1.8940457592596256,
+      "grad_norm": 1.4970301621613045,
+      "learning_rate": 3.5990662950730627e-06,
+      "loss": 0.7691,
+      "step": 47890
+    },
+    {
+      "epoch": 1.8944412584785146,
+      "grad_norm": 1.5058105612611492,
+      "learning_rate": 3.596857614963086e-06,
+      "loss": 0.7726,
+      "step": 47900
+    },
+    {
+      "epoch": 1.8948367576974037,
+      "grad_norm": 1.424422232755828,
+      "learning_rate": 3.5946492320146254e-06,
+      "loss": 0.7605,
+      "step": 47910
+    },
+    {
+      "epoch": 1.8952322569162927,
+      "grad_norm": 1.5879870416430097,
+      "learning_rate": 3.5924411466953802e-06,
+      "loss": 0.7655,
+      "step": 47920
+    },
+    {
+      "epoch": 1.8956277561351818,
+      "grad_norm": 1.4769514544175584,
+      "learning_rate": 3.5902333594729865e-06,
+      "loss": 0.7578,
+      "step": 47930
+    },
+    {
+      "epoch": 1.8960232553540708,
+      "grad_norm": 1.6133341181371539,
+      "learning_rate": 3.588025870815014e-06,
+      "loss": 0.7689,
+      "step": 47940
+    },
+    {
+      "epoch": 1.8964187545729598,
+      "grad_norm": 1.3229647584098536,
+      "learning_rate": 3.585818681188972e-06,
+      "loss": 0.747,
+      "step": 47950
+    },
+    {
+      "epoch": 1.8968142537918489,
+      "grad_norm": 1.4620021502426253,
+      "learning_rate": 3.583611791062306e-06,
+      "loss": 0.7764,
+      "step": 47960
+    },
+    {
+      "epoch": 1.897209753010738,
+      "grad_norm": 1.4497171767610308,
+      "learning_rate": 3.581405200902396e-06,
+      "loss": 0.7777,
+      "step": 47970
+    },
+    {
+      "epoch": 1.897605252229627,
+      "grad_norm": 1.4626256602535805,
+      "learning_rate": 3.5791989111765623e-06,
+      "loss": 0.7365,
+      "step": 47980
+    },
+    {
+      "epoch": 1.898000751448516,
+      "grad_norm": 1.571066095295492,
+      "learning_rate": 3.576992922352057e-06,
+      "loss": 0.7651,
+      "step": 47990
+    },
+    {
+      "epoch": 1.898396250667405,
+      "grad_norm": 1.4671233826193422,
+      "learning_rate": 3.574787234896071e-06,
+      "loss": 0.7767,
+      "step": 48000
+    },
+    {
+      "epoch": 1.898791749886294,
+      "grad_norm": 1.246437399219056,
+      "learning_rate": 3.5725818492757313e-06,
+      "loss": 0.7522,
+      "step": 48010
+    },
+    {
+      "epoch": 1.899187249105183,
+      "grad_norm": 1.746162383396902,
+      "learning_rate": 3.5703767659581036e-06,
+      "loss": 0.7701,
+      "step": 48020
+    },
+    {
+      "epoch": 1.8995827483240721,
+      "grad_norm": 1.3201895423489198,
+      "learning_rate": 3.568171985410183e-06,
+      "loss": 0.7495,
+      "step": 48030
+    },
+    {
+      "epoch": 1.8999782475429612,
+      "grad_norm": 1.3726797329295566,
+      "learning_rate": 3.5659675080989048e-06,
+      "loss": 0.7704,
+      "step": 48040
+    },
+    {
+      "epoch": 1.9003737467618502,
+      "grad_norm": 1.5378643294240089,
+      "learning_rate": 3.5637633344911405e-06,
+      "loss": 0.7518,
+      "step": 48050
+    },
+    {
+      "epoch": 1.9007692459807393,
+      "grad_norm": 1.2225824926877078,
+      "learning_rate": 3.5615594650536957e-06,
+      "loss": 0.7743,
+      "step": 48060
+    },
+    {
+      "epoch": 1.9011647451996283,
+      "grad_norm": 1.6485901652446218,
+      "learning_rate": 3.5593559002533127e-06,
+      "loss": 0.7312,
+      "step": 48070
+    },
+    {
+      "epoch": 1.9015602444185173,
+      "grad_norm": 1.5411959745657817,
+      "learning_rate": 3.5571526405566685e-06,
+      "loss": 0.7784,
+      "step": 48080
+    },
+    {
+      "epoch": 1.9019557436374064,
+      "grad_norm": 1.294695125082055,
+      "learning_rate": 3.5549496864303762e-06,
+      "loss": 0.769,
+      "step": 48090
+    },
+    {
+      "epoch": 1.9023512428562954,
+      "grad_norm": 1.9705813153659901,
+      "learning_rate": 3.5527470383409833e-06,
+      "loss": 0.7509,
+      "step": 48100
+    },
+    {
+      "epoch": 1.9027467420751845,
+      "grad_norm": 1.160740302996935,
+      "learning_rate": 3.550544696754973e-06,
+      "loss": 0.7647,
+      "step": 48110
+    },
+    {
+      "epoch": 1.9031422412940735,
+      "grad_norm": 1.4084496739700558,
+      "learning_rate": 3.548342662138764e-06,
+      "loss": 0.7665,
+      "step": 48120
+    },
+    {
+      "epoch": 1.9035377405129625,
+      "grad_norm": 1.1630777810269677,
+      "learning_rate": 3.546140934958708e-06,
+      "loss": 0.7598,
+      "step": 48130
+    },
+    {
+      "epoch": 1.9039332397318516,
+      "grad_norm": 1.2672876152702206,
+      "learning_rate": 3.5439395156810974e-06,
+      "loss": 0.7537,
+      "step": 48140
+    },
+    {
+      "epoch": 1.9043287389507406,
+      "grad_norm": 1.534236507509776,
+      "learning_rate": 3.5417384047721496e-06,
+      "loss": 0.7706,
+      "step": 48150
+    },
+    {
+      "epoch": 1.9047242381696297,
+      "grad_norm": 1.466027993497561,
+      "learning_rate": 3.5395376026980246e-06,
+      "loss": 0.7588,
+      "step": 48160
+    },
+    {
+      "epoch": 1.9051197373885187,
+      "grad_norm": 1.5466614037393556,
+      "learning_rate": 3.5373371099248137e-06,
+      "loss": 0.7463,
+      "step": 48170
+    },
+    {
+      "epoch": 1.9055152366074077,
+      "grad_norm": 1.439383345988291,
+      "learning_rate": 3.5351369269185456e-06,
+      "loss": 0.7677,
+      "step": 48180
+    },
+    {
+      "epoch": 1.9059107358262968,
+      "grad_norm": 1.2989300898032212,
+      "learning_rate": 3.5329370541451785e-06,
+      "loss": 0.7929,
+      "step": 48190
+    },
+    {
+      "epoch": 1.9063062350451858,
+      "grad_norm": 1.5356930205726336,
+      "learning_rate": 3.53073749207061e-06,
+      "loss": 0.7331,
+      "step": 48200
+    },
+    {
+      "epoch": 1.9067017342640749,
+      "grad_norm": 1.260874660528549,
+      "learning_rate": 3.52853824116067e-06,
+      "loss": 0.7709,
+      "step": 48210
+    },
+    {
+      "epoch": 1.907097233482964,
+      "grad_norm": 1.5015594923176887,
+      "learning_rate": 3.5263393018811203e-06,
+      "loss": 0.7584,
+      "step": 48220
+    },
+    {
+      "epoch": 1.907492732701853,
+      "grad_norm": 1.6731768466238122,
+      "learning_rate": 3.5241406746976593e-06,
+      "loss": 0.7483,
+      "step": 48230
+    },
+    {
+      "epoch": 1.907888231920742,
+      "grad_norm": 1.58439474247732,
+      "learning_rate": 3.5219423600759183e-06,
+      "loss": 0.7702,
+      "step": 48240
+    },
+    {
+      "epoch": 1.908283731139631,
+      "grad_norm": 1.432798190434656,
+      "learning_rate": 3.519744358481464e-06,
+      "loss": 0.7583,
+      "step": 48250
+    },
+    {
+      "epoch": 1.90867923035852,
+      "grad_norm": 1.4980797527771759,
+      "learning_rate": 3.517546670379795e-06,
+      "loss": 0.7765,
+      "step": 48260
+    },
+    {
+      "epoch": 1.909074729577409,
+      "grad_norm": 1.6516813346331432,
+      "learning_rate": 3.5153492962363435e-06,
+      "loss": 0.7418,
+      "step": 48270
+    },
+    {
+      "epoch": 1.9094702287962981,
+      "grad_norm": 1.2163086985860219,
+      "learning_rate": 3.513152236516475e-06,
+      "loss": 0.754,
+      "step": 48280
+    },
+    {
+      "epoch": 1.9098657280151872,
+      "grad_norm": 1.378064511799827,
+      "learning_rate": 3.5109554916854893e-06,
+      "loss": 0.7214,
+      "step": 48290
+    },
+    {
+      "epoch": 1.9102612272340762,
+      "grad_norm": 1.2866435107623866,
+      "learning_rate": 3.5087590622086205e-06,
+      "loss": 0.7776,
+      "step": 48300
+    },
+    {
+      "epoch": 1.9106567264529652,
+      "grad_norm": 1.4979418450519701,
+      "learning_rate": 3.5065629485510338e-06,
+      "loss": 0.7628,
+      "step": 48310
+    },
+    {
+      "epoch": 1.9110522256718543,
+      "grad_norm": 1.4333142807341872,
+      "learning_rate": 3.504367151177829e-06,
+      "loss": 0.7511,
+      "step": 48320
+    },
+    {
+      "epoch": 1.9114477248907433,
+      "grad_norm": 1.4839016128316724,
+      "learning_rate": 3.5021716705540375e-06,
+      "loss": 0.7526,
+      "step": 48330
+    },
+    {
+      "epoch": 1.9118432241096324,
+      "grad_norm": 1.2815094503791602,
+      "learning_rate": 3.4999765071446258e-06,
+      "loss": 0.7624,
+      "step": 48340
+    },
+    {
+      "epoch": 1.9122387233285214,
+      "grad_norm": 1.1597589019802432,
+      "learning_rate": 3.497781661414491e-06,
+      "loss": 0.7963,
+      "step": 48350
+    },
+    {
+      "epoch": 1.9126342225474104,
+      "grad_norm": 1.2710576182177307,
+      "learning_rate": 3.4955871338284637e-06,
+      "loss": 0.7732,
+      "step": 48360
+    },
+    {
+      "epoch": 1.9130297217662995,
+      "grad_norm": 1.5194220002008914,
+      "learning_rate": 3.4933929248513075e-06,
+      "loss": 0.7568,
+      "step": 48370
+    },
+    {
+      "epoch": 1.9134252209851885,
+      "grad_norm": 1.648735011704732,
+      "learning_rate": 3.4911990349477187e-06,
+      "loss": 0.7574,
+      "step": 48380
+    },
+    {
+      "epoch": 1.9138207202040776,
+      "grad_norm": 1.8509268928634877,
+      "learning_rate": 3.4890054645823274e-06,
+      "loss": 0.7491,
+      "step": 48390
+    },
+    {
+      "epoch": 1.9142162194229666,
+      "grad_norm": 1.7741533543757104,
+      "learning_rate": 3.4868122142196897e-06,
+      "loss": 0.7512,
+      "step": 48400
+    },
+    {
+      "epoch": 1.9146117186418556,
+      "grad_norm": 1.5846453909260099,
+      "learning_rate": 3.484619284324301e-06,
+      "loss": 0.7453,
+      "step": 48410
+    },
+    {
+      "epoch": 1.9150072178607447,
+      "grad_norm": 1.385084597775232,
+      "learning_rate": 3.4824266753605864e-06,
+      "loss": 0.7714,
+      "step": 48420
+    },
+    {
+      "epoch": 1.9154027170796337,
+      "grad_norm": 1.5675267484745021,
+      "learning_rate": 3.4802343877929017e-06,
+      "loss": 0.7719,
+      "step": 48430
+    },
+    {
+      "epoch": 1.9157982162985228,
+      "grad_norm": 1.5766883116121044,
+      "learning_rate": 3.4780424220855375e-06,
+      "loss": 0.7489,
+      "step": 48440
+    },
+    {
+      "epoch": 1.9161937155174118,
+      "grad_norm": 1.4727666318080896,
+      "learning_rate": 3.4758507787027146e-06,
+      "loss": 0.7889,
+      "step": 48450
+    },
+    {
+      "epoch": 1.9165892147363008,
+      "grad_norm": 1.4609562670679748,
+      "learning_rate": 3.4736594581085837e-06,
+      "loss": 0.7569,
+      "step": 48460
+    },
+    {
+      "epoch": 1.9169847139551899,
+      "grad_norm": 1.6013206194665468,
+      "learning_rate": 3.47146846076723e-06,
+      "loss": 0.7751,
+      "step": 48470
+    },
+    {
+      "epoch": 1.917380213174079,
+      "grad_norm": 1.637435622210086,
+      "learning_rate": 3.4692777871426695e-06,
+      "loss": 0.7535,
+      "step": 48480
+    },
+    {
+      "epoch": 1.917775712392968,
+      "grad_norm": 1.340507590365922,
+      "learning_rate": 3.467087437698849e-06,
+      "loss": 0.7448,
+      "step": 48490
+    },
+    {
+      "epoch": 1.918171211611857,
+      "grad_norm": 1.3083632649820889,
+      "learning_rate": 3.4648974128996472e-06,
+      "loss": 0.7688,
+      "step": 48500
+    },
+    {
+      "epoch": 1.918566710830746,
+      "grad_norm": 1.3394916046289926,
+      "learning_rate": 3.4627077132088748e-06,
+      "loss": 0.781,
+      "step": 48510
+    },
+    {
+      "epoch": 1.918962210049635,
+      "grad_norm": 1.5131590329594262,
+      "learning_rate": 3.4605183390902703e-06,
+      "loss": 0.7182,
+      "step": 48520
+    },
+    {
+      "epoch": 1.9193577092685241,
+      "grad_norm": 1.4898860941093186,
+      "learning_rate": 3.458329291007507e-06,
+      "loss": 0.7577,
+      "step": 48530
+    },
+    {
+      "epoch": 1.9197532084874132,
+      "grad_norm": 1.7497731059162311,
+      "learning_rate": 3.4561405694241872e-06,
+      "loss": 0.7349,
+      "step": 48540
+    },
+    {
+      "epoch": 1.9201487077063022,
+      "grad_norm": 1.7178735818647393,
+      "learning_rate": 3.453952174803845e-06,
+      "loss": 0.7698,
+      "step": 48550
+    },
+    {
+      "epoch": 1.9205442069251912,
+      "grad_norm": 1.2341161006663444,
+      "learning_rate": 3.4517641076099455e-06,
+      "loss": 0.7392,
+      "step": 48560
+    },
+    {
+      "epoch": 1.9209397061440803,
+      "grad_norm": 1.2990106198790634,
+      "learning_rate": 3.4495763683058837e-06,
+      "loss": 0.7599,
+      "step": 48570
+    },
+    {
+      "epoch": 1.9213352053629693,
+      "grad_norm": 1.6598828214155192,
+      "learning_rate": 3.447388957354984e-06,
+      "loss": 0.7609,
+      "step": 48580
+    },
+    {
+      "epoch": 1.9217307045818584,
+      "grad_norm": 1.2076112304285591,
+      "learning_rate": 3.445201875220504e-06,
+      "loss": 0.7702,
+      "step": 48590
+    },
+    {
+      "epoch": 1.9221262038007474,
+      "grad_norm": 1.2795179070829605,
+      "learning_rate": 3.4430151223656293e-06,
+      "loss": 0.7486,
+      "step": 48600
+    },
+    {
+      "epoch": 1.9225217030196364,
+      "grad_norm": 1.4980234359919122,
+      "learning_rate": 3.4408286992534778e-06,
+      "loss": 0.7855,
+      "step": 48610
+    },
+    {
+      "epoch": 1.9229172022385255,
+      "grad_norm": 1.2031824654733618,
+      "learning_rate": 3.4386426063470952e-06,
+      "loss": 0.7468,
+      "step": 48620
+    },
+    {
+      "epoch": 1.9233127014574145,
+      "grad_norm": 1.3813304905431087,
+      "learning_rate": 3.4364568441094614e-06,
+      "loss": 0.8035,
+      "step": 48630
+    },
+    {
+      "epoch": 1.9237082006763035,
+      "grad_norm": 1.5370940755529536,
+      "learning_rate": 3.4342714130034794e-06,
+      "loss": 0.7831,
+      "step": 48640
+    },
+    {
+      "epoch": 1.9241036998951926,
+      "grad_norm": 1.6620931428241879,
+      "learning_rate": 3.4320863134919867e-06,
+      "loss": 0.7384,
+      "step": 48650
+    },
+    {
+      "epoch": 1.9244991991140816,
+      "grad_norm": 1.4524789950188688,
+      "learning_rate": 3.4299015460377517e-06,
+      "loss": 0.7419,
+      "step": 48660
+    },
+    {
+      "epoch": 1.9248946983329707,
+      "grad_norm": 1.4055892594981305,
+      "learning_rate": 3.4277171111034703e-06,
+      "loss": 0.7724,
+      "step": 48670
+    },
+    {
+      "epoch": 1.9252901975518597,
+      "grad_norm": 1.3104777161366816,
+      "learning_rate": 3.425533009151769e-06,
+      "loss": 0.7721,
+      "step": 48680
+    },
+    {
+      "epoch": 1.9256856967707487,
+      "grad_norm": 1.294980341625533,
+      "learning_rate": 3.423349240645201e-06,
+      "loss": 0.7515,
+      "step": 48690
+    },
+    {
+      "epoch": 1.9260811959896378,
+      "grad_norm": 1.3831266983043116,
+      "learning_rate": 3.421165806046253e-06,
+      "loss": 0.761,
+      "step": 48700
+    },
+    {
+      "epoch": 1.9264766952085268,
+      "grad_norm": 1.8914936315985789,
+      "learning_rate": 3.4189827058173373e-06,
+      "loss": 0.7625,
+      "step": 48710
+    },
+    {
+      "epoch": 1.9268721944274159,
+      "grad_norm": 1.4046195571591606,
+      "learning_rate": 3.416799940420799e-06,
+      "loss": 0.763,
+      "step": 48720
+    },
+    {
+      "epoch": 1.927267693646305,
+      "grad_norm": 1.5114377469229656,
+      "learning_rate": 3.4146175103189093e-06,
+      "loss": 0.7578,
+      "step": 48730
+    },
+    {
+      "epoch": 1.927663192865194,
+      "grad_norm": 1.505956086773398,
+      "learning_rate": 3.4124354159738706e-06,
+      "loss": 0.7728,
+      "step": 48740
+    },
+    {
+      "epoch": 1.928058692084083,
+      "grad_norm": 1.7177236509143539,
+      "learning_rate": 3.4102536578478128e-06,
+      "loss": 0.764,
+      "step": 48750
+    },
+    {
+      "epoch": 1.928454191302972,
+      "grad_norm": 1.4892797158043651,
+      "learning_rate": 3.408072236402794e-06,
+      "loss": 0.7516,
+      "step": 48760
+    },
+    {
+      "epoch": 1.928849690521861,
+      "grad_norm": 1.5692821668259482,
+      "learning_rate": 3.4058911521008015e-06,
+      "loss": 0.7585,
+      "step": 48770
+    },
+    {
+      "epoch": 1.92924518974075,
+      "grad_norm": 1.2803271335932815,
+      "learning_rate": 3.4037104054037527e-06,
+      "loss": 0.7461,
+      "step": 48780
+    },
+    {
+      "epoch": 1.9296406889596391,
+      "grad_norm": 1.5675715242674635,
+      "learning_rate": 3.4015299967734918e-06,
+      "loss": 0.7289,
+      "step": 48790
+    },
+    {
+      "epoch": 1.9300361881785284,
+      "grad_norm": 1.3890223069054999,
+      "learning_rate": 3.3993499266717923e-06,
+      "loss": 0.7697,
+      "step": 48800
+    },
+    {
+      "epoch": 1.9304316873974174,
+      "grad_norm": 1.5466298113770032,
+      "learning_rate": 3.3971701955603566e-06,
+      "loss": 0.737,
+      "step": 48810
+    },
+    {
+      "epoch": 1.9308271866163065,
+      "grad_norm": 1.3084112723217525,
+      "learning_rate": 3.3949908039008122e-06,
+      "loss": 0.7503,
+      "step": 48820
+    },
+    {
+      "epoch": 1.9312226858351955,
+      "grad_norm": 1.4734269768329404,
+      "learning_rate": 3.392811752154719e-06,
+      "loss": 0.7413,
+      "step": 48830
+    },
+    {
+      "epoch": 1.9316181850540846,
+      "grad_norm": 1.5581941009276234,
+      "learning_rate": 3.390633040783562e-06,
+      "loss": 0.7446,
+      "step": 48840
+    },
+    {
+      "epoch": 1.9320136842729736,
+      "grad_norm": 1.5897203197148722,
+      "learning_rate": 3.388454670248754e-06,
+      "loss": 0.7447,
+      "step": 48850
+    },
+    {
+      "epoch": 1.9324091834918626,
+      "grad_norm": 1.4761346466781393,
+      "learning_rate": 3.38627664101164e-06,
+      "loss": 0.7403,
+      "step": 48860
+    },
+    {
+      "epoch": 1.9328046827107517,
+      "grad_norm": 1.2618642542204772,
+      "learning_rate": 3.384098953533485e-06,
+      "loss": 0.7461,
+      "step": 48870
+    },
+    {
+      "epoch": 1.9332001819296407,
+      "grad_norm": 1.3521030552741606,
+      "learning_rate": 3.381921608275489e-06,
+      "loss": 0.7648,
+      "step": 48880
+    },
+    {
+      "epoch": 1.9335956811485298,
+      "grad_norm": 1.1312807510275773,
+      "learning_rate": 3.3797446056987737e-06,
+      "loss": 0.7666,
+      "step": 48890
+    },
+    {
+      "epoch": 1.9339911803674188,
+      "grad_norm": 1.4020658782001285,
+      "learning_rate": 3.377567946264393e-06,
+      "loss": 0.7352,
+      "step": 48900
+    },
+    {
+      "epoch": 1.9343866795863078,
+      "grad_norm": 1.5485316861848237,
+      "learning_rate": 3.3753916304333258e-06,
+      "loss": 0.7393,
+      "step": 48910
+    },
+    {
+      "epoch": 1.9347821788051969,
+      "grad_norm": 1.2181391824146819,
+      "learning_rate": 3.3732156586664777e-06,
+      "loss": 0.7644,
+      "step": 48920
+    },
+    {
+      "epoch": 1.935177678024086,
+      "grad_norm": 1.5293651300305036,
+      "learning_rate": 3.371040031424683e-06,
+      "loss": 0.7908,
+      "step": 48930
+    },
+    {
+      "epoch": 1.935573177242975,
+      "grad_norm": 1.5103491018102542,
+      "learning_rate": 3.3688647491687014e-06,
+      "loss": 0.7681,
+      "step": 48940
+    },
+    {
+      "epoch": 1.935968676461864,
+      "grad_norm": 1.543737479420635,
+      "learning_rate": 3.3666898123592214e-06,
+      "loss": 0.725,
+      "step": 48950
+    },
+    {
+      "epoch": 1.936364175680753,
+      "grad_norm": 1.578888859247185,
+      "learning_rate": 3.3645152214568567e-06,
+      "loss": 0.7679,
+      "step": 48960
+    },
+    {
+      "epoch": 1.936759674899642,
+      "grad_norm": 1.4035187295134819,
+      "learning_rate": 3.3623409769221482e-06,
+      "loss": 0.7762,
+      "step": 48970
+    },
+    {
+      "epoch": 1.937155174118531,
+      "grad_norm": 1.546934871807321,
+      "learning_rate": 3.360167079215565e-06,
+      "loss": 0.7611,
+      "step": 48980
+    },
+    {
+      "epoch": 1.9375506733374201,
+      "grad_norm": 1.3246807323843184,
+      "learning_rate": 3.3579935287975003e-06,
+      "loss": 0.7625,
+      "step": 48990
+    },
+    {
+      "epoch": 1.9379461725563092,
+      "grad_norm": 1.4447933562823514,
+      "learning_rate": 3.3558203261282767e-06,
+      "loss": 0.7315,
+      "step": 49000
+    },
+    {
+      "epoch": 1.9383416717751982,
+      "grad_norm": 1.4436902647042094,
+      "learning_rate": 3.353647471668138e-06,
+      "loss": 0.7608,
+      "step": 49010
+    },
+    {
+      "epoch": 1.9387371709940873,
+      "grad_norm": 1.4259839029396255,
+      "learning_rate": 3.351474965877258e-06,
+      "loss": 0.7819,
+      "step": 49020
+    },
+    {
+      "epoch": 1.9391326702129763,
+      "grad_norm": 1.5828484745009785,
+      "learning_rate": 3.3493028092157386e-06,
+      "loss": 0.7506,
+      "step": 49030
+    },
+    {
+      "epoch": 1.9395281694318653,
+      "grad_norm": 1.4226258962452412,
+      "learning_rate": 3.3471310021436044e-06,
+      "loss": 0.7425,
+      "step": 49040
+    },
+    {
+      "epoch": 1.9399236686507544,
+      "grad_norm": 1.4578316701814398,
+      "learning_rate": 3.3449595451208062e-06,
+      "loss": 0.7613,
+      "step": 49050
+    },
+    {
+      "epoch": 1.9403191678696434,
+      "grad_norm": 1.4540644265331437,
+      "learning_rate": 3.3427884386072216e-06,
+      "loss": 0.7696,
+      "step": 49060
+    },
+    {
+      "epoch": 1.9407146670885325,
+      "grad_norm": 1.3271743142653192,
+      "learning_rate": 3.3406176830626547e-06,
+      "loss": 0.772,
+      "step": 49070
+    },
+    {
+      "epoch": 1.9411101663074215,
+      "grad_norm": 1.3187071815920612,
+      "learning_rate": 3.3384472789468323e-06,
+      "loss": 0.7879,
+      "step": 49080
+    },
+    {
+      "epoch": 1.9415056655263105,
+      "grad_norm": 1.4905494881325165,
+      "learning_rate": 3.3362772267194117e-06,
+      "loss": 0.7665,
+      "step": 49090
+    },
+    {
+      "epoch": 1.9419011647451996,
+      "grad_norm": 1.4644771396011695,
+      "learning_rate": 3.3341075268399716e-06,
+      "loss": 0.7401,
+      "step": 49100
+    },
+    {
+      "epoch": 1.9422966639640886,
+      "grad_norm": 1.2426524751269876,
+      "learning_rate": 3.331938179768016e-06,
+      "loss": 0.7301,
+      "step": 49110
+    },
+    {
+      "epoch": 1.9426921631829777,
+      "grad_norm": 1.5486693015515494,
+      "learning_rate": 3.3297691859629776e-06,
+      "loss": 0.7563,
+      "step": 49120
+    },
+    {
+      "epoch": 1.943087662401867,
+      "grad_norm": 1.7099274403192768,
+      "learning_rate": 3.32760054588421e-06,
+      "loss": 0.7519,
+      "step": 49130
+    },
+    {
+      "epoch": 1.943483161620756,
+      "grad_norm": 1.9001952166461693,
+      "learning_rate": 3.3254322599909944e-06,
+      "loss": 0.7422,
+      "step": 49140
+    },
+    {
+      "epoch": 1.943878660839645,
+      "grad_norm": 1.7038576805585015,
+      "learning_rate": 3.323264328742538e-06,
+      "loss": 0.7348,
+      "step": 49150
+    },
+    {
+      "epoch": 1.944274160058534,
+      "grad_norm": 1.5195556853745318,
+      "learning_rate": 3.3210967525979705e-06,
+      "loss": 0.7271,
+      "step": 49160
+    },
+    {
+      "epoch": 1.944669659277423,
+      "grad_norm": 1.5813132310844498,
+      "learning_rate": 3.3189295320163465e-06,
+      "loss": 0.7533,
+      "step": 49170
+    },
+    {
+      "epoch": 1.9450651584963121,
+      "grad_norm": 1.4061286335668366,
+      "learning_rate": 3.3167626674566477e-06,
+      "loss": 0.7534,
+      "step": 49180
+    },
+    {
+      "epoch": 1.9454606577152012,
+      "grad_norm": 1.4926974593798348,
+      "learning_rate": 3.3145961593777785e-06,
+      "loss": 0.7554,
+      "step": 49190
+    },
+    {
+      "epoch": 1.9458561569340902,
+      "grad_norm": 1.5428129311889018,
+      "learning_rate": 3.312430008238568e-06,
+      "loss": 0.7489,
+      "step": 49200
+    },
+    {
+      "epoch": 1.9462516561529792,
+      "grad_norm": 1.267836886416892,
+      "learning_rate": 3.3102642144977702e-06,
+      "loss": 0.7596,
+      "step": 49210
+    },
+    {
+      "epoch": 1.9466471553718683,
+      "grad_norm": 1.3416948983027432,
+      "learning_rate": 3.308098778614062e-06,
+      "loss": 0.7685,
+      "step": 49220
+    },
+    {
+      "epoch": 1.9470426545907573,
+      "grad_norm": 2.079395250847278,
+      "learning_rate": 3.305933701046048e-06,
+      "loss": 0.7446,
+      "step": 49230
+    },
+    {
+      "epoch": 1.9474381538096464,
+      "grad_norm": 1.4652609744736982,
+      "learning_rate": 3.303768982252254e-06,
+      "loss": 0.7453,
+      "step": 49240
+    },
+    {
+      "epoch": 1.9478336530285354,
+      "grad_norm": 1.520013031890141,
+      "learning_rate": 3.3016046226911275e-06,
+      "loss": 0.7513,
+      "step": 49250
+    },
+    {
+      "epoch": 1.9482291522474244,
+      "grad_norm": 1.5059464962824864,
+      "learning_rate": 3.2994406228210446e-06,
+      "loss": 0.7572,
+      "step": 49260
+    },
+    {
+      "epoch": 1.9486246514663135,
+      "grad_norm": 1.5431473929414292,
+      "learning_rate": 3.2972769831003037e-06,
+      "loss": 0.7624,
+      "step": 49270
+    },
+    {
+      "epoch": 1.9490201506852025,
+      "grad_norm": 1.6683943154884664,
+      "learning_rate": 3.295113703987126e-06,
+      "loss": 0.7625,
+      "step": 49280
+    },
+    {
+      "epoch": 1.9494156499040916,
+      "grad_norm": 1.7187373024759323,
+      "learning_rate": 3.2929507859396583e-06,
+      "loss": 0.777,
+      "step": 49290
+    },
+    {
+      "epoch": 1.9498111491229806,
+      "grad_norm": 1.4338741058739413,
+      "learning_rate": 3.2907882294159676e-06,
+      "loss": 0.7622,
+      "step": 49300
+    },
+    {
+      "epoch": 1.9502066483418696,
+      "grad_norm": 1.2694241209479993,
+      "learning_rate": 3.2886260348740486e-06,
+      "loss": 0.7518,
+      "step": 49310
+    },
+    {
+      "epoch": 1.9506021475607587,
+      "grad_norm": 1.2918390528054295,
+      "learning_rate": 3.2864642027718145e-06,
+      "loss": 0.7618,
+      "step": 49320
+    },
+    {
+      "epoch": 1.9509976467796477,
+      "grad_norm": 1.8233829713077558,
+      "learning_rate": 3.2843027335671073e-06,
+      "loss": 0.7195,
+      "step": 49330
+    },
+    {
+      "epoch": 1.9513931459985367,
+      "grad_norm": 1.4349392796064928,
+      "learning_rate": 3.2821416277176866e-06,
+      "loss": 0.7812,
+      "step": 49340
+    },
+    {
+      "epoch": 1.9517886452174258,
+      "grad_norm": 1.5777762532946922,
+      "learning_rate": 3.279980885681238e-06,
+      "loss": 0.7555,
+      "step": 49350
+    },
+    {
+      "epoch": 1.9521841444363148,
+      "grad_norm": 1.2528261526068245,
+      "learning_rate": 3.277820507915371e-06,
+      "loss": 0.7498,
+      "step": 49360
+    },
+    {
+      "epoch": 1.9525796436552039,
+      "grad_norm": 1.4311290839123112,
+      "learning_rate": 3.2756604948776162e-06,
+      "loss": 0.7464,
+      "step": 49370
+    },
+    {
+      "epoch": 1.952975142874093,
+      "grad_norm": 1.4245912022898137,
+      "learning_rate": 3.2735008470254253e-06,
+      "loss": 0.7319,
+      "step": 49380
+    },
+    {
+      "epoch": 1.953370642092982,
+      "grad_norm": 1.3913165342796276,
+      "learning_rate": 3.2713415648161784e-06,
+      "loss": 0.76,
+      "step": 49390
+    },
+    {
+      "epoch": 1.953766141311871,
+      "grad_norm": 1.3409731134475538,
+      "learning_rate": 3.2691826487071706e-06,
+      "loss": 0.7574,
+      "step": 49400
+    },
+    {
+      "epoch": 1.95416164053076,
+      "grad_norm": 1.538553591011493,
+      "learning_rate": 3.2670240991556246e-06,
+      "loss": 0.7515,
+      "step": 49410
+    },
+    {
+      "epoch": 1.954557139749649,
+      "grad_norm": 1.2293005895073346,
+      "learning_rate": 3.264865916618686e-06,
+      "loss": 0.7366,
+      "step": 49420
+    },
+    {
+      "epoch": 1.954952638968538,
+      "grad_norm": 1.6330229907868956,
+      "learning_rate": 3.262708101553419e-06,
+      "loss": 0.7554,
+      "step": 49430
+    },
+    {
+      "epoch": 1.9553481381874271,
+      "grad_norm": 1.227599314546672,
+      "learning_rate": 3.260550654416812e-06,
+      "loss": 0.7638,
+      "step": 49440
+    },
+    {
+      "epoch": 1.9557436374063162,
+      "grad_norm": 1.782317767172154,
+      "learning_rate": 3.2583935756657765e-06,
+      "loss": 0.7275,
+      "step": 49450
+    },
+    {
+      "epoch": 1.9561391366252052,
+      "grad_norm": 1.517257695031236,
+      "learning_rate": 3.256236865757144e-06,
+      "loss": 0.7758,
+      "step": 49460
+    },
+    {
+      "epoch": 1.9565346358440943,
+      "grad_norm": 1.848947159628992,
+      "learning_rate": 3.2540805251476686e-06,
+      "loss": 0.7657,
+      "step": 49470
+    },
+    {
+      "epoch": 1.9569301350629833,
+      "grad_norm": 1.357699368333366,
+      "learning_rate": 3.251924554294027e-06,
+      "loss": 0.7397,
+      "step": 49480
+    },
+    {
+      "epoch": 1.9573256342818723,
+      "grad_norm": 1.488430604393747,
+      "learning_rate": 3.249768953652818e-06,
+      "loss": 0.7564,
+      "step": 49490
+    },
+    {
+      "epoch": 1.9577211335007614,
+      "grad_norm": 1.5031648287920552,
+      "learning_rate": 3.247613723680558e-06,
+      "loss": 0.7272,
+      "step": 49500
+    },
+    {
+      "epoch": 1.9581166327196504,
+      "grad_norm": 1.5734375701773522,
+      "learning_rate": 3.2454588648336883e-06,
+      "loss": 0.7555,
+      "step": 49510
+    },
+    {
+      "epoch": 1.9585121319385395,
+      "grad_norm": 1.5026824892908905,
+      "learning_rate": 3.2433043775685726e-06,
+      "loss": 0.7507,
+      "step": 49520
+    },
+    {
+      "epoch": 1.9589076311574285,
+      "grad_norm": 1.391410395956086,
+      "learning_rate": 3.2411502623414925e-06,
+      "loss": 0.7667,
+      "step": 49530
+    },
+    {
+      "epoch": 1.9593031303763175,
+      "grad_norm": 1.375393984038744,
+      "learning_rate": 3.238996519608655e-06,
+      "loss": 0.7321,
+      "step": 49540
+    },
+    {
+      "epoch": 1.9596986295952066,
+      "grad_norm": 1.4985941461219778,
+      "learning_rate": 3.2368431498261843e-06,
+      "loss": 0.7452,
+      "step": 49550
+    },
+    {
+      "epoch": 1.9600941288140956,
+      "grad_norm": 1.3261002167446854,
+      "learning_rate": 3.2346901534501284e-06,
+      "loss": 0.7438,
+      "step": 49560
+    },
+    {
+      "epoch": 1.9604896280329847,
+      "grad_norm": 1.3234637320520948,
+      "learning_rate": 3.232537530936455e-06,
+      "loss": 0.7297,
+      "step": 49570
+    },
+    {
+      "epoch": 1.9608851272518737,
+      "grad_norm": 1.6337549339603916,
+      "learning_rate": 3.2303852827410507e-06,
+      "loss": 0.7363,
+      "step": 49580
+    },
+    {
+      "epoch": 1.9612806264707627,
+      "grad_norm": 1.6417254549010538,
+      "learning_rate": 3.2282334093197264e-06,
+      "loss": 0.7578,
+      "step": 49590
+    },
+    {
+      "epoch": 1.9616761256896518,
+      "grad_norm": 1.4706272719879383,
+      "learning_rate": 3.2260819111282116e-06,
+      "loss": 0.7684,
+      "step": 49600
+    },
+    {
+      "epoch": 1.9620716249085408,
+      "grad_norm": 1.4205193802129494,
+      "learning_rate": 3.2239307886221584e-06,
+      "loss": 0.7593,
+      "step": 49610
+    },
+    {
+      "epoch": 1.9624671241274299,
+      "grad_norm": 1.2850868967229856,
+      "learning_rate": 3.2217800422571355e-06,
+      "loss": 0.7203,
+      "step": 49620
+    },
+    {
+      "epoch": 1.962862623346319,
+      "grad_norm": 1.629159667372725,
+      "learning_rate": 3.2196296724886344e-06,
+      "loss": 0.7482,
+      "step": 49630
+    },
+    {
+      "epoch": 1.963258122565208,
+      "grad_norm": 1.4239030714198087,
+      "learning_rate": 3.217479679772067e-06,
+      "loss": 0.7187,
+      "step": 49640
+    },
+    {
+      "epoch": 1.963653621784097,
+      "grad_norm": 1.344343091529516,
+      "learning_rate": 3.215330064562765e-06,
+      "loss": 0.7679,
+      "step": 49650
+    },
+    {
+      "epoch": 1.964049121002986,
+      "grad_norm": 1.5223572839653838,
+      "learning_rate": 3.2131808273159797e-06,
+      "loss": 0.74,
+      "step": 49660
+    },
+    {
+      "epoch": 1.964444620221875,
+      "grad_norm": 1.3005502780582872,
+      "learning_rate": 3.2110319684868828e-06,
+      "loss": 0.757,
+      "step": 49670
+    },
+    {
+      "epoch": 1.964840119440764,
+      "grad_norm": 1.5729654691099022,
+      "learning_rate": 3.2088834885305663e-06,
+      "loss": 0.7544,
+      "step": 49680
+    },
+    {
+      "epoch": 1.9652356186596531,
+      "grad_norm": 1.532141530435542,
+      "learning_rate": 3.2067353879020417e-06,
+      "loss": 0.7391,
+      "step": 49690
+    },
+    {
+      "epoch": 1.9656311178785422,
+      "grad_norm": 1.4893212963382882,
+      "learning_rate": 3.2045876670562392e-06,
+      "loss": 0.7412,
+      "step": 49700
+    },
+    {
+      "epoch": 1.9660266170974312,
+      "grad_norm": 1.3284123493225042,
+      "learning_rate": 3.2024403264480093e-06,
+      "loss": 0.756,
+      "step": 49710
+    },
+    {
+      "epoch": 1.9664221163163202,
+      "grad_norm": 1.6925962773624412,
+      "learning_rate": 3.200293366532122e-06,
+      "loss": 0.7148,
+      "step": 49720
+    },
+    {
+      "epoch": 1.9668176155352093,
+      "grad_norm": 1.2955586304860902,
+      "learning_rate": 3.198146787763269e-06,
+      "loss": 0.7627,
+      "step": 49730
+    },
+    {
+      "epoch": 1.9672131147540983,
+      "grad_norm": 1.2757727569359905,
+      "learning_rate": 3.1960005905960543e-06,
+      "loss": 0.7373,
+      "step": 49740
+    },
+    {
+      "epoch": 1.9676086139729874,
+      "grad_norm": 1.3369547190427764,
+      "learning_rate": 3.193854775485008e-06,
+      "loss": 0.7919,
+      "step": 49750
+    },
+    {
+      "epoch": 1.9680041131918764,
+      "grad_norm": 1.2374774626039289,
+      "learning_rate": 3.191709342884578e-06,
+      "loss": 0.739,
+      "step": 49760
+    },
+    {
+      "epoch": 1.9683996124107654,
+      "grad_norm": 1.3403133770543079,
+      "learning_rate": 3.189564293249128e-06,
+      "loss": 0.7395,
+      "step": 49770
+    },
+    {
+      "epoch": 1.9687951116296545,
+      "grad_norm": 1.4439156004199367,
+      "learning_rate": 3.187419627032945e-06,
+      "loss": 0.7428,
+      "step": 49780
+    },
+    {
+      "epoch": 1.9691906108485435,
+      "grad_norm": 1.2464371198076574,
+      "learning_rate": 3.1852753446902308e-06,
+      "loss": 0.7469,
+      "step": 49790
+    },
+    {
+      "epoch": 1.9695861100674326,
+      "grad_norm": 1.4960067420592544,
+      "learning_rate": 3.1831314466751094e-06,
+      "loss": 0.7456,
+      "step": 49800
+    },
+    {
+      "epoch": 1.9699816092863216,
+      "grad_norm": 1.5620929600686453,
+      "learning_rate": 3.18098793344162e-06,
+      "loss": 0.742,
+      "step": 49810
+    },
+    {
+      "epoch": 1.9703771085052106,
+      "grad_norm": 1.4174597496704604,
+      "learning_rate": 3.1788448054437226e-06,
+      "loss": 0.7818,
+      "step": 49820
+    },
+    {
+      "epoch": 1.9707726077240997,
+      "grad_norm": 1.6933624540242442,
+      "learning_rate": 3.1767020631352944e-06,
+      "loss": 0.7624,
+      "step": 49830
+    },
+    {
+      "epoch": 1.9711681069429887,
+      "grad_norm": 1.5719732507415092,
+      "learning_rate": 3.174559706970133e-06,
+      "loss": 0.7381,
+      "step": 49840
+    },
+    {
+      "epoch": 1.9715636061618778,
+      "grad_norm": 1.5205689202300559,
+      "learning_rate": 3.1724177374019516e-06,
+      "loss": 0.7483,
+      "step": 49850
+    },
+    {
+      "epoch": 1.9719591053807668,
+      "grad_norm": 1.3824346838352402,
+      "learning_rate": 3.1702761548843846e-06,
+      "loss": 0.7429,
+      "step": 49860
+    },
+    {
+      "epoch": 1.9723546045996558,
+      "grad_norm": 1.7177389822960694,
+      "learning_rate": 3.1681349598709786e-06,
+      "loss": 0.7534,
+      "step": 49870
+    },
+    {
+      "epoch": 1.9727501038185449,
+      "grad_norm": 1.5848933922888981,
+      "learning_rate": 3.165994152815205e-06,
+      "loss": 0.7475,
+      "step": 49880
+    },
+    {
+      "epoch": 1.973145603037434,
+      "grad_norm": 1.6432615433092956,
+      "learning_rate": 3.163853734170449e-06,
+      "loss": 0.7187,
+      "step": 49890
+    },
+    {
+      "epoch": 1.973541102256323,
+      "grad_norm": 1.3280025483371978,
+      "learning_rate": 3.161713704390015e-06,
+      "loss": 0.7468,
+      "step": 49900
+    },
+    {
+      "epoch": 1.973936601475212,
+      "grad_norm": 1.4042848197815534,
+      "learning_rate": 3.1595740639271244e-06,
+      "loss": 0.7524,
+      "step": 49910
+    },
+    {
+      "epoch": 1.974332100694101,
+      "grad_norm": 1.5327978854117172,
+      "learning_rate": 3.1574348132349166e-06,
+      "loss": 0.744,
+      "step": 49920
+    },
+    {
+      "epoch": 1.97472759991299,
+      "grad_norm": 1.369787456526148,
+      "learning_rate": 3.1552959527664486e-06,
+      "loss": 0.7637,
+      "step": 49930
+    },
+    {
+      "epoch": 1.9751230991318791,
+      "grad_norm": 1.4162618286042996,
+      "learning_rate": 3.153157482974694e-06,
+      "loss": 0.7845,
+      "step": 49940
+    },
+    {
+      "epoch": 1.9755185983507682,
+      "grad_norm": 1.5572962395347227,
+      "learning_rate": 3.151019404312543e-06,
+      "loss": 0.7593,
+      "step": 49950
+    },
+    {
+      "epoch": 1.9759140975696572,
+      "grad_norm": 1.6630340158767258,
+      "learning_rate": 3.148881717232806e-06,
+      "loss": 0.7228,
+      "step": 49960
+    },
+    {
+      "epoch": 1.9763095967885462,
+      "grad_norm": 1.436640404635433,
+      "learning_rate": 3.146744422188207e-06,
+      "loss": 0.7507,
+      "step": 49970
+    },
+    {
+      "epoch": 1.9767050960074353,
+      "grad_norm": 1.5900334690830433,
+      "learning_rate": 3.144607519631391e-06,
+      "loss": 0.7123,
+      "step": 49980
+    },
+    {
+      "epoch": 1.9771005952263243,
+      "grad_norm": 1.8065022828426256,
+      "learning_rate": 3.1424710100149138e-06,
+      "loss": 0.7266,
+      "step": 49990
+    },
+    {
+      "epoch": 1.9774960944452133,
+      "grad_norm": 1.5708774212769452,
+      "learning_rate": 3.140334893791253e-06,
+      "loss": 0.751,
+      "step": 50000
+    },
+    {
+      "epoch": 1.9778915936641024,
+      "grad_norm": 1.2041803179200292,
+      "learning_rate": 3.1381991714128014e-06,
+      "loss": 0.7645,
+      "step": 50010
+    },
+    {
+      "epoch": 1.9782870928829914,
+      "grad_norm": 1.5161607960909291,
+      "learning_rate": 3.136063843331869e-06,
+      "loss": 0.7669,
+      "step": 50020
+    },
+    {
+      "epoch": 1.9786825921018805,
+      "grad_norm": 1.1949110223981865,
+      "learning_rate": 3.133928910000681e-06,
+      "loss": 0.7298,
+      "step": 50030
+    },
+    {
+      "epoch": 1.9790780913207695,
+      "grad_norm": 1.57743696187895,
+      "learning_rate": 3.131794371871381e-06,
+      "loss": 0.7436,
+      "step": 50040
+    },
+    {
+      "epoch": 1.9794735905396585,
+      "grad_norm": 1.7472260740809376,
+      "learning_rate": 3.1296602293960255e-06,
+      "loss": 0.7442,
+      "step": 50050
+    },
+    {
+      "epoch": 1.9798690897585476,
+      "grad_norm": 1.331231052866077,
+      "learning_rate": 3.1275264830265906e-06,
+      "loss": 0.7123,
+      "step": 50060
+    },
+    {
+      "epoch": 1.9802645889774366,
+      "grad_norm": 1.422859931294295,
+      "learning_rate": 3.1253931332149674e-06,
+      "loss": 0.7658,
+      "step": 50070
+    },
+    {
+      "epoch": 1.9806600881963257,
+      "grad_norm": 1.7125474026339809,
+      "learning_rate": 3.1232601804129614e-06,
+      "loss": 0.7459,
+      "step": 50080
+    },
+    {
+      "epoch": 1.9810555874152147,
+      "grad_norm": 1.411372049610973,
+      "learning_rate": 3.121127625072298e-06,
+      "loss": 0.7196,
+      "step": 50090
+    },
+    {
+      "epoch": 1.9814510866341037,
+      "grad_norm": 1.5085204450222867,
+      "learning_rate": 3.1189954676446157e-06,
+      "loss": 0.7368,
+      "step": 50100
+    },
+    {
+      "epoch": 1.9818465858529928,
+      "grad_norm": 1.684665455960958,
+      "learning_rate": 3.1168637085814646e-06,
+      "loss": 0.7488,
+      "step": 50110
+    },
+    {
+      "epoch": 1.9822420850718818,
+      "grad_norm": 1.3989186619439802,
+      "learning_rate": 3.114732348334319e-06,
+      "loss": 0.7752,
+      "step": 50120
+    },
+    {
+      "epoch": 1.9826375842907709,
+      "grad_norm": 1.6538681001106232,
+      "learning_rate": 3.112601387354563e-06,
+      "loss": 0.7577,
+      "step": 50130
+    },
+    {
+      "epoch": 1.9830330835096601,
+      "grad_norm": 1.3966945065203595,
+      "learning_rate": 3.110470826093498e-06,
+      "loss": 0.7991,
+      "step": 50140
+    },
+    {
+      "epoch": 1.9834285827285492,
+      "grad_norm": 1.2149962113615989,
+      "learning_rate": 3.1083406650023395e-06,
+      "loss": 0.7443,
+      "step": 50150
+    },
+    {
+      "epoch": 1.9838240819474382,
+      "grad_norm": 1.4709324582136063,
+      "learning_rate": 3.106210904532221e-06,
+      "loss": 0.7693,
+      "step": 50160
+    },
+    {
+      "epoch": 1.9842195811663272,
+      "grad_norm": 1.2485442819705834,
+      "learning_rate": 3.1040815451341877e-06,
+      "loss": 0.759,
+      "step": 50170
+    },
+    {
+      "epoch": 1.9846150803852163,
+      "grad_norm": 1.3859062218403437,
+      "learning_rate": 3.1019525872592016e-06,
+      "loss": 0.7472,
+      "step": 50180
+    },
+    {
+      "epoch": 1.9850105796041053,
+      "grad_norm": 1.8170528006030893,
+      "learning_rate": 3.0998240313581395e-06,
+      "loss": 0.7155,
+      "step": 50190
+    },
+    {
+      "epoch": 1.9854060788229944,
+      "grad_norm": 1.4772368201282566,
+      "learning_rate": 3.0976958778817945e-06,
+      "loss": 0.7479,
+      "step": 50200
+    },
+    {
+      "epoch": 1.9858015780418834,
+      "grad_norm": 1.2891779094272988,
+      "learning_rate": 3.095568127280871e-06,
+      "loss": 0.7401,
+      "step": 50210
+    },
+    {
+      "epoch": 1.9861970772607724,
+      "grad_norm": 1.228706551163606,
+      "learning_rate": 3.0934407800059936e-06,
+      "loss": 0.7746,
+      "step": 50220
+    },
+    {
+      "epoch": 1.9865925764796615,
+      "grad_norm": 1.4719983593967,
+      "learning_rate": 3.0913138365076935e-06,
+      "loss": 0.751,
+      "step": 50230
+    },
+    {
+      "epoch": 1.9869880756985505,
+      "grad_norm": 1.431622349470501,
+      "learning_rate": 3.089187297236422e-06,
+      "loss": 0.7414,
+      "step": 50240
+    },
+    {
+      "epoch": 1.9873835749174396,
+      "grad_norm": 1.2807268492912474,
+      "learning_rate": 3.0870611626425456e-06,
+      "loss": 0.7373,
+      "step": 50250
+    },
+    {
+      "epoch": 1.9877790741363286,
+      "grad_norm": 1.4242911829633138,
+      "learning_rate": 3.0849354331763417e-06,
+      "loss": 0.739,
+      "step": 50260
+    },
+    {
+      "epoch": 1.9881745733552176,
+      "grad_norm": 1.5430532314663867,
+      "learning_rate": 3.082810109288005e-06,
+      "loss": 0.7773,
+      "step": 50270
+    },
+    {
+      "epoch": 1.9885700725741067,
+      "grad_norm": 1.8474100325704523,
+      "learning_rate": 3.0806851914276404e-06,
+      "loss": 0.7476,
+      "step": 50280
+    },
+    {
+      "epoch": 1.9889655717929957,
+      "grad_norm": 1.4555674312606346,
+      "learning_rate": 3.0785606800452694e-06,
+      "loss": 0.7346,
+      "step": 50290
+    },
+    {
+      "epoch": 1.9893610710118848,
+      "grad_norm": 1.1870897621333016,
+      "learning_rate": 3.076436575590829e-06,
+      "loss": 0.7282,
+      "step": 50300
+    },
+    {
+      "epoch": 1.9897565702307738,
+      "grad_norm": 1.6382153059937383,
+      "learning_rate": 3.0743128785141662e-06,
+      "loss": 0.7614,
+      "step": 50310
+    },
+    {
+      "epoch": 1.9901520694496628,
+      "grad_norm": 1.3498307697155651,
+      "learning_rate": 3.0721895892650453e-06,
+      "loss": 0.7571,
+      "step": 50320
+    },
+    {
+      "epoch": 1.9905475686685519,
+      "grad_norm": 1.3613800681174704,
+      "learning_rate": 3.070066708293141e-06,
+      "loss": 0.7313,
+      "step": 50330
+    },
+    {
+      "epoch": 1.990943067887441,
+      "grad_norm": 1.3769070892965563,
+      "learning_rate": 3.0679442360480444e-06,
+      "loss": 0.7471,
+      "step": 50340
+    },
+    {
+      "epoch": 1.99133856710633,
+      "grad_norm": 1.1861380029663058,
+      "learning_rate": 3.06582217297926e-06,
+      "loss": 0.765,
+      "step": 50350
+    },
+    {
+      "epoch": 1.991734066325219,
+      "grad_norm": 1.3224926966755899,
+      "learning_rate": 3.0637005195362014e-06,
+      "loss": 0.7535,
+      "step": 50360
+    },
+    {
+      "epoch": 1.992129565544108,
+      "grad_norm": 1.4988078410324497,
+      "learning_rate": 3.0615792761681986e-06,
+      "loss": 0.7186,
+      "step": 50370
+    },
+    {
+      "epoch": 1.992525064762997,
+      "grad_norm": 1.3629812450419356,
+      "learning_rate": 3.059458443324497e-06,
+      "loss": 0.751,
+      "step": 50380
+    },
+    {
+      "epoch": 1.992920563981886,
+      "grad_norm": 1.3919346966392678,
+      "learning_rate": 3.0573380214542503e-06,
+      "loss": 0.7392,
+      "step": 50390
+    },
+    {
+      "epoch": 1.9933160632007751,
+      "grad_norm": 1.7689707022600436,
+      "learning_rate": 3.0552180110065287e-06,
+      "loss": 0.7452,
+      "step": 50400
+    },
+    {
+      "epoch": 1.9937115624196642,
+      "grad_norm": 1.508851741090035,
+      "learning_rate": 3.053098412430314e-06,
+      "loss": 0.7188,
+      "step": 50410
+    },
+    {
+      "epoch": 1.9941070616385532,
+      "grad_norm": 1.5179117928795338,
+      "learning_rate": 3.050979226174501e-06,
+      "loss": 0.7463,
+      "step": 50420
+    },
+    {
+      "epoch": 1.9945025608574423,
+      "grad_norm": 1.511734302566831,
+      "learning_rate": 3.0488604526878973e-06,
+      "loss": 0.7679,
+      "step": 50430
+    },
+    {
+      "epoch": 1.9948980600763313,
+      "grad_norm": 1.3499716359148253,
+      "learning_rate": 3.0467420924192222e-06,
+      "loss": 0.7718,
+      "step": 50440
+    },
+    {
+      "epoch": 1.9952935592952203,
+      "grad_norm": 1.4635847049541149,
+      "learning_rate": 3.044624145817109e-06,
+      "loss": 0.7809,
+      "step": 50450
+    },
+    {
+      "epoch": 1.9956890585141094,
+      "grad_norm": 1.6636553439630535,
+      "learning_rate": 3.0425066133301013e-06,
+      "loss": 0.7531,
+      "step": 50460
+    },
+    {
+      "epoch": 1.9960845577329986,
+      "grad_norm": 1.6397023814258374,
+      "learning_rate": 3.040389495406657e-06,
+      "loss": 0.7359,
+      "step": 50470
+    },
+    {
+      "epoch": 1.9964800569518877,
+      "grad_norm": 1.5760470468518895,
+      "learning_rate": 3.038272792495145e-06,
+      "loss": 0.7533,
+      "step": 50480
+    },
+    {
+      "epoch": 1.9968755561707767,
+      "grad_norm": 1.413545117194915,
+      "learning_rate": 3.036156505043847e-06,
+      "loss": 0.7352,
+      "step": 50490
+    },
+    {
+      "epoch": 1.9972710553896658,
+      "grad_norm": 1.482806844125226,
+      "learning_rate": 3.034040633500955e-06,
+      "loss": 0.7759,
+      "step": 50500
+    },
+    {
+      "epoch": 1.9976665546085548,
+      "grad_norm": 1.2633338655765045,
+      "learning_rate": 3.031925178314578e-06,
+      "loss": 0.7508,
+      "step": 50510
+    },
+    {
+      "epoch": 1.9980620538274438,
+      "grad_norm": 1.6399789811528698,
+      "learning_rate": 3.0298101399327296e-06,
+      "loss": 0.7427,
+      "step": 50520
+    },
+    {
+      "epoch": 1.9984575530463329,
+      "grad_norm": 2.029912877311006,
+      "learning_rate": 3.0276955188033395e-06,
+      "loss": 0.7421,
+      "step": 50530
+    },
+    {
+      "epoch": 1.998853052265222,
+      "grad_norm": 1.2634810149622835,
+      "learning_rate": 3.0255813153742488e-06,
+      "loss": 0.7576,
+      "step": 50540
+    },
+    {
+      "epoch": 1.999248551484111,
+      "grad_norm": 1.385864567449702,
+      "learning_rate": 3.0234675300932093e-06,
+      "loss": 0.7567,
+      "step": 50550
+    },
+    {
+      "epoch": 1.999644050703,
+      "grad_norm": 1.5902270149194355,
+      "learning_rate": 3.0213541634078847e-06,
+      "loss": 0.6993,
+      "step": 50560
+    },
+    {
+      "epoch": 2.000039549921889,
+      "grad_norm": 1.2503498514691747,
+      "learning_rate": 3.019241215765849e-06,
+      "loss": 0.7206,
+      "step": 50570
+    },
+    {
+      "epoch": 2.000435049140778,
+      "grad_norm": 1.5242846812726885,
+      "learning_rate": 3.01712868761459e-06,
+      "loss": 0.6639,
+      "step": 50580
+    },
+    {
+      "epoch": 2.000830548359667,
+      "grad_norm": 1.2428219186475273,
+      "learning_rate": 3.0150165794015055e-06,
+      "loss": 0.7109,
+      "step": 50590
+    },
+    {
+      "epoch": 2.001226047578556,
+      "grad_norm": 1.3345865451016519,
+      "learning_rate": 3.0129048915739013e-06,
+      "loss": 0.7023,
+      "step": 50600
+    },
+    {
+      "epoch": 2.001621546797445,
+      "grad_norm": 1.283190225337282,
+      "learning_rate": 3.010793624578997e-06,
+      "loss": 0.688,
+      "step": 50610
+    },
+    {
+      "epoch": 2.0020170460163342,
+      "grad_norm": 1.3809873563864985,
+      "learning_rate": 3.0086827788639233e-06,
+      "loss": 0.6404,
+      "step": 50620
+    },
+    {
+      "epoch": 2.0024125452352233,
+      "grad_norm": 1.5080959929557487,
+      "learning_rate": 3.006572354875722e-06,
+      "loss": 0.663,
+      "step": 50630
+    },
+    {
+      "epoch": 2.0028080444541123,
+      "grad_norm": 1.407609094848851,
+      "learning_rate": 3.0044623530613437e-06,
+      "loss": 0.6746,
+      "step": 50640
+    },
+    {
+      "epoch": 2.0032035436730014,
+      "grad_norm": 1.849854331500034,
+      "learning_rate": 3.0023527738676518e-06,
+      "loss": 0.6774,
+      "step": 50650
+    },
+    {
+      "epoch": 2.0035990428918904,
+      "grad_norm": 1.6437960896583887,
+      "learning_rate": 3.0002436177414175e-06,
+      "loss": 0.6708,
+      "step": 50660
+    },
+    {
+      "epoch": 2.0039945421107794,
+      "grad_norm": 1.3265830976424084,
+      "learning_rate": 2.9981348851293254e-06,
+      "loss": 0.7002,
+      "step": 50670
+    },
+    {
+      "epoch": 2.0043900413296685,
+      "grad_norm": 1.6576160939199058,
+      "learning_rate": 2.996026576477967e-06,
+      "loss": 0.6894,
+      "step": 50680
+    },
+    {
+      "epoch": 2.0047855405485575,
+      "grad_norm": 1.3296136604093665,
+      "learning_rate": 2.99391869223385e-06,
+      "loss": 0.6512,
+      "step": 50690
+    },
+    {
+      "epoch": 2.0051810397674465,
+      "grad_norm": 1.3516789424581421,
+      "learning_rate": 2.9918112328433835e-06,
+      "loss": 0.7059,
+      "step": 50700
+    },
+    {
+      "epoch": 2.0055765389863356,
+      "grad_norm": 1.59714220712334,
+      "learning_rate": 2.9897041987528943e-06,
+      "loss": 0.6825,
+      "step": 50710
+    },
+    {
+      "epoch": 2.0059720382052246,
+      "grad_norm": 1.3536228274613509,
+      "learning_rate": 2.987597590408614e-06,
+      "loss": 0.7068,
+      "step": 50720
+    },
+    {
+      "epoch": 2.0063675374241137,
+      "grad_norm": 1.4035241484722534,
+      "learning_rate": 2.9854914082566876e-06,
+      "loss": 0.678,
+      "step": 50730
+    },
+    {
+      "epoch": 2.0067630366430027,
+      "grad_norm": 1.3902323666999807,
+      "learning_rate": 2.9833856527431686e-06,
+      "loss": 0.6882,
+      "step": 50740
+    },
+    {
+      "epoch": 2.0071585358618917,
+      "grad_norm": 1.379686579583006,
+      "learning_rate": 2.9812803243140188e-06,
+      "loss": 0.7059,
+      "step": 50750
+    },
+    {
+      "epoch": 2.007554035080781,
+      "grad_norm": 1.3427615297738003,
+      "learning_rate": 2.9791754234151106e-06,
+      "loss": 0.6908,
+      "step": 50760
+    },
+    {
+      "epoch": 2.00794953429967,
+      "grad_norm": 1.3745683001797573,
+      "learning_rate": 2.9770709504922266e-06,
+      "loss": 0.6483,
+      "step": 50770
+    },
+    {
+      "epoch": 2.008345033518559,
+      "grad_norm": 1.5354490076683796,
+      "learning_rate": 2.9749669059910586e-06,
+      "loss": 0.6949,
+      "step": 50780
+    },
+    {
+      "epoch": 2.008740532737448,
+      "grad_norm": 1.3935711216163609,
+      "learning_rate": 2.9728632903572065e-06,
+      "loss": 0.6699,
+      "step": 50790
+    },
+    {
+      "epoch": 2.009136031956337,
+      "grad_norm": 1.6777422963852027,
+      "learning_rate": 2.97076010403618e-06,
+      "loss": 0.7059,
+      "step": 50800
+    },
+    {
+      "epoch": 2.009531531175226,
+      "grad_norm": 1.2663557314927076,
+      "learning_rate": 2.9686573474733983e-06,
+      "loss": 0.6945,
+      "step": 50810
+    },
+    {
+      "epoch": 2.009927030394115,
+      "grad_norm": 1.4118856584692836,
+      "learning_rate": 2.9665550211141884e-06,
+      "loss": 0.668,
+      "step": 50820
+    },
+    {
+      "epoch": 2.010322529613004,
+      "grad_norm": 1.3243353188551914,
+      "learning_rate": 2.964453125403789e-06,
+      "loss": 0.6685,
+      "step": 50830
+    },
+    {
+      "epoch": 2.010718028831893,
+      "grad_norm": 1.2969200377065218,
+      "learning_rate": 2.962351660787345e-06,
+      "loss": 0.6806,
+      "step": 50840
+    },
+    {
+      "epoch": 2.011113528050782,
+      "grad_norm": 1.6311053422237278,
+      "learning_rate": 2.9602506277099085e-06,
+      "loss": 0.6719,
+      "step": 50850
+    },
+    {
+      "epoch": 2.011509027269671,
+      "grad_norm": 1.3307741847343124,
+      "learning_rate": 2.958150026616443e-06,
+      "loss": 0.6599,
+      "step": 50860
+    },
+    {
+      "epoch": 2.01190452648856,
+      "grad_norm": 1.4835150032198157,
+      "learning_rate": 2.9560498579518216e-06,
+      "loss": 0.6673,
+      "step": 50870
+    },
+    {
+      "epoch": 2.0123000257074493,
+      "grad_norm": 1.385214996983982,
+      "learning_rate": 2.9539501221608225e-06,
+      "loss": 0.6456,
+      "step": 50880
+    },
+    {
+      "epoch": 2.0126955249263383,
+      "grad_norm": 1.2772499801401835,
+      "learning_rate": 2.951850819688134e-06,
+      "loss": 0.6645,
+      "step": 50890
+    },
+    {
+      "epoch": 2.0130910241452273,
+      "grad_norm": 1.2386461563456654,
+      "learning_rate": 2.9497519509783524e-06,
+      "loss": 0.7074,
+      "step": 50900
+    },
+    {
+      "epoch": 2.0134865233641164,
+      "grad_norm": 1.5315591541942783,
+      "learning_rate": 2.9476535164759827e-06,
+      "loss": 0.6873,
+      "step": 50910
+    },
+    {
+      "epoch": 2.0138820225830054,
+      "grad_norm": 1.3425740280079377,
+      "learning_rate": 2.945555516625438e-06,
+      "loss": 0.679,
+      "step": 50920
+    },
+    {
+      "epoch": 2.0142775218018945,
+      "grad_norm": 1.3319633447164219,
+      "learning_rate": 2.943457951871037e-06,
+      "loss": 0.6709,
+      "step": 50930
+    },
+    {
+      "epoch": 2.0146730210207835,
+      "grad_norm": 1.3179112283657646,
+      "learning_rate": 2.941360822657008e-06,
+      "loss": 0.6874,
+      "step": 50940
+    },
+    {
+      "epoch": 2.0150685202396725,
+      "grad_norm": 1.3017835976189571,
+      "learning_rate": 2.939264129427489e-06,
+      "loss": 0.6543,
+      "step": 50950
+    },
+    {
+      "epoch": 2.0154640194585616,
+      "grad_norm": 1.3499179718961225,
+      "learning_rate": 2.9371678726265223e-06,
+      "loss": 0.6689,
+      "step": 50960
+    },
+    {
+      "epoch": 2.0158595186774506,
+      "grad_norm": 1.6671297883313765,
+      "learning_rate": 2.9350720526980592e-06,
+      "loss": 0.679,
+      "step": 50970
+    },
+    {
+      "epoch": 2.0162550178963397,
+      "grad_norm": 1.4018498982984247,
+      "learning_rate": 2.9329766700859586e-06,
+      "loss": 0.6512,
+      "step": 50980
+    },
+    {
+      "epoch": 2.0166505171152287,
+      "grad_norm": 1.4676454889004524,
+      "learning_rate": 2.930881725233986e-06,
+      "loss": 0.6605,
+      "step": 50990
+    },
+    {
+      "epoch": 2.0170460163341177,
+      "grad_norm": 1.3274383695240082,
+      "learning_rate": 2.928787218585816e-06,
+      "loss": 0.6733,
+      "step": 51000
+    },
+    {
+      "epoch": 2.0174415155530068,
+      "grad_norm": 1.5160342524855654,
+      "learning_rate": 2.926693150585028e-06,
+      "loss": 0.6805,
+      "step": 51010
+    },
+    {
+      "epoch": 2.017837014771896,
+      "grad_norm": 1.448429933318291,
+      "learning_rate": 2.9245995216751113e-06,
+      "loss": 0.6798,
+      "step": 51020
+    },
+    {
+      "epoch": 2.018232513990785,
+      "grad_norm": 1.4160325991828986,
+      "learning_rate": 2.922506332299459e-06,
+      "loss": 0.6567,
+      "step": 51030
+    },
+    {
+      "epoch": 2.018628013209674,
+      "grad_norm": 1.6586108652650249,
+      "learning_rate": 2.9204135829013735e-06,
+      "loss": 0.6531,
+      "step": 51040
+    },
+    {
+      "epoch": 2.019023512428563,
+      "grad_norm": 1.260584164412188,
+      "learning_rate": 2.9183212739240647e-06,
+      "loss": 0.7015,
+      "step": 51050
+    },
+    {
+      "epoch": 2.019419011647452,
+      "grad_norm": 1.331985372217651,
+      "learning_rate": 2.9162294058106444e-06,
+      "loss": 0.6808,
+      "step": 51060
+    },
+    {
+      "epoch": 2.019814510866341,
+      "grad_norm": 1.497903133786886,
+      "learning_rate": 2.914137979004138e-06,
+      "loss": 0.6536,
+      "step": 51070
+    },
+    {
+      "epoch": 2.02021001008523,
+      "grad_norm": 1.6507433804618468,
+      "learning_rate": 2.9120469939474728e-06,
+      "loss": 0.6804,
+      "step": 51080
+    },
+    {
+      "epoch": 2.020605509304119,
+      "grad_norm": 1.2715119936228625,
+      "learning_rate": 2.90995645108348e-06,
+      "loss": 0.6826,
+      "step": 51090
+    },
+    {
+      "epoch": 2.021001008523008,
+      "grad_norm": 1.4486293293535164,
+      "learning_rate": 2.9078663508549064e-06,
+      "loss": 0.6924,
+      "step": 51100
+    },
+    {
+      "epoch": 2.021396507741897,
+      "grad_norm": 1.2879757972428043,
+      "learning_rate": 2.9057766937043975e-06,
+      "loss": 0.6922,
+      "step": 51110
+    },
+    {
+      "epoch": 2.021792006960786,
+      "grad_norm": 1.5685520761087486,
+      "learning_rate": 2.9036874800745035e-06,
+      "loss": 0.6794,
+      "step": 51120
+    },
+    {
+      "epoch": 2.0221875061796752,
+      "grad_norm": 1.3158964222984417,
+      "learning_rate": 2.9015987104076893e-06,
+      "loss": 0.6895,
+      "step": 51130
+    },
+    {
+      "epoch": 2.0225830053985643,
+      "grad_norm": 1.3960220197219224,
+      "learning_rate": 2.899510385146316e-06,
+      "loss": 0.6938,
+      "step": 51140
+    },
+    {
+      "epoch": 2.0229785046174533,
+      "grad_norm": 1.2181485021077414,
+      "learning_rate": 2.897422504732659e-06,
+      "loss": 0.69,
+      "step": 51150
+    },
+    {
+      "epoch": 2.0233740038363424,
+      "grad_norm": 1.2331212391951465,
+      "learning_rate": 2.895335069608891e-06,
+      "loss": 0.6804,
+      "step": 51160
+    },
+    {
+      "epoch": 2.0237695030552314,
+      "grad_norm": 1.3131492109852732,
+      "learning_rate": 2.8932480802171005e-06,
+      "loss": 0.686,
+      "step": 51170
+    },
+    {
+      "epoch": 2.0241650022741204,
+      "grad_norm": 1.439822595920632,
+      "learning_rate": 2.891161536999271e-06,
+      "loss": 0.6678,
+      "step": 51180
+    },
+    {
+      "epoch": 2.0245605014930095,
+      "grad_norm": 1.2464753737473369,
+      "learning_rate": 2.8890754403973015e-06,
+      "loss": 0.7049,
+      "step": 51190
+    },
+    {
+      "epoch": 2.0249560007118985,
+      "grad_norm": 1.2402150879044864,
+      "learning_rate": 2.8869897908529885e-06,
+      "loss": 0.6532,
+      "step": 51200
+    },
+    {
+      "epoch": 2.0253514999307876,
+      "grad_norm": 1.5991207384443442,
+      "learning_rate": 2.8849045888080375e-06,
+      "loss": 0.6795,
+      "step": 51210
+    },
+    {
+      "epoch": 2.0257469991496766,
+      "grad_norm": 1.5347594990005007,
+      "learning_rate": 2.8828198347040567e-06,
+      "loss": 0.6552,
+      "step": 51220
+    },
+    {
+      "epoch": 2.0261424983685656,
+      "grad_norm": 1.608453122797535,
+      "learning_rate": 2.8807355289825645e-06,
+      "loss": 0.7042,
+      "step": 51230
+    },
+    {
+      "epoch": 2.0265379975874547,
+      "grad_norm": 1.8781780266172596,
+      "learning_rate": 2.878651672084978e-06,
+      "loss": 0.6463,
+      "step": 51240
+    },
+    {
+      "epoch": 2.0269334968063437,
+      "grad_norm": 1.6421720318587067,
+      "learning_rate": 2.876568264452625e-06,
+      "loss": 0.6593,
+      "step": 51250
+    },
+    {
+      "epoch": 2.0273289960252328,
+      "grad_norm": 1.5979964200325398,
+      "learning_rate": 2.874485306526733e-06,
+      "loss": 0.6739,
+      "step": 51260
+    },
+    {
+      "epoch": 2.027724495244122,
+      "grad_norm": 1.4108229885269192,
+      "learning_rate": 2.8724027987484417e-06,
+      "loss": 0.6759,
+      "step": 51270
+    },
+    {
+      "epoch": 2.028119994463011,
+      "grad_norm": 1.6001752683500097,
+      "learning_rate": 2.870320741558785e-06,
+      "loss": 0.6651,
+      "step": 51280
+    },
+    {
+      "epoch": 2.0285154936819,
+      "grad_norm": 1.4589700924840185,
+      "learning_rate": 2.8682391353987087e-06,
+      "loss": 0.7056,
+      "step": 51290
+    },
+    {
+      "epoch": 2.028910992900789,
+      "grad_norm": 1.79256884361263,
+      "learning_rate": 2.8661579807090634e-06,
+      "loss": 0.6485,
+      "step": 51300
+    },
+    {
+      "epoch": 2.029306492119678,
+      "grad_norm": 1.5533611885916156,
+      "learning_rate": 2.8640772779305985e-06,
+      "loss": 0.6788,
+      "step": 51310
+    },
+    {
+      "epoch": 2.029701991338567,
+      "grad_norm": 1.2682116196971873,
+      "learning_rate": 2.8619970275039755e-06,
+      "loss": 0.6764,
+      "step": 51320
+    },
+    {
+      "epoch": 2.030097490557456,
+      "grad_norm": 1.409051980441851,
+      "learning_rate": 2.8599172298697526e-06,
+      "loss": 0.67,
+      "step": 51330
+    },
+    {
+      "epoch": 2.030492989776345,
+      "grad_norm": 1.599522980345227,
+      "learning_rate": 2.857837885468396e-06,
+      "loss": 0.6834,
+      "step": 51340
+    },
+    {
+      "epoch": 2.030888488995234,
+      "grad_norm": 1.4713719749726173,
+      "learning_rate": 2.855758994740274e-06,
+      "loss": 0.7017,
+      "step": 51350
+    },
+    {
+      "epoch": 2.031283988214123,
+      "grad_norm": 1.6546618583822588,
+      "learning_rate": 2.853680558125663e-06,
+      "loss": 0.6281,
+      "step": 51360
+    },
+    {
+      "epoch": 2.031679487433012,
+      "grad_norm": 1.6168192095449252,
+      "learning_rate": 2.851602576064737e-06,
+      "loss": 0.6866,
+      "step": 51370
+    },
+    {
+      "epoch": 2.0320749866519012,
+      "grad_norm": 1.41902590109875,
+      "learning_rate": 2.8495250489975806e-06,
+      "loss": 0.7171,
+      "step": 51380
+    },
+    {
+      "epoch": 2.0324704858707903,
+      "grad_norm": 1.511544137845957,
+      "learning_rate": 2.8474479773641737e-06,
+      "loss": 0.684,
+      "step": 51390
+    },
+    {
+      "epoch": 2.0328659850896793,
+      "grad_norm": 1.4672154781081783,
+      "learning_rate": 2.8453713616044106e-06,
+      "loss": 0.7136,
+      "step": 51400
+    },
+    {
+      "epoch": 2.0332614843085683,
+      "grad_norm": 1.3807271315009328,
+      "learning_rate": 2.843295202158077e-06,
+      "loss": 0.6548,
+      "step": 51410
+    },
+    {
+      "epoch": 2.0336569835274574,
+      "grad_norm": 1.7527289993783621,
+      "learning_rate": 2.841219499464872e-06,
+      "loss": 0.6813,
+      "step": 51420
+    },
+    {
+      "epoch": 2.0340524827463464,
+      "grad_norm": 1.6253010217478865,
+      "learning_rate": 2.839144253964391e-06,
+      "loss": 0.6417,
+      "step": 51430
+    },
+    {
+      "epoch": 2.0344479819652355,
+      "grad_norm": 1.318797320732602,
+      "learning_rate": 2.8370694660961386e-06,
+      "loss": 0.6769,
+      "step": 51440
+    },
+    {
+      "epoch": 2.0348434811841245,
+      "grad_norm": 1.4737748701668791,
+      "learning_rate": 2.8349951362995177e-06,
+      "loss": 0.6977,
+      "step": 51450
+    },
+    {
+      "epoch": 2.0352389804030135,
+      "grad_norm": 1.428404610531691,
+      "learning_rate": 2.832921265013835e-06,
+      "loss": 0.6444,
+      "step": 51460
+    },
+    {
+      "epoch": 2.0356344796219026,
+      "grad_norm": 1.5207348819296758,
+      "learning_rate": 2.8308478526783e-06,
+      "loss": 0.6645,
+      "step": 51470
+    },
+    {
+      "epoch": 2.0360299788407916,
+      "grad_norm": 1.4151771378902087,
+      "learning_rate": 2.8287748997320296e-06,
+      "loss": 0.679,
+      "step": 51480
+    },
+    {
+      "epoch": 2.0364254780596807,
+      "grad_norm": 1.3974089996903323,
+      "learning_rate": 2.826702406614036e-06,
+      "loss": 0.6933,
+      "step": 51490
+    },
+    {
+      "epoch": 2.0368209772785697,
+      "grad_norm": 1.4122223333777773,
+      "learning_rate": 2.82463037376324e-06,
+      "loss": 0.6686,
+      "step": 51500
+    },
+    {
+      "epoch": 2.0372164764974587,
+      "grad_norm": 1.43840570170672,
+      "learning_rate": 2.8225588016184635e-06,
+      "loss": 0.6822,
+      "step": 51510
+    },
+    {
+      "epoch": 2.037611975716348,
+      "grad_norm": 1.1813850276128999,
+      "learning_rate": 2.8204876906184255e-06,
+      "loss": 0.694,
+      "step": 51520
+    },
+    {
+      "epoch": 2.038007474935237,
+      "grad_norm": 1.8225240542674708,
+      "learning_rate": 2.8184170412017576e-06,
+      "loss": 0.6667,
+      "step": 51530
+    },
+    {
+      "epoch": 2.038402974154126,
+      "grad_norm": 1.4965125674565252,
+      "learning_rate": 2.8163468538069823e-06,
+      "loss": 0.6732,
+      "step": 51540
+    },
+    {
+      "epoch": 2.038798473373015,
+      "grad_norm": 1.448677894727122,
+      "learning_rate": 2.8142771288725345e-06,
+      "loss": 0.6772,
+      "step": 51550
+    },
+    {
+      "epoch": 2.039193972591904,
+      "grad_norm": 1.3194978096594925,
+      "learning_rate": 2.8122078668367435e-06,
+      "loss": 0.6793,
+      "step": 51560
+    },
+    {
+      "epoch": 2.039589471810793,
+      "grad_norm": 1.5670614469670083,
+      "learning_rate": 2.810139068137848e-06,
+      "loss": 0.6789,
+      "step": 51570
+    },
+    {
+      "epoch": 2.039984971029682,
+      "grad_norm": 1.4063689584703747,
+      "learning_rate": 2.808070733213977e-06,
+      "loss": 0.677,
+      "step": 51580
+    },
+    {
+      "epoch": 2.040380470248571,
+      "grad_norm": 1.5446635384179739,
+      "learning_rate": 2.806002862503174e-06,
+      "loss": 0.679,
+      "step": 51590
+    },
+    {
+      "epoch": 2.04077596946746,
+      "grad_norm": 1.315420834173595,
+      "learning_rate": 2.8039354564433746e-06,
+      "loss": 0.665,
+      "step": 51600
+    },
+    {
+      "epoch": 2.041171468686349,
+      "grad_norm": 1.4561359291183944,
+      "learning_rate": 2.8018685154724246e-06,
+      "loss": 0.6514,
+      "step": 51610
+    },
+    {
+      "epoch": 2.041566967905238,
+      "grad_norm": 1.4501766365352706,
+      "learning_rate": 2.799802040028062e-06,
+      "loss": 0.6785,
+      "step": 51620
+    },
+    {
+      "epoch": 2.041962467124127,
+      "grad_norm": 1.643693782075612,
+      "learning_rate": 2.797736030547935e-06,
+      "loss": 0.6798,
+      "step": 51630
+    },
+    {
+      "epoch": 2.0423579663430163,
+      "grad_norm": 1.4187275409018834,
+      "learning_rate": 2.795670487469585e-06,
+      "loss": 0.6926,
+      "step": 51640
+    },
+    {
+      "epoch": 2.0427534655619057,
+      "grad_norm": 1.4312540627714883,
+      "learning_rate": 2.793605411230463e-06,
+      "loss": 0.6528,
+      "step": 51650
+    },
+    {
+      "epoch": 2.0431489647807948,
+      "grad_norm": 1.7056937881056342,
+      "learning_rate": 2.7915408022679126e-06,
+      "loss": 0.6832,
+      "step": 51660
+    },
+    {
+      "epoch": 2.043544463999684,
+      "grad_norm": 1.3243362374528815,
+      "learning_rate": 2.789476661019186e-06,
+      "loss": 0.6756,
+      "step": 51670
+    },
+    {
+      "epoch": 2.043939963218573,
+      "grad_norm": 1.3395807097369474,
+      "learning_rate": 2.78741298792143e-06,
+      "loss": 0.6727,
+      "step": 51680
+    },
+    {
+      "epoch": 2.044335462437462,
+      "grad_norm": 1.2042112999230044,
+      "learning_rate": 2.7853497834117005e-06,
+      "loss": 0.6761,
+      "step": 51690
+    },
+    {
+      "epoch": 2.044730961656351,
+      "grad_norm": 1.4510951398769156,
+      "learning_rate": 2.7832870479269414e-06,
+      "loss": 0.6873,
+      "step": 51700
+    },
+    {
+      "epoch": 2.04512646087524,
+      "grad_norm": 1.4410161339203527,
+      "learning_rate": 2.7812247819040105e-06,
+      "loss": 0.665,
+      "step": 51710
+    },
+    {
+      "epoch": 2.045521960094129,
+      "grad_norm": 1.4850022883181755,
+      "learning_rate": 2.779162985779655e-06,
+      "loss": 0.6752,
+      "step": 51720
+    },
+    {
+      "epoch": 2.045917459313018,
+      "grad_norm": 1.3824240175203364,
+      "learning_rate": 2.7771016599905354e-06,
+      "loss": 0.6675,
+      "step": 51730
+    },
+    {
+      "epoch": 2.046312958531907,
+      "grad_norm": 1.57840654228568,
+      "learning_rate": 2.7750408049731976e-06,
+      "loss": 0.6751,
+      "step": 51740
+    },
+    {
+      "epoch": 2.046708457750796,
+      "grad_norm": 1.5595061154067895,
+      "learning_rate": 2.7729804211641008e-06,
+      "loss": 0.661,
+      "step": 51750
+    },
+    {
+      "epoch": 2.047103956969685,
+      "grad_norm": 1.4522336237441211,
+      "learning_rate": 2.7709205089995983e-06,
+      "loss": 0.6903,
+      "step": 51760
+    },
+    {
+      "epoch": 2.047499456188574,
+      "grad_norm": 1.3382892906889068,
+      "learning_rate": 2.76886106891594e-06,
+      "loss": 0.6531,
+      "step": 51770
+    },
+    {
+      "epoch": 2.0478949554074632,
+      "grad_norm": 1.5907294576561035,
+      "learning_rate": 2.7668021013492853e-06,
+      "loss": 0.6634,
+      "step": 51780
+    },
+    {
+      "epoch": 2.0482904546263523,
+      "grad_norm": 1.2589676471675642,
+      "learning_rate": 2.7647436067356837e-06,
+      "loss": 0.688,
+      "step": 51790
+    },
+    {
+      "epoch": 2.0486859538452413,
+      "grad_norm": 1.4836115450633147,
+      "learning_rate": 2.7626855855110933e-06,
+      "loss": 0.6804,
+      "step": 51800
+    },
+    {
+      "epoch": 2.0490814530641304,
+      "grad_norm": 1.2705691695055399,
+      "learning_rate": 2.7606280381113647e-06,
+      "loss": 0.6528,
+      "step": 51810
+    },
+    {
+      "epoch": 2.0494769522830194,
+      "grad_norm": 1.5990336497105846,
+      "learning_rate": 2.758570964972256e-06,
+      "loss": 0.6858,
+      "step": 51820
+    },
+    {
+      "epoch": 2.0498724515019084,
+      "grad_norm": 1.6320340247285556,
+      "learning_rate": 2.7565143665294113e-06,
+      "loss": 0.6502,
+      "step": 51830
+    },
+    {
+      "epoch": 2.0502679507207975,
+      "grad_norm": 1.3638568833601041,
+      "learning_rate": 2.754458243218391e-06,
+      "loss": 0.6913,
+      "step": 51840
+    },
+    {
+      "epoch": 2.0506634499396865,
+      "grad_norm": 1.6690454724771784,
+      "learning_rate": 2.7524025954746416e-06,
+      "loss": 0.6843,
+      "step": 51850
+    },
+    {
+      "epoch": 2.0510589491585756,
+      "grad_norm": 1.5581749548571813,
+      "learning_rate": 2.7503474237335178e-06,
+      "loss": 0.668,
+      "step": 51860
+    },
+    {
+      "epoch": 2.0514544483774646,
+      "grad_norm": 1.5845702276915152,
+      "learning_rate": 2.7482927284302664e-06,
+      "loss": 0.6642,
+      "step": 51870
+    },
+    {
+      "epoch": 2.0518499475963536,
+      "grad_norm": 1.18781631151122,
+      "learning_rate": 2.74623851000004e-06,
+      "loss": 0.67,
+      "step": 51880
+    },
+    {
+      "epoch": 2.0522454468152427,
+      "grad_norm": 1.38478460380908,
+      "learning_rate": 2.7441847688778837e-06,
+      "loss": 0.6691,
+      "step": 51890
+    },
+    {
+      "epoch": 2.0526409460341317,
+      "grad_norm": 1.4919508849025567,
+      "learning_rate": 2.7421315054987485e-06,
+      "loss": 0.6953,
+      "step": 51900
+    },
+    {
+      "epoch": 2.0530364452530208,
+      "grad_norm": 1.409645879476153,
+      "learning_rate": 2.740078720297476e-06,
+      "loss": 0.6498,
+      "step": 51910
+    },
+    {
+      "epoch": 2.05343194447191,
+      "grad_norm": 1.3887768232572557,
+      "learning_rate": 2.7380264137088152e-06,
+      "loss": 0.6593,
+      "step": 51920
+    },
+    {
+      "epoch": 2.053827443690799,
+      "grad_norm": 1.5356611157694373,
+      "learning_rate": 2.735974586167407e-06,
+      "loss": 0.6548,
+      "step": 51930
+    },
+    {
+      "epoch": 2.054222942909688,
+      "grad_norm": 1.4512133736684516,
+      "learning_rate": 2.7339232381077947e-06,
+      "loss": 0.6903,
+      "step": 51940
+    },
+    {
+      "epoch": 2.054618442128577,
+      "grad_norm": 1.325614361693809,
+      "learning_rate": 2.7318723699644144e-06,
+      "loss": 0.6828,
+      "step": 51950
+    },
+    {
+      "epoch": 2.055013941347466,
+      "grad_norm": 1.4889499623905664,
+      "learning_rate": 2.729821982171611e-06,
+      "loss": 0.6895,
+      "step": 51960
+    },
+    {
+      "epoch": 2.055409440566355,
+      "grad_norm": 1.2440146129922824,
+      "learning_rate": 2.727772075163617e-06,
+      "loss": 0.6655,
+      "step": 51970
+    },
+    {
+      "epoch": 2.055804939785244,
+      "grad_norm": 1.3290034827721968,
+      "learning_rate": 2.72572264937457e-06,
+      "loss": 0.6594,
+      "step": 51980
+    },
+    {
+      "epoch": 2.056200439004133,
+      "grad_norm": 1.5596145538617112,
+      "learning_rate": 2.7236737052385042e-06,
+      "loss": 0.6908,
+      "step": 51990
+    },
+    {
+      "epoch": 2.056595938223022,
+      "grad_norm": 1.570329150959247,
+      "learning_rate": 2.7216252431893463e-06,
+      "loss": 0.654,
+      "step": 52000
+    },
+    {
+      "epoch": 2.056991437441911,
+      "grad_norm": 1.4310978721575889,
+      "learning_rate": 2.71957726366093e-06,
+      "loss": 0.6782,
+      "step": 52010
+    },
+    {
+      "epoch": 2.0573869366608,
+      "grad_norm": 1.4169390400429067,
+      "learning_rate": 2.717529767086979e-06,
+      "loss": 0.678,
+      "step": 52020
+    },
+    {
+      "epoch": 2.0577824358796892,
+      "grad_norm": 1.5811118727831657,
+      "learning_rate": 2.715482753901122e-06,
+      "loss": 0.6665,
+      "step": 52030
+    },
+    {
+      "epoch": 2.0581779350985783,
+      "grad_norm": 1.794965533863638,
+      "learning_rate": 2.713436224536876e-06,
+      "loss": 0.6806,
+      "step": 52040
+    },
+    {
+      "epoch": 2.0585734343174673,
+      "grad_norm": 1.238557125605881,
+      "learning_rate": 2.7113901794276666e-06,
+      "loss": 0.6479,
+      "step": 52050
+    },
+    {
+      "epoch": 2.0589689335363563,
+      "grad_norm": 1.374570653604843,
+      "learning_rate": 2.709344619006808e-06,
+      "loss": 0.6614,
+      "step": 52060
+    },
+    {
+      "epoch": 2.0593644327552454,
+      "grad_norm": 1.5715357755839472,
+      "learning_rate": 2.7072995437075152e-06,
+      "loss": 0.6566,
+      "step": 52070
+    },
+    {
+      "epoch": 2.0597599319741344,
+      "grad_norm": 1.452515575478075,
+      "learning_rate": 2.705254953962898e-06,
+      "loss": 0.6581,
+      "step": 52080
+    },
+    {
+      "epoch": 2.0601554311930235,
+      "grad_norm": 1.4301612305510367,
+      "learning_rate": 2.7032108502059696e-06,
+      "loss": 0.6876,
+      "step": 52090
+    },
+    {
+      "epoch": 2.0605509304119125,
+      "grad_norm": 1.620012252015546,
+      "learning_rate": 2.7011672328696316e-06,
+      "loss": 0.6767,
+      "step": 52100
+    },
+    {
+      "epoch": 2.0609464296308015,
+      "grad_norm": 1.4335361240090754,
+      "learning_rate": 2.6991241023866922e-06,
+      "loss": 0.6487,
+      "step": 52110
+    },
+    {
+      "epoch": 2.0613419288496906,
+      "grad_norm": 1.4164320536519117,
+      "learning_rate": 2.6970814591898465e-06,
+      "loss": 0.6725,
+      "step": 52120
+    },
+    {
+      "epoch": 2.0617374280685796,
+      "grad_norm": 1.7188702025251388,
+      "learning_rate": 2.695039303711696e-06,
+      "loss": 0.6446,
+      "step": 52130
+    },
+    {
+      "epoch": 2.0621329272874687,
+      "grad_norm": 1.6467002183447301,
+      "learning_rate": 2.6929976363847298e-06,
+      "loss": 0.6744,
+      "step": 52140
+    },
+    {
+      "epoch": 2.0625284265063577,
+      "grad_norm": 1.260646261849889,
+      "learning_rate": 2.690956457641343e-06,
+      "loss": 0.6742,
+      "step": 52150
+    },
+    {
+      "epoch": 2.0629239257252467,
+      "grad_norm": 1.5837327861819737,
+      "learning_rate": 2.688915767913819e-06,
+      "loss": 0.6565,
+      "step": 52160
+    },
+    {
+      "epoch": 2.063319424944136,
+      "grad_norm": 1.5758511586232795,
+      "learning_rate": 2.6868755676343388e-06,
+      "loss": 0.6737,
+      "step": 52170
+    },
+    {
+      "epoch": 2.063714924163025,
+      "grad_norm": 1.3234507251222656,
+      "learning_rate": 2.684835857234987e-06,
+      "loss": 0.6814,
+      "step": 52180
+    },
+    {
+      "epoch": 2.064110423381914,
+      "grad_norm": 1.51400259042956,
+      "learning_rate": 2.6827966371477365e-06,
+      "loss": 0.6712,
+      "step": 52190
+    },
+    {
+      "epoch": 2.064505922600803,
+      "grad_norm": 1.6393694577024078,
+      "learning_rate": 2.680757907804458e-06,
+      "loss": 0.6886,
+      "step": 52200
+    },
+    {
+      "epoch": 2.064901421819692,
+      "grad_norm": 1.8605484471750249,
+      "learning_rate": 2.6787196696369226e-06,
+      "loss": 0.6625,
+      "step": 52210
+    },
+    {
+      "epoch": 2.065296921038581,
+      "grad_norm": 1.3868704108873273,
+      "learning_rate": 2.6766819230767927e-06,
+      "loss": 0.6959,
+      "step": 52220
+    },
+    {
+      "epoch": 2.06569242025747,
+      "grad_norm": 1.3622943554688556,
+      "learning_rate": 2.674644668555626e-06,
+      "loss": 0.6679,
+      "step": 52230
+    },
+    {
+      "epoch": 2.066087919476359,
+      "grad_norm": 1.5666464248519238,
+      "learning_rate": 2.6726079065048817e-06,
+      "loss": 0.6923,
+      "step": 52240
+    },
+    {
+      "epoch": 2.066483418695248,
+      "grad_norm": 1.6330178441254855,
+      "learning_rate": 2.670571637355908e-06,
+      "loss": 0.6647,
+      "step": 52250
+    },
+    {
+      "epoch": 2.066878917914137,
+      "grad_norm": 1.2037696642592384,
+      "learning_rate": 2.6685358615399546e-06,
+      "loss": 0.6865,
+      "step": 52260
+    },
+    {
+      "epoch": 2.067274417133026,
+      "grad_norm": 1.3697055894795223,
+      "learning_rate": 2.6665005794881615e-06,
+      "loss": 0.6932,
+      "step": 52270
+    },
+    {
+      "epoch": 2.067669916351915,
+      "grad_norm": 1.4453789879478172,
+      "learning_rate": 2.664465791631569e-06,
+      "loss": 0.6841,
+      "step": 52280
+    },
+    {
+      "epoch": 2.0680654155708043,
+      "grad_norm": 1.5849662028931002,
+      "learning_rate": 2.662431498401108e-06,
+      "loss": 0.6824,
+      "step": 52290
+    },
+    {
+      "epoch": 2.0684609147896933,
+      "grad_norm": 1.6251055355777595,
+      "learning_rate": 2.660397700227609e-06,
+      "loss": 0.6755,
+      "step": 52300
+    },
+    {
+      "epoch": 2.0688564140085823,
+      "grad_norm": 1.6157866842704838,
+      "learning_rate": 2.658364397541795e-06,
+      "loss": 0.6747,
+      "step": 52310
+    },
+    {
+      "epoch": 2.0692519132274714,
+      "grad_norm": 1.3054970930113385,
+      "learning_rate": 2.6563315907742837e-06,
+      "loss": 0.6948,
+      "step": 52320
+    },
+    {
+      "epoch": 2.0696474124463604,
+      "grad_norm": 1.433933684809099,
+      "learning_rate": 2.654299280355588e-06,
+      "loss": 0.6765,
+      "step": 52330
+    },
+    {
+      "epoch": 2.0700429116652495,
+      "grad_norm": 1.6551965881475086,
+      "learning_rate": 2.6522674667161197e-06,
+      "loss": 0.6665,
+      "step": 52340
+    },
+    {
+      "epoch": 2.0704384108841385,
+      "grad_norm": 1.2629502516352678,
+      "learning_rate": 2.6502361502861774e-06,
+      "loss": 0.6871,
+      "step": 52350
+    },
+    {
+      "epoch": 2.0708339101030275,
+      "grad_norm": 1.35933503253559,
+      "learning_rate": 2.6482053314959643e-06,
+      "loss": 0.6796,
+      "step": 52360
+    },
+    {
+      "epoch": 2.0712294093219166,
+      "grad_norm": 1.5336653955637916,
+      "learning_rate": 2.646175010775569e-06,
+      "loss": 0.646,
+      "step": 52370
+    },
+    {
+      "epoch": 2.0716249085408056,
+      "grad_norm": 1.3641744591861404,
+      "learning_rate": 2.6441451885549817e-06,
+      "loss": 0.6822,
+      "step": 52380
+    },
+    {
+      "epoch": 2.0720204077596946,
+      "grad_norm": 1.2623193094389407,
+      "learning_rate": 2.642115865264081e-06,
+      "loss": 0.6736,
+      "step": 52390
+    },
+    {
+      "epoch": 2.0724159069785837,
+      "grad_norm": 1.5433798706176225,
+      "learning_rate": 2.640087041332646e-06,
+      "loss": 0.6872,
+      "step": 52400
+    },
+    {
+      "epoch": 2.0728114061974727,
+      "grad_norm": 1.4669634110621415,
+      "learning_rate": 2.638058717190345e-06,
+      "loss": 0.6763,
+      "step": 52410
+    },
+    {
+      "epoch": 2.0732069054163618,
+      "grad_norm": 1.3891825449392798,
+      "learning_rate": 2.6360308932667412e-06,
+      "loss": 0.6897,
+      "step": 52420
+    },
+    {
+      "epoch": 2.073602404635251,
+      "grad_norm": 1.4227344362373493,
+      "learning_rate": 2.6340035699912956e-06,
+      "loss": 0.6769,
+      "step": 52430
+    },
+    {
+      "epoch": 2.07399790385414,
+      "grad_norm": 1.5198227351741198,
+      "learning_rate": 2.631976747793359e-06,
+      "loss": 0.6787,
+      "step": 52440
+    },
+    {
+      "epoch": 2.074393403073029,
+      "grad_norm": 1.810672175998034,
+      "learning_rate": 2.629950427102178e-06,
+      "loss": 0.6567,
+      "step": 52450
+    },
+    {
+      "epoch": 2.074788902291918,
+      "grad_norm": 1.7971121818083846,
+      "learning_rate": 2.6279246083468907e-06,
+      "loss": 0.724,
+      "step": 52460
+    },
+    {
+      "epoch": 2.075184401510807,
+      "grad_norm": 1.600834365798754,
+      "learning_rate": 2.625899291956534e-06,
+      "loss": 0.6803,
+      "step": 52470
+    },
+    {
+      "epoch": 2.075579900729696,
+      "grad_norm": 1.2861592094948155,
+      "learning_rate": 2.623874478360032e-06,
+      "loss": 0.6671,
+      "step": 52480
+    },
+    {
+      "epoch": 2.075975399948585,
+      "grad_norm": 1.437374644881475,
+      "learning_rate": 2.62185016798621e-06,
+      "loss": 0.6529,
+      "step": 52490
+    },
+    {
+      "epoch": 2.076370899167474,
+      "grad_norm": 1.5306426768862746,
+      "learning_rate": 2.6198263612637763e-06,
+      "loss": 0.6386,
+      "step": 52500
+    },
+    {
+      "epoch": 2.076766398386363,
+      "grad_norm": 1.3526599147974985,
+      "learning_rate": 2.6178030586213444e-06,
+      "loss": 0.6565,
+      "step": 52510
+    },
+    {
+      "epoch": 2.077161897605252,
+      "grad_norm": 1.5255294019284535,
+      "learning_rate": 2.6157802604874107e-06,
+      "loss": 0.7065,
+      "step": 52520
+    },
+    {
+      "epoch": 2.077557396824141,
+      "grad_norm": 1.5097679189189859,
+      "learning_rate": 2.613757967290372e-06,
+      "loss": 0.7013,
+      "step": 52530
+    },
+    {
+      "epoch": 2.0779528960430302,
+      "grad_norm": 1.3806041516365242,
+      "learning_rate": 2.611736179458513e-06,
+      "loss": 0.6665,
+      "step": 52540
+    },
+    {
+      "epoch": 2.0783483952619193,
+      "grad_norm": 1.520370423641659,
+      "learning_rate": 2.609714897420018e-06,
+      "loss": 0.6847,
+      "step": 52550
+    },
+    {
+      "epoch": 2.0787438944808083,
+      "grad_norm": 1.386264884028776,
+      "learning_rate": 2.607694121602954e-06,
+      "loss": 0.6685,
+      "step": 52560
+    },
+    {
+      "epoch": 2.0791393936996974,
+      "grad_norm": 1.5455819338474668,
+      "learning_rate": 2.605673852435291e-06,
+      "loss": 0.6548,
+      "step": 52570
+    },
+    {
+      "epoch": 2.0795348929185864,
+      "grad_norm": 1.4819437150042558,
+      "learning_rate": 2.603654090344885e-06,
+      "loss": 0.6805,
+      "step": 52580
+    },
+    {
+      "epoch": 2.0799303921374754,
+      "grad_norm": 1.512918354138061,
+      "learning_rate": 2.6016348357594888e-06,
+      "loss": 0.6768,
+      "step": 52590
+    },
+    {
+      "epoch": 2.0803258913563645,
+      "grad_norm": 1.6184862258917365,
+      "learning_rate": 2.5996160891067434e-06,
+      "loss": 0.7019,
+      "step": 52600
+    },
+    {
+      "epoch": 2.0807213905752535,
+      "grad_norm": 1.4476359865777522,
+      "learning_rate": 2.5975978508141888e-06,
+      "loss": 0.6578,
+      "step": 52610
+    },
+    {
+      "epoch": 2.0811168897941426,
+      "grad_norm": 1.2904862258690026,
+      "learning_rate": 2.595580121309249e-06,
+      "loss": 0.6831,
+      "step": 52620
+    },
+    {
+      "epoch": 2.0815123890130316,
+      "grad_norm": 1.4542574663689345,
+      "learning_rate": 2.593562901019249e-06,
+      "loss": 0.6453,
+      "step": 52630
+    },
+    {
+      "epoch": 2.0819078882319206,
+      "grad_norm": 1.541047213286812,
+      "learning_rate": 2.591546190371398e-06,
+      "loss": 0.6861,
+      "step": 52640
+    },
+    {
+      "epoch": 2.0823033874508097,
+      "grad_norm": 1.6199363763865149,
+      "learning_rate": 2.5895299897928006e-06,
+      "loss": 0.6846,
+      "step": 52650
+    },
+    {
+      "epoch": 2.0826988866696987,
+      "grad_norm": 1.5446952311698432,
+      "learning_rate": 2.587514299710456e-06,
+      "loss": 0.667,
+      "step": 52660
+    },
+    {
+      "epoch": 2.0830943858885878,
+      "grad_norm": 1.576213081727735,
+      "learning_rate": 2.585499120551252e-06,
+      "loss": 0.6638,
+      "step": 52670
+    },
+    {
+      "epoch": 2.083489885107477,
+      "grad_norm": 1.2719748237983468,
+      "learning_rate": 2.583484452741967e-06,
+      "loss": 0.6748,
+      "step": 52680
+    },
+    {
+      "epoch": 2.083885384326366,
+      "grad_norm": 1.1710654079071265,
+      "learning_rate": 2.5814702967092753e-06,
+      "loss": 0.6539,
+      "step": 52690
+    },
+    {
+      "epoch": 2.084280883545255,
+      "grad_norm": 1.398658133848948,
+      "learning_rate": 2.5794566528797415e-06,
+      "loss": 0.646,
+      "step": 52700
+    },
+    {
+      "epoch": 2.084676382764144,
+      "grad_norm": 1.2338963160268046,
+      "learning_rate": 2.5774435216798167e-06,
+      "loss": 0.6865,
+      "step": 52710
+    },
+    {
+      "epoch": 2.085071881983033,
+      "grad_norm": 1.697359288983376,
+      "learning_rate": 2.575430903535853e-06,
+      "loss": 0.6939,
+      "step": 52720
+    },
+    {
+      "epoch": 2.085467381201922,
+      "grad_norm": 1.2576503902545464,
+      "learning_rate": 2.573418798874083e-06,
+      "loss": 0.7158,
+      "step": 52730
+    },
+    {
+      "epoch": 2.085862880420811,
+      "grad_norm": 1.3741506318956629,
+      "learning_rate": 2.5714072081206407e-06,
+      "loss": 0.6665,
+      "step": 52740
+    },
+    {
+      "epoch": 2.0862583796397,
+      "grad_norm": 1.3470493251290068,
+      "learning_rate": 2.5693961317015427e-06,
+      "loss": 0.6736,
+      "step": 52750
+    },
+    {
+      "epoch": 2.086653878858589,
+      "grad_norm": 1.4764124229156748,
+      "learning_rate": 2.5673855700427046e-06,
+      "loss": 0.6806,
+      "step": 52760
+    },
+    {
+      "epoch": 2.087049378077478,
+      "grad_norm": 1.588954147864825,
+      "learning_rate": 2.565375523569925e-06,
+      "loss": 0.6765,
+      "step": 52770
+    },
+    {
+      "epoch": 2.087444877296367,
+      "grad_norm": 1.5939057146452666,
+      "learning_rate": 2.5633659927089007e-06,
+      "loss": 0.6687,
+      "step": 52780
+    },
+    {
+      "epoch": 2.0878403765152562,
+      "grad_norm": 1.1449002652091103,
+      "learning_rate": 2.561356977885213e-06,
+      "loss": 0.6782,
+      "step": 52790
+    },
+    {
+      "epoch": 2.0882358757341453,
+      "grad_norm": 1.2665726635762782,
+      "learning_rate": 2.5593484795243413e-06,
+      "loss": 0.6358,
+      "step": 52800
+    },
+    {
+      "epoch": 2.0886313749530343,
+      "grad_norm": 1.2797589686314872,
+      "learning_rate": 2.557340498051644e-06,
+      "loss": 0.7054,
+      "step": 52810
+    },
+    {
+      "epoch": 2.0890268741719233,
+      "grad_norm": 1.368405470057439,
+      "learning_rate": 2.5553330338923833e-06,
+      "loss": 0.6835,
+      "step": 52820
+    },
+    {
+      "epoch": 2.0894223733908124,
+      "grad_norm": 1.5883166974820286,
+      "learning_rate": 2.5533260874717013e-06,
+      "loss": 0.7001,
+      "step": 52830
+    },
+    {
+      "epoch": 2.0898178726097014,
+      "grad_norm": 1.4782327155067416,
+      "learning_rate": 2.5513196592146393e-06,
+      "loss": 0.6664,
+      "step": 52840
+    },
+    {
+      "epoch": 2.0902133718285905,
+      "grad_norm": 1.347431111755391,
+      "learning_rate": 2.5493137495461208e-06,
+      "loss": 0.7057,
+      "step": 52850
+    },
+    {
+      "epoch": 2.0906088710474795,
+      "grad_norm": 1.5451260950793235,
+      "learning_rate": 2.5473083588909654e-06,
+      "loss": 0.6547,
+      "step": 52860
+    },
+    {
+      "epoch": 2.0910043702663685,
+      "grad_norm": 1.2349444793984876,
+      "learning_rate": 2.5453034876738804e-06,
+      "loss": 0.6763,
+      "step": 52870
+    },
+    {
+      "epoch": 2.0913998694852576,
+      "grad_norm": 1.2750941373065188,
+      "learning_rate": 2.54329913631946e-06,
+      "loss": 0.6634,
+      "step": 52880
+    },
+    {
+      "epoch": 2.0917953687041466,
+      "grad_norm": 1.3420017160413147,
+      "learning_rate": 2.541295305252196e-06,
+      "loss": 0.6338,
+      "step": 52890
+    },
+    {
+      "epoch": 2.0921908679230357,
+      "grad_norm": 1.3481168399417625,
+      "learning_rate": 2.539291994896461e-06,
+      "loss": 0.6848,
+      "step": 52900
+    },
+    {
+      "epoch": 2.0925863671419247,
+      "grad_norm": 1.3653752338546252,
+      "learning_rate": 2.5372892056765264e-06,
+      "loss": 0.6793,
+      "step": 52910
+    },
+    {
+      "epoch": 2.0929818663608137,
+      "grad_norm": 1.5091123395393253,
+      "learning_rate": 2.5352869380165467e-06,
+      "loss": 0.6321,
+      "step": 52920
+    },
+    {
+      "epoch": 2.0933773655797028,
+      "grad_norm": 1.4791289171450297,
+      "learning_rate": 2.533285192340568e-06,
+      "loss": 0.6981,
+      "step": 52930
+    },
+    {
+      "epoch": 2.093772864798592,
+      "grad_norm": 1.3951629220824813,
+      "learning_rate": 2.531283969072522e-06,
+      "loss": 0.6315,
+      "step": 52940
+    },
+    {
+      "epoch": 2.094168364017481,
+      "grad_norm": 1.556194253506756,
+      "learning_rate": 2.5292832686362403e-06,
+      "loss": 0.6762,
+      "step": 52950
+    },
+    {
+      "epoch": 2.09456386323637,
+      "grad_norm": 1.3563370697161923,
+      "learning_rate": 2.5272830914554302e-06,
+      "loss": 0.6576,
+      "step": 52960
+    },
+    {
+      "epoch": 2.094959362455259,
+      "grad_norm": 1.4635084950990478,
+      "learning_rate": 2.5252834379537004e-06,
+      "loss": 0.6764,
+      "step": 52970
+    },
+    {
+      "epoch": 2.0953548616741484,
+      "grad_norm": 1.2901409397049215,
+      "learning_rate": 2.52328430855454e-06,
+      "loss": 0.668,
+      "step": 52980
+    },
+    {
+      "epoch": 2.095750360893037,
+      "grad_norm": 1.3269814231219497,
+      "learning_rate": 2.521285703681333e-06,
+      "loss": 0.6581,
+      "step": 52990
+    },
+    {
+      "epoch": 2.0961458601119265,
+      "grad_norm": 1.5281349010595011,
+      "learning_rate": 2.5192876237573464e-06,
+      "loss": 0.667,
+      "step": 53000
+    },
+    {
+      "epoch": 2.0965413593308155,
+      "grad_norm": 1.3806584739507546,
+      "learning_rate": 2.5172900692057433e-06,
+      "loss": 0.6538,
+      "step": 53010
+    },
+    {
+      "epoch": 2.0969368585497046,
+      "grad_norm": 1.382791330061861,
+      "learning_rate": 2.5152930404495666e-06,
+      "loss": 0.6716,
+      "step": 53020
+    },
+    {
+      "epoch": 2.0973323577685936,
+      "grad_norm": 1.6452506356859764,
+      "learning_rate": 2.513296537911758e-06,
+      "loss": 0.6574,
+      "step": 53030
+    },
+    {
+      "epoch": 2.0977278569874827,
+      "grad_norm": 1.3803466437735963,
+      "learning_rate": 2.51130056201514e-06,
+      "loss": 0.6697,
+      "step": 53040
+    },
+    {
+      "epoch": 2.0981233562063717,
+      "grad_norm": 1.3989944734573991,
+      "learning_rate": 2.509305113182425e-06,
+      "loss": 0.6586,
+      "step": 53050
+    },
+    {
+      "epoch": 2.0985188554252607,
+      "grad_norm": 1.56645721200081,
+      "learning_rate": 2.5073101918362154e-06,
+      "loss": 0.6738,
+      "step": 53060
+    },
+    {
+      "epoch": 2.0989143546441498,
+      "grad_norm": 1.6086521453677383,
+      "learning_rate": 2.505315798399003e-06,
+      "loss": 0.6112,
+      "step": 53070
+    },
+    {
+      "epoch": 2.099309853863039,
+      "grad_norm": 1.3652055358391482,
+      "learning_rate": 2.5033219332931637e-06,
+      "loss": 0.6753,
+      "step": 53080
+    },
+    {
+      "epoch": 2.099705353081928,
+      "grad_norm": 1.263788163685856,
+      "learning_rate": 2.5013285969409673e-06,
+      "loss": 0.6757,
+      "step": 53090
+    },
+    {
+      "epoch": 2.100100852300817,
+      "grad_norm": 1.5017467112050908,
+      "learning_rate": 2.499335789764566e-06,
+      "loss": 0.7035,
+      "step": 53100
+    },
+    {
+      "epoch": 2.100496351519706,
+      "grad_norm": 1.70164536757861,
+      "learning_rate": 2.497343512186001e-06,
+      "loss": 0.6854,
+      "step": 53110
+    },
+    {
+      "epoch": 2.100891850738595,
+      "grad_norm": 1.5623921866035804,
+      "learning_rate": 2.4953517646272052e-06,
+      "loss": 0.7,
+      "step": 53120
+    },
+    {
+      "epoch": 2.101287349957484,
+      "grad_norm": 1.2149791259308627,
+      "learning_rate": 2.493360547509994e-06,
+      "loss": 0.6983,
+      "step": 53130
+    },
+    {
+      "epoch": 2.101682849176373,
+      "grad_norm": 1.4920892407652067,
+      "learning_rate": 2.4913698612560774e-06,
+      "loss": 0.695,
+      "step": 53140
+    },
+    {
+      "epoch": 2.102078348395262,
+      "grad_norm": 1.6124787133435043,
+      "learning_rate": 2.4893797062870435e-06,
+      "loss": 0.6613,
+      "step": 53150
+    },
+    {
+      "epoch": 2.102473847614151,
+      "grad_norm": 1.4268511241186925,
+      "learning_rate": 2.4873900830243787e-06,
+      "loss": 0.6651,
+      "step": 53160
+    },
+    {
+      "epoch": 2.10286934683304,
+      "grad_norm": 1.3137992140730472,
+      "learning_rate": 2.4854009918894446e-06,
+      "loss": 0.6765,
+      "step": 53170
+    },
+    {
+      "epoch": 2.103264846051929,
+      "grad_norm": 1.8234777466559235,
+      "learning_rate": 2.4834124333035016e-06,
+      "loss": 0.6384,
+      "step": 53180
+    },
+    {
+      "epoch": 2.1036603452708182,
+      "grad_norm": 1.350062295028163,
+      "learning_rate": 2.481424407687688e-06,
+      "loss": 0.671,
+      "step": 53190
+    },
+    {
+      "epoch": 2.1040558444897073,
+      "grad_norm": 1.5857495747917962,
+      "learning_rate": 2.4794369154630387e-06,
+      "loss": 0.6576,
+      "step": 53200
+    },
+    {
+      "epoch": 2.1044513437085963,
+      "grad_norm": 1.540036969933618,
+      "learning_rate": 2.4774499570504663e-06,
+      "loss": 0.6601,
+      "step": 53210
+    },
+    {
+      "epoch": 2.1048468429274854,
+      "grad_norm": 1.3301296719195193,
+      "learning_rate": 2.475463532870777e-06,
+      "loss": 0.6656,
+      "step": 53220
+    },
+    {
+      "epoch": 2.1052423421463744,
+      "grad_norm": 1.7453460723866556,
+      "learning_rate": 2.47347764334466e-06,
+      "loss": 0.6523,
+      "step": 53230
+    },
+    {
+      "epoch": 2.1056378413652634,
+      "grad_norm": 1.4527756973509505,
+      "learning_rate": 2.4714922888926947e-06,
+      "loss": 0.6693,
+      "step": 53240
+    },
+    {
+      "epoch": 2.1060333405841525,
+      "grad_norm": 1.7908665413682205,
+      "learning_rate": 2.4695074699353418e-06,
+      "loss": 0.6645,
+      "step": 53250
+    },
+    {
+      "epoch": 2.1064288398030415,
+      "grad_norm": 1.26526736653631,
+      "learning_rate": 2.4675231868929554e-06,
+      "loss": 0.6653,
+      "step": 53260
+    },
+    {
+      "epoch": 2.1068243390219306,
+      "grad_norm": 1.9698257155815406,
+      "learning_rate": 2.4655394401857694e-06,
+      "loss": 0.636,
+      "step": 53270
+    },
+    {
+      "epoch": 2.1072198382408196,
+      "grad_norm": 1.275203795446927,
+      "learning_rate": 2.463556230233911e-06,
+      "loss": 0.6896,
+      "step": 53280
+    },
+    {
+      "epoch": 2.1076153374597086,
+      "grad_norm": 1.4919697929138263,
+      "learning_rate": 2.4615735574573884e-06,
+      "loss": 0.672,
+      "step": 53290
+    },
+    {
+      "epoch": 2.1080108366785977,
+      "grad_norm": 1.4886663256821036,
+      "learning_rate": 2.4595914222760973e-06,
+      "loss": 0.6697,
+      "step": 53300
+    },
+    {
+      "epoch": 2.1084063358974867,
+      "grad_norm": 1.3169866549925069,
+      "learning_rate": 2.457609825109819e-06,
+      "loss": 0.6838,
+      "step": 53310
+    },
+    {
+      "epoch": 2.1088018351163758,
+      "grad_norm": 1.358044933100365,
+      "learning_rate": 2.4556287663782246e-06,
+      "loss": 0.6726,
+      "step": 53320
+    },
+    {
+      "epoch": 2.109197334335265,
+      "grad_norm": 1.5942567648283803,
+      "learning_rate": 2.453648246500864e-06,
+      "loss": 0.6957,
+      "step": 53330
+    },
+    {
+      "epoch": 2.109592833554154,
+      "grad_norm": 1.6749818744613212,
+      "learning_rate": 2.4516682658971834e-06,
+      "loss": 0.6396,
+      "step": 53340
+    },
+    {
+      "epoch": 2.109988332773043,
+      "grad_norm": 1.2764814017765473,
+      "learning_rate": 2.449688824986505e-06,
+      "loss": 0.6364,
+      "step": 53350
+    },
+    {
+      "epoch": 2.110383831991932,
+      "grad_norm": 1.3994920100399224,
+      "learning_rate": 2.447709924188039e-06,
+      "loss": 0.6344,
+      "step": 53360
+    },
+    {
+      "epoch": 2.110779331210821,
+      "grad_norm": 1.5331450521644923,
+      "learning_rate": 2.4457315639208874e-06,
+      "loss": 0.6571,
+      "step": 53370
+    },
+    {
+      "epoch": 2.11117483042971,
+      "grad_norm": 1.6940648811409018,
+      "learning_rate": 2.443753744604028e-06,
+      "loss": 0.6609,
+      "step": 53380
+    },
+    {
+      "epoch": 2.111570329648599,
+      "grad_norm": 1.4692843636583646,
+      "learning_rate": 2.4417764666563333e-06,
+      "loss": 0.671,
+      "step": 53390
+    },
+    {
+      "epoch": 2.111965828867488,
+      "grad_norm": 1.4539245070302387,
+      "learning_rate": 2.4397997304965527e-06,
+      "loss": 0.6607,
+      "step": 53400
+    },
+    {
+      "epoch": 2.112361328086377,
+      "grad_norm": 1.49416317049423,
+      "learning_rate": 2.437823536543331e-06,
+      "loss": 0.6918,
+      "step": 53410
+    },
+    {
+      "epoch": 2.112756827305266,
+      "grad_norm": 1.7328016158080546,
+      "learning_rate": 2.4358478852151858e-06,
+      "loss": 0.6419,
+      "step": 53420
+    },
+    {
+      "epoch": 2.113152326524155,
+      "grad_norm": 1.7896688136449164,
+      "learning_rate": 2.4338727769305297e-06,
+      "loss": 0.6431,
+      "step": 53430
+    },
+    {
+      "epoch": 2.1135478257430442,
+      "grad_norm": 1.3489596809357487,
+      "learning_rate": 2.431898212107655e-06,
+      "loss": 0.6492,
+      "step": 53440
+    },
+    {
+      "epoch": 2.1139433249619333,
+      "grad_norm": 1.6706352027823188,
+      "learning_rate": 2.4299241911647437e-06,
+      "loss": 0.6458,
+      "step": 53450
+    },
+    {
+      "epoch": 2.1143388241808223,
+      "grad_norm": 1.3189804564082626,
+      "learning_rate": 2.4279507145198555e-06,
+      "loss": 0.6589,
+      "step": 53460
+    },
+    {
+      "epoch": 2.1147343233997113,
+      "grad_norm": 1.3579734464037865,
+      "learning_rate": 2.425977782590943e-06,
+      "loss": 0.6771,
+      "step": 53470
+    },
+    {
+      "epoch": 2.1151298226186004,
+      "grad_norm": 1.4704679190284409,
+      "learning_rate": 2.4240053957958355e-06,
+      "loss": 0.6746,
+      "step": 53480
+    },
+    {
+      "epoch": 2.1155253218374894,
+      "grad_norm": 1.4408933641084356,
+      "learning_rate": 2.422033554552255e-06,
+      "loss": 0.7022,
+      "step": 53490
+    },
+    {
+      "epoch": 2.1159208210563785,
+      "grad_norm": 1.4391374918190838,
+      "learning_rate": 2.4200622592777988e-06,
+      "loss": 0.6666,
+      "step": 53500
+    },
+    {
+      "epoch": 2.1163163202752675,
+      "grad_norm": 1.2962933769133393,
+      "learning_rate": 2.418091510389959e-06,
+      "loss": 0.6553,
+      "step": 53510
+    },
+    {
+      "epoch": 2.1167118194941565,
+      "grad_norm": 1.5751859118775398,
+      "learning_rate": 2.416121308306103e-06,
+      "loss": 0.6571,
+      "step": 53520
+    },
+    {
+      "epoch": 2.1171073187130456,
+      "grad_norm": 1.625446807409782,
+      "learning_rate": 2.414151653443486e-06,
+      "loss": 0.6768,
+      "step": 53530
+    },
+    {
+      "epoch": 2.1175028179319346,
+      "grad_norm": 1.2434371830761635,
+      "learning_rate": 2.412182546219247e-06,
+      "loss": 0.6731,
+      "step": 53540
+    },
+    {
+      "epoch": 2.1178983171508237,
+      "grad_norm": 1.512027827644632,
+      "learning_rate": 2.4102139870504106e-06,
+      "loss": 0.664,
+      "step": 53550
+    },
+    {
+      "epoch": 2.1182938163697127,
+      "grad_norm": 1.2590069779396396,
+      "learning_rate": 2.4082459763538806e-06,
+      "loss": 0.6708,
+      "step": 53560
+    },
+    {
+      "epoch": 2.1186893155886017,
+      "grad_norm": 1.5501126021660727,
+      "learning_rate": 2.4062785145464535e-06,
+      "loss": 0.6787,
+      "step": 53570
+    },
+    {
+      "epoch": 2.119084814807491,
+      "grad_norm": 1.6408829605958855,
+      "learning_rate": 2.4043116020448004e-06,
+      "loss": 0.6948,
+      "step": 53580
+    },
+    {
+      "epoch": 2.11948031402638,
+      "grad_norm": 1.5343757391839363,
+      "learning_rate": 2.4023452392654785e-06,
+      "loss": 0.6245,
+      "step": 53590
+    },
+    {
+      "epoch": 2.119875813245269,
+      "grad_norm": 1.5872563076749362,
+      "learning_rate": 2.4003794266249325e-06,
+      "loss": 0.689,
+      "step": 53600
+    },
+    {
+      "epoch": 2.120271312464158,
+      "grad_norm": 1.357578009161746,
+      "learning_rate": 2.3984141645394854e-06,
+      "loss": 0.6659,
+      "step": 53610
+    },
+    {
+      "epoch": 2.120666811683047,
+      "grad_norm": 1.2616945437501441,
+      "learning_rate": 2.3964494534253496e-06,
+      "loss": 0.6569,
+      "step": 53620
+    },
+    {
+      "epoch": 2.121062310901936,
+      "grad_norm": 1.1731663550018403,
+      "learning_rate": 2.394485293698613e-06,
+      "loss": 0.7181,
+      "step": 53630
+    },
+    {
+      "epoch": 2.121457810120825,
+      "grad_norm": 1.632773518719414,
+      "learning_rate": 2.3925216857752543e-06,
+      "loss": 0.6559,
+      "step": 53640
+    },
+    {
+      "epoch": 2.121853309339714,
+      "grad_norm": 1.52230042364544,
+      "learning_rate": 2.3905586300711314e-06,
+      "loss": 0.6636,
+      "step": 53650
+    },
+    {
+      "epoch": 2.122248808558603,
+      "grad_norm": 1.2901725974737541,
+      "learning_rate": 2.388596127001985e-06,
+      "loss": 0.7103,
+      "step": 53660
+    },
+    {
+      "epoch": 2.122644307777492,
+      "grad_norm": 1.4511785611787091,
+      "learning_rate": 2.386634176983438e-06,
+      "loss": 0.6896,
+      "step": 53670
+    },
+    {
+      "epoch": 2.123039806996381,
+      "grad_norm": 1.607219965731853,
+      "learning_rate": 2.3846727804310014e-06,
+      "loss": 0.6443,
+      "step": 53680
+    },
+    {
+      "epoch": 2.12343530621527,
+      "grad_norm": 1.5111949409734204,
+      "learning_rate": 2.382711937760062e-06,
+      "loss": 0.6716,
+      "step": 53690
+    },
+    {
+      "epoch": 2.1238308054341593,
+      "grad_norm": 1.4319820304323034,
+      "learning_rate": 2.3807516493858955e-06,
+      "loss": 0.6543,
+      "step": 53700
+    },
+    {
+      "epoch": 2.1242263046530483,
+      "grad_norm": 1.7562690778597556,
+      "learning_rate": 2.378791915723655e-06,
+      "loss": 0.6766,
+      "step": 53710
+    },
+    {
+      "epoch": 2.1246218038719373,
+      "grad_norm": 1.3989269646335447,
+      "learning_rate": 2.3768327371883813e-06,
+      "loss": 0.6829,
+      "step": 53720
+    },
+    {
+      "epoch": 2.1250173030908264,
+      "grad_norm": 1.5376655864437776,
+      "learning_rate": 2.3748741141949915e-06,
+      "loss": 0.6457,
+      "step": 53730
+    },
+    {
+      "epoch": 2.1254128023097154,
+      "grad_norm": 1.4184390567852088,
+      "learning_rate": 2.3729160471582923e-06,
+      "loss": 0.6642,
+      "step": 53740
+    },
+    {
+      "epoch": 2.1258083015286044,
+      "grad_norm": 1.4809553417427328,
+      "learning_rate": 2.3709585364929666e-06,
+      "loss": 0.6596,
+      "step": 53750
+    },
+    {
+      "epoch": 2.1262038007474935,
+      "grad_norm": 1.3336490811468675,
+      "learning_rate": 2.3690015826135794e-06,
+      "loss": 0.6686,
+      "step": 53760
+    },
+    {
+      "epoch": 2.1265992999663825,
+      "grad_norm": 1.4565373880158172,
+      "learning_rate": 2.367045185934584e-06,
+      "loss": 0.6744,
+      "step": 53770
+    },
+    {
+      "epoch": 2.1269947991852716,
+      "grad_norm": 1.3286726261138002,
+      "learning_rate": 2.36508934687031e-06,
+      "loss": 0.6837,
+      "step": 53780
+    },
+    {
+      "epoch": 2.1273902984041606,
+      "grad_norm": 1.4325503149989218,
+      "learning_rate": 2.3631340658349688e-06,
+      "loss": 0.6732,
+      "step": 53790
+    },
+    {
+      "epoch": 2.1277857976230496,
+      "grad_norm": 1.4666789408885827,
+      "learning_rate": 2.3611793432426593e-06,
+      "loss": 0.6744,
+      "step": 53800
+    },
+    {
+      "epoch": 2.1281812968419387,
+      "grad_norm": 1.571490968594034,
+      "learning_rate": 2.3592251795073564e-06,
+      "loss": 0.6869,
+      "step": 53810
+    },
+    {
+      "epoch": 2.1285767960608277,
+      "grad_norm": 1.292522513798636,
+      "learning_rate": 2.3572715750429155e-06,
+      "loss": 0.6968,
+      "step": 53820
+    },
+    {
+      "epoch": 2.1289722952797168,
+      "grad_norm": 1.3488799659021744,
+      "learning_rate": 2.3553185302630815e-06,
+      "loss": 0.7034,
+      "step": 53830
+    },
+    {
+      "epoch": 2.129367794498606,
+      "grad_norm": 1.4451203908014099,
+      "learning_rate": 2.3533660455814718e-06,
+      "loss": 0.6738,
+      "step": 53840
+    },
+    {
+      "epoch": 2.129763293717495,
+      "grad_norm": 1.393657861360009,
+      "learning_rate": 2.3514141214115927e-06,
+      "loss": 0.6559,
+      "step": 53850
+    },
+    {
+      "epoch": 2.130158792936384,
+      "grad_norm": 1.5912814754311821,
+      "learning_rate": 2.349462758166825e-06,
+      "loss": 0.6759,
+      "step": 53860
+    },
+    {
+      "epoch": 2.130554292155273,
+      "grad_norm": 1.5485787023789013,
+      "learning_rate": 2.347511956260437e-06,
+      "loss": 0.6736,
+      "step": 53870
+    },
+    {
+      "epoch": 2.130949791374162,
+      "grad_norm": 1.6080344965127011,
+      "learning_rate": 2.3455617161055726e-06,
+      "loss": 0.666,
+      "step": 53880
+    },
+    {
+      "epoch": 2.131345290593051,
+      "grad_norm": 1.531221782702087,
+      "learning_rate": 2.3436120381152614e-06,
+      "loss": 0.6696,
+      "step": 53890
+    },
+    {
+      "epoch": 2.13174078981194,
+      "grad_norm": 1.4445080563124875,
+      "learning_rate": 2.3416629227024117e-06,
+      "loss": 0.6479,
+      "step": 53900
+    },
+    {
+      "epoch": 2.132136289030829,
+      "grad_norm": 1.874177887668524,
+      "learning_rate": 2.3397143702798115e-06,
+      "loss": 0.6498,
+      "step": 53910
+    },
+    {
+      "epoch": 2.132531788249718,
+      "grad_norm": 1.3493915165648362,
+      "learning_rate": 2.337766381260129e-06,
+      "loss": 0.6527,
+      "step": 53920
+    },
+    {
+      "epoch": 2.132927287468607,
+      "grad_norm": 1.2167044156958442,
+      "learning_rate": 2.3358189560559193e-06,
+      "loss": 0.6788,
+      "step": 53930
+    },
+    {
+      "epoch": 2.133322786687496,
+      "grad_norm": 1.4357323115792433,
+      "learning_rate": 2.333872095079609e-06,
+      "loss": 0.691,
+      "step": 53940
+    },
+    {
+      "epoch": 2.1337182859063852,
+      "grad_norm": 1.306926339402921,
+      "learning_rate": 2.331925798743515e-06,
+      "loss": 0.6718,
+      "step": 53950
+    },
+    {
+      "epoch": 2.1341137851252743,
+      "grad_norm": 1.3008135420765468,
+      "learning_rate": 2.3299800674598245e-06,
+      "loss": 0.6743,
+      "step": 53960
+    },
+    {
+      "epoch": 2.1345092843441633,
+      "grad_norm": 1.425605224912783,
+      "learning_rate": 2.3280349016406147e-06,
+      "loss": 0.6526,
+      "step": 53970
+    },
+    {
+      "epoch": 2.1349047835630524,
+      "grad_norm": 1.4379848423230792,
+      "learning_rate": 2.3260903016978335e-06,
+      "loss": 0.6599,
+      "step": 53980
+    },
+    {
+      "epoch": 2.1353002827819414,
+      "grad_norm": 1.521055765933288,
+      "learning_rate": 2.324146268043319e-06,
+      "loss": 0.6873,
+      "step": 53990
+    },
+    {
+      "epoch": 2.1356957820008304,
+      "grad_norm": 1.319733720553865,
+      "learning_rate": 2.3222028010887816e-06,
+      "loss": 0.6593,
+      "step": 54000
+    },
+    {
+      "epoch": 2.1360912812197195,
+      "grad_norm": 1.2999696838911359,
+      "learning_rate": 2.320259901245812e-06,
+      "loss": 0.6943,
+      "step": 54010
+    },
+    {
+      "epoch": 2.1364867804386085,
+      "grad_norm": 1.6051226871964392,
+      "learning_rate": 2.318317568925888e-06,
+      "loss": 0.6562,
+      "step": 54020
+    },
+    {
+      "epoch": 2.1368822796574976,
+      "grad_norm": 1.8619276716937607,
+      "learning_rate": 2.31637580454036e-06,
+      "loss": 0.6891,
+      "step": 54030
+    },
+    {
+      "epoch": 2.1372777788763866,
+      "grad_norm": 1.59277340173497,
+      "learning_rate": 2.3144346085004597e-06,
+      "loss": 0.6563,
+      "step": 54040
+    },
+    {
+      "epoch": 2.1376732780952756,
+      "grad_norm": 1.273798492395337,
+      "learning_rate": 2.3124939812172982e-06,
+      "loss": 0.6849,
+      "step": 54050
+    },
+    {
+      "epoch": 2.1380687773141647,
+      "grad_norm": 1.321678538302541,
+      "learning_rate": 2.31055392310187e-06,
+      "loss": 0.6829,
+      "step": 54060
+    },
+    {
+      "epoch": 2.1384642765330537,
+      "grad_norm": 1.3963753388571825,
+      "learning_rate": 2.308614434565043e-06,
+      "loss": 0.6624,
+      "step": 54070
+    },
+    {
+      "epoch": 2.1388597757519427,
+      "grad_norm": 1.484462425097106,
+      "learning_rate": 2.3066755160175717e-06,
+      "loss": 0.6173,
+      "step": 54080
+    },
+    {
+      "epoch": 2.139255274970832,
+      "grad_norm": 1.6407093637554526,
+      "learning_rate": 2.3047371678700815e-06,
+      "loss": 0.6709,
+      "step": 54090
+    },
+    {
+      "epoch": 2.139650774189721,
+      "grad_norm": 1.509349572039471,
+      "learning_rate": 2.302799390533085e-06,
+      "loss": 0.6649,
+      "step": 54100
+    },
+    {
+      "epoch": 2.14004627340861,
+      "grad_norm": 1.4108010951524979,
+      "learning_rate": 2.300862184416967e-06,
+      "loss": 0.6792,
+      "step": 54110
+    },
+    {
+      "epoch": 2.140441772627499,
+      "grad_norm": 1.2472083765380304,
+      "learning_rate": 2.2989255499319984e-06,
+      "loss": 0.6652,
+      "step": 54120
+    },
+    {
+      "epoch": 2.140837271846388,
+      "grad_norm": 1.4015709150184785,
+      "learning_rate": 2.2969894874883207e-06,
+      "loss": 0.6619,
+      "step": 54130
+    },
+    {
+      "epoch": 2.141232771065277,
+      "grad_norm": 1.5410951199328564,
+      "learning_rate": 2.295053997495965e-06,
+      "loss": 0.6292,
+      "step": 54140
+    },
+    {
+      "epoch": 2.141628270284166,
+      "grad_norm": 1.6680565516903991,
+      "learning_rate": 2.293119080364827e-06,
+      "loss": 0.644,
+      "step": 54150
+    },
+    {
+      "epoch": 2.142023769503055,
+      "grad_norm": 1.3535567041046115,
+      "learning_rate": 2.291184736504695e-06,
+      "loss": 0.6705,
+      "step": 54160
+    },
+    {
+      "epoch": 2.142419268721944,
+      "grad_norm": 1.4945217778941242,
+      "learning_rate": 2.289250966325226e-06,
+      "loss": 0.6774,
+      "step": 54170
+    },
+    {
+      "epoch": 2.142814767940833,
+      "grad_norm": 1.434709533798301,
+      "learning_rate": 2.2873177702359635e-06,
+      "loss": 0.6858,
+      "step": 54180
+    },
+    {
+      "epoch": 2.143210267159722,
+      "grad_norm": 1.7442305799564761,
+      "learning_rate": 2.285385148646321e-06,
+      "loss": 0.638,
+      "step": 54190
+    },
+    {
+      "epoch": 2.143605766378611,
+      "grad_norm": 1.552536491614337,
+      "learning_rate": 2.283453101965598e-06,
+      "loss": 0.659,
+      "step": 54200
+    },
+    {
+      "epoch": 2.1440012655975003,
+      "grad_norm": 1.3283935986138802,
+      "learning_rate": 2.2815216306029663e-06,
+      "loss": 0.6883,
+      "step": 54210
+    },
+    {
+      "epoch": 2.1443967648163893,
+      "grad_norm": 1.509602973253364,
+      "learning_rate": 2.2795907349674817e-06,
+      "loss": 0.6659,
+      "step": 54220
+    },
+    {
+      "epoch": 2.1447922640352783,
+      "grad_norm": 1.5198556365869367,
+      "learning_rate": 2.2776604154680724e-06,
+      "loss": 0.6475,
+      "step": 54230
+    },
+    {
+      "epoch": 2.1451877632541674,
+      "grad_norm": 1.6092550299135604,
+      "learning_rate": 2.275730672513546e-06,
+      "loss": 0.6548,
+      "step": 54240
+    },
+    {
+      "epoch": 2.1455832624730564,
+      "grad_norm": 1.2477498386874621,
+      "learning_rate": 2.2738015065125925e-06,
+      "loss": 0.6733,
+      "step": 54250
+    },
+    {
+      "epoch": 2.1459787616919455,
+      "grad_norm": 1.492006757213174,
+      "learning_rate": 2.2718729178737718e-06,
+      "loss": 0.6244,
+      "step": 54260
+    },
+    {
+      "epoch": 2.1463742609108345,
+      "grad_norm": 1.5758683462952756,
+      "learning_rate": 2.2699449070055307e-06,
+      "loss": 0.6681,
+      "step": 54270
+    },
+    {
+      "epoch": 2.1467697601297235,
+      "grad_norm": 1.138003508520722,
+      "learning_rate": 2.2680174743161858e-06,
+      "loss": 0.6516,
+      "step": 54280
+    },
+    {
+      "epoch": 2.147165259348613,
+      "grad_norm": 1.5319169064967042,
+      "learning_rate": 2.2660906202139356e-06,
+      "loss": 0.6623,
+      "step": 54290
+    },
+    {
+      "epoch": 2.1475607585675016,
+      "grad_norm": 1.3087335506599393,
+      "learning_rate": 2.264164345106852e-06,
+      "loss": 0.6762,
+      "step": 54300
+    },
+    {
+      "epoch": 2.147956257786391,
+      "grad_norm": 1.3820860043636023,
+      "learning_rate": 2.2622386494028913e-06,
+      "loss": 0.6797,
+      "step": 54310
+    },
+    {
+      "epoch": 2.1483517570052797,
+      "grad_norm": 1.552956169200907,
+      "learning_rate": 2.2603135335098787e-06,
+      "loss": 0.6786,
+      "step": 54320
+    },
+    {
+      "epoch": 2.148747256224169,
+      "grad_norm": 1.331683367124008,
+      "learning_rate": 2.2583889978355252e-06,
+      "loss": 0.6497,
+      "step": 54330
+    },
+    {
+      "epoch": 2.1491427554430578,
+      "grad_norm": 1.320550715173064,
+      "learning_rate": 2.25646504278741e-06,
+      "loss": 0.6637,
+      "step": 54340
+    },
+    {
+      "epoch": 2.1495382546619473,
+      "grad_norm": 1.4287266092193445,
+      "learning_rate": 2.2545416687729977e-06,
+      "loss": 0.6645,
+      "step": 54350
+    },
+    {
+      "epoch": 2.149933753880836,
+      "grad_norm": 1.3896180673888243,
+      "learning_rate": 2.252618876199622e-06,
+      "loss": 0.6509,
+      "step": 54360
+    },
+    {
+      "epoch": 2.1503292530997253,
+      "grad_norm": 1.6553030079074602,
+      "learning_rate": 2.2506966654745023e-06,
+      "loss": 0.666,
+      "step": 54370
+    },
+    {
+      "epoch": 2.1507247523186144,
+      "grad_norm": 1.3238847842884285,
+      "learning_rate": 2.248775037004725e-06,
+      "loss": 0.6993,
+      "step": 54380
+    },
+    {
+      "epoch": 2.1511202515375034,
+      "grad_norm": 1.4404909444757465,
+      "learning_rate": 2.2468539911972643e-06,
+      "loss": 0.6371,
+      "step": 54390
+    },
+    {
+      "epoch": 2.1515157507563925,
+      "grad_norm": 1.609388912757096,
+      "learning_rate": 2.2449335284589567e-06,
+      "loss": 0.6639,
+      "step": 54400
+    },
+    {
+      "epoch": 2.1519112499752815,
+      "grad_norm": 1.8396210372809116,
+      "learning_rate": 2.2430136491965297e-06,
+      "loss": 0.6707,
+      "step": 54410
+    },
+    {
+      "epoch": 2.1523067491941705,
+      "grad_norm": 1.591304405457969,
+      "learning_rate": 2.241094353816577e-06,
+      "loss": 0.6494,
+      "step": 54420
+    },
+    {
+      "epoch": 2.1527022484130596,
+      "grad_norm": 1.658368004839637,
+      "learning_rate": 2.2391756427255757e-06,
+      "loss": 0.6717,
+      "step": 54430
+    },
+    {
+      "epoch": 2.1530977476319486,
+      "grad_norm": 1.54570432897911,
+      "learning_rate": 2.237257516329872e-06,
+      "loss": 0.6738,
+      "step": 54440
+    },
+    {
+      "epoch": 2.1534932468508377,
+      "grad_norm": 1.3149618666280893,
+      "learning_rate": 2.235339975035697e-06,
+      "loss": 0.685,
+      "step": 54450
+    },
+    {
+      "epoch": 2.1538887460697267,
+      "grad_norm": 1.466269762252572,
+      "learning_rate": 2.2334230192491503e-06,
+      "loss": 0.6774,
+      "step": 54460
+    },
+    {
+      "epoch": 2.1542842452886157,
+      "grad_norm": 1.3511419063448677,
+      "learning_rate": 2.231506649376209e-06,
+      "loss": 0.6753,
+      "step": 54470
+    },
+    {
+      "epoch": 2.1546797445075048,
+      "grad_norm": 1.456886402546452,
+      "learning_rate": 2.2295908658227307e-06,
+      "loss": 0.6681,
+      "step": 54480
+    },
+    {
+      "epoch": 2.155075243726394,
+      "grad_norm": 1.434479465961671,
+      "learning_rate": 2.2276756689944418e-06,
+      "loss": 0.6642,
+      "step": 54490
+    },
+    {
+      "epoch": 2.155470742945283,
+      "grad_norm": 1.3994210665916298,
+      "learning_rate": 2.2257610592969518e-06,
+      "loss": 0.6798,
+      "step": 54500
+    },
+    {
+      "epoch": 2.155866242164172,
+      "grad_norm": 1.5036375280952452,
+      "learning_rate": 2.2238470371357413e-06,
+      "loss": 0.6636,
+      "step": 54510
+    },
+    {
+      "epoch": 2.156261741383061,
+      "grad_norm": 1.107226191294125,
+      "learning_rate": 2.2219336029161665e-06,
+      "loss": 0.6724,
+      "step": 54520
+    },
+    {
+      "epoch": 2.15665724060195,
+      "grad_norm": 1.5462140734909817,
+      "learning_rate": 2.2200207570434584e-06,
+      "loss": 0.6495,
+      "step": 54530
+    },
+    {
+      "epoch": 2.157052739820839,
+      "grad_norm": 1.6795116088316382,
+      "learning_rate": 2.218108499922729e-06,
+      "loss": 0.673,
+      "step": 54540
+    },
+    {
+      "epoch": 2.157448239039728,
+      "grad_norm": 1.4596072097923238,
+      "learning_rate": 2.216196831958957e-06,
+      "loss": 0.6758,
+      "step": 54550
+    },
+    {
+      "epoch": 2.157843738258617,
+      "grad_norm": 1.5878795690377225,
+      "learning_rate": 2.2142857535570055e-06,
+      "loss": 0.6619,
+      "step": 54560
+    },
+    {
+      "epoch": 2.158239237477506,
+      "grad_norm": 1.3146101433583148,
+      "learning_rate": 2.212375265121604e-06,
+      "loss": 0.6653,
+      "step": 54570
+    },
+    {
+      "epoch": 2.158634736696395,
+      "grad_norm": 1.4576376459562261,
+      "learning_rate": 2.210465367057365e-06,
+      "loss": 0.6385,
+      "step": 54580
+    },
+    {
+      "epoch": 2.159030235915284,
+      "grad_norm": 1.6223548733116042,
+      "learning_rate": 2.2085560597687687e-06,
+      "loss": 0.658,
+      "step": 54590
+    },
+    {
+      "epoch": 2.1594257351341732,
+      "grad_norm": 1.5426116085171147,
+      "learning_rate": 2.2066473436601776e-06,
+      "loss": 0.6571,
+      "step": 54600
+    },
+    {
+      "epoch": 2.1598212343530623,
+      "grad_norm": 1.4974485469410952,
+      "learning_rate": 2.2047392191358208e-06,
+      "loss": 0.644,
+      "step": 54610
+    },
+    {
+      "epoch": 2.1602167335719513,
+      "grad_norm": 1.195370393755823,
+      "learning_rate": 2.20283168659981e-06,
+      "loss": 0.6801,
+      "step": 54620
+    },
+    {
+      "epoch": 2.1606122327908404,
+      "grad_norm": 1.3148991024053085,
+      "learning_rate": 2.2009247464561266e-06,
+      "loss": 0.6597,
+      "step": 54630
+    },
+    {
+      "epoch": 2.1610077320097294,
+      "grad_norm": 1.3794626140536659,
+      "learning_rate": 2.1990183991086277e-06,
+      "loss": 0.6622,
+      "step": 54640
+    },
+    {
+      "epoch": 2.1614032312286184,
+      "grad_norm": 1.7012542168239617,
+      "learning_rate": 2.197112644961043e-06,
+      "loss": 0.6482,
+      "step": 54650
+    },
+    {
+      "epoch": 2.1617987304475075,
+      "grad_norm": 1.4982081528093467,
+      "learning_rate": 2.1952074844169823e-06,
+      "loss": 0.6591,
+      "step": 54660
+    },
+    {
+      "epoch": 2.1621942296663965,
+      "grad_norm": 1.5420257954861227,
+      "learning_rate": 2.1933029178799225e-06,
+      "loss": 0.6496,
+      "step": 54670
+    },
+    {
+      "epoch": 2.1625897288852856,
+      "grad_norm": 1.1460707043086187,
+      "learning_rate": 2.1913989457532213e-06,
+      "loss": 0.6702,
+      "step": 54680
+    },
+    {
+      "epoch": 2.1629852281041746,
+      "grad_norm": 1.6270352799197734,
+      "learning_rate": 2.1894955684401064e-06,
+      "loss": 0.6909,
+      "step": 54690
+    },
+    {
+      "epoch": 2.1633807273230636,
+      "grad_norm": 1.3069684359085167,
+      "learning_rate": 2.1875927863436776e-06,
+      "loss": 0.6557,
+      "step": 54700
+    },
+    {
+      "epoch": 2.1637762265419527,
+      "grad_norm": 1.2356279574277762,
+      "learning_rate": 2.1856905998669166e-06,
+      "loss": 0.6756,
+      "step": 54710
+    },
+    {
+      "epoch": 2.1641717257608417,
+      "grad_norm": 1.4164068573765753,
+      "learning_rate": 2.1837890094126685e-06,
+      "loss": 0.6541,
+      "step": 54720
+    },
+    {
+      "epoch": 2.1645672249797308,
+      "grad_norm": 1.491296864607764,
+      "learning_rate": 2.1818880153836624e-06,
+      "loss": 0.6276,
+      "step": 54730
+    },
+    {
+      "epoch": 2.16496272419862,
+      "grad_norm": 1.3115475109060677,
+      "learning_rate": 2.179987618182493e-06,
+      "loss": 0.6836,
+      "step": 54740
+    },
+    {
+      "epoch": 2.165358223417509,
+      "grad_norm": 1.5795478878929916,
+      "learning_rate": 2.1780878182116346e-06,
+      "loss": 0.6889,
+      "step": 54750
+    },
+    {
+      "epoch": 2.165753722636398,
+      "grad_norm": 1.627230469850056,
+      "learning_rate": 2.17618861587343e-06,
+      "loss": 0.6778,
+      "step": 54760
+    },
+    {
+      "epoch": 2.166149221855287,
+      "grad_norm": 1.844227277785479,
+      "learning_rate": 2.1742900115700993e-06,
+      "loss": 0.6392,
+      "step": 54770
+    },
+    {
+      "epoch": 2.166544721074176,
+      "grad_norm": 1.2597116520771612,
+      "learning_rate": 2.1723920057037318e-06,
+      "loss": 0.684,
+      "step": 54780
+    },
+    {
+      "epoch": 2.166940220293065,
+      "grad_norm": 1.6131081013569701,
+      "learning_rate": 2.170494598676296e-06,
+      "loss": 0.6809,
+      "step": 54790
+    },
+    {
+      "epoch": 2.167335719511954,
+      "grad_norm": 1.4791812610230533,
+      "learning_rate": 2.1685977908896265e-06,
+      "loss": 0.6749,
+      "step": 54800
+    },
+    {
+      "epoch": 2.167731218730843,
+      "grad_norm": 1.2586990219671308,
+      "learning_rate": 2.166701582745439e-06,
+      "loss": 0.6557,
+      "step": 54810
+    },
+    {
+      "epoch": 2.168126717949732,
+      "grad_norm": 1.2427136825322551,
+      "learning_rate": 2.1648059746453136e-06,
+      "loss": 0.6685,
+      "step": 54820
+    },
+    {
+      "epoch": 2.168522217168621,
+      "grad_norm": 1.402752931165652,
+      "learning_rate": 2.1629109669907116e-06,
+      "loss": 0.6413,
+      "step": 54830
+    },
+    {
+      "epoch": 2.16891771638751,
+      "grad_norm": 1.3592600023884258,
+      "learning_rate": 2.161016560182959e-06,
+      "loss": 0.65,
+      "step": 54840
+    },
+    {
+      "epoch": 2.1693132156063992,
+      "grad_norm": 1.6332465760251773,
+      "learning_rate": 2.1591227546232633e-06,
+      "loss": 0.662,
+      "step": 54850
+    },
+    {
+      "epoch": 2.1697087148252883,
+      "grad_norm": 1.3031249624795824,
+      "learning_rate": 2.157229550712696e-06,
+      "loss": 0.6961,
+      "step": 54860
+    },
+    {
+      "epoch": 2.1701042140441773,
+      "grad_norm": 1.560155811013162,
+      "learning_rate": 2.1553369488522084e-06,
+      "loss": 0.6585,
+      "step": 54870
+    },
+    {
+      "epoch": 2.1704997132630663,
+      "grad_norm": 1.3158720730677125,
+      "learning_rate": 2.1534449494426203e-06,
+      "loss": 0.6866,
+      "step": 54880
+    },
+    {
+      "epoch": 2.1708952124819554,
+      "grad_norm": 1.3360160633558826,
+      "learning_rate": 2.1515535528846238e-06,
+      "loss": 0.6659,
+      "step": 54890
+    },
+    {
+      "epoch": 2.1712907117008444,
+      "grad_norm": 1.803842540897346,
+      "learning_rate": 2.1496627595787827e-06,
+      "loss": 0.6188,
+      "step": 54900
+    },
+    {
+      "epoch": 2.1716862109197335,
+      "grad_norm": 1.48380766661754,
+      "learning_rate": 2.1477725699255384e-06,
+      "loss": 0.65,
+      "step": 54910
+    },
+    {
+      "epoch": 2.1720817101386225,
+      "grad_norm": 1.6226487076725338,
+      "learning_rate": 2.1458829843251973e-06,
+      "loss": 0.6762,
+      "step": 54920
+    },
+    {
+      "epoch": 2.1724772093575115,
+      "grad_norm": 1.3850841981657511,
+      "learning_rate": 2.1439940031779443e-06,
+      "loss": 0.6611,
+      "step": 54930
+    },
+    {
+      "epoch": 2.1728727085764006,
+      "grad_norm": 1.5037113775171946,
+      "learning_rate": 2.1421056268838324e-06,
+      "loss": 0.6585,
+      "step": 54940
+    },
+    {
+      "epoch": 2.1732682077952896,
+      "grad_norm": 1.2312445835536803,
+      "learning_rate": 2.1402178558427846e-06,
+      "loss": 0.6567,
+      "step": 54950
+    },
+    {
+      "epoch": 2.1736637070141787,
+      "grad_norm": 1.6411216422318087,
+      "learning_rate": 2.1383306904546027e-06,
+      "loss": 0.6845,
+      "step": 54960
+    },
+    {
+      "epoch": 2.1740592062330677,
+      "grad_norm": 1.6024632241331704,
+      "learning_rate": 2.1364441311189515e-06,
+      "loss": 0.6691,
+      "step": 54970
+    },
+    {
+      "epoch": 2.1744547054519567,
+      "grad_norm": 1.3754437725374022,
+      "learning_rate": 2.1345581782353765e-06,
+      "loss": 0.6763,
+      "step": 54980
+    },
+    {
+      "epoch": 2.1748502046708458,
+      "grad_norm": 1.5567165298381425,
+      "learning_rate": 2.1326728322032863e-06,
+      "loss": 0.6578,
+      "step": 54990
+    },
+    {
+      "epoch": 2.175245703889735,
+      "grad_norm": 1.2703706760644615,
+      "learning_rate": 2.13078809342197e-06,
+      "loss": 0.691,
+      "step": 55000
+    },
+    {
+      "epoch": 2.175641203108624,
+      "grad_norm": 1.6043818402896892,
+      "learning_rate": 2.128903962290576e-06,
+      "loss": 0.6799,
+      "step": 55010
+    },
+    {
+      "epoch": 2.176036702327513,
+      "grad_norm": 1.4042953948996555,
+      "learning_rate": 2.1270204392081366e-06,
+      "loss": 0.6664,
+      "step": 55020
+    },
+    {
+      "epoch": 2.176432201546402,
+      "grad_norm": 1.3462383248655843,
+      "learning_rate": 2.1251375245735463e-06,
+      "loss": 0.6546,
+      "step": 55030
+    },
+    {
+      "epoch": 2.176827700765291,
+      "grad_norm": 1.8395580276812962,
+      "learning_rate": 2.123255218785577e-06,
+      "loss": 0.6419,
+      "step": 55040
+    },
+    {
+      "epoch": 2.17722319998418,
+      "grad_norm": 1.4291423292389434,
+      "learning_rate": 2.121373522242866e-06,
+      "loss": 0.6637,
+      "step": 55050
+    },
+    {
+      "epoch": 2.177618699203069,
+      "grad_norm": 1.2932672609945908,
+      "learning_rate": 2.119492435343927e-06,
+      "loss": 0.6965,
+      "step": 55060
+    },
+    {
+      "epoch": 2.178014198421958,
+      "grad_norm": 1.4295757892204086,
+      "learning_rate": 2.1176119584871395e-06,
+      "loss": 0.6599,
+      "step": 55070
+    },
+    {
+      "epoch": 2.178409697640847,
+      "grad_norm": 1.3950743531240286,
+      "learning_rate": 2.1157320920707593e-06,
+      "loss": 0.6386,
+      "step": 55080
+    },
+    {
+      "epoch": 2.178805196859736,
+      "grad_norm": 1.4170559422726166,
+      "learning_rate": 2.113852836492906e-06,
+      "loss": 0.6506,
+      "step": 55090
+    },
+    {
+      "epoch": 2.179200696078625,
+      "grad_norm": 1.2460317146698219,
+      "learning_rate": 2.111974192151578e-06,
+      "loss": 0.6644,
+      "step": 55100
+    },
+    {
+      "epoch": 2.1795961952975142,
+      "grad_norm": 1.5005435649255958,
+      "learning_rate": 2.1100961594446377e-06,
+      "loss": 0.6739,
+      "step": 55110
+    },
+    {
+      "epoch": 2.1799916945164033,
+      "grad_norm": 1.337576581434563,
+      "learning_rate": 2.1082187387698212e-06,
+      "loss": 0.6743,
+      "step": 55120
+    },
+    {
+      "epoch": 2.1803871937352923,
+      "grad_norm": 1.542551486678024,
+      "learning_rate": 2.106341930524731e-06,
+      "loss": 0.6708,
+      "step": 55130
+    },
+    {
+      "epoch": 2.1807826929541814,
+      "grad_norm": 1.3268083381965459,
+      "learning_rate": 2.1044657351068467e-06,
+      "loss": 0.6705,
+      "step": 55140
+    },
+    {
+      "epoch": 2.1811781921730704,
+      "grad_norm": 1.4727253360596646,
+      "learning_rate": 2.1025901529135113e-06,
+      "loss": 0.6815,
+      "step": 55150
+    },
+    {
+      "epoch": 2.1815736913919594,
+      "grad_norm": 1.6765978470726397,
+      "learning_rate": 2.1007151843419443e-06,
+      "loss": 0.6251,
+      "step": 55160
+    },
+    {
+      "epoch": 2.1819691906108485,
+      "grad_norm": 1.525952958619666,
+      "learning_rate": 2.09884082978923e-06,
+      "loss": 0.6466,
+      "step": 55170
+    },
+    {
+      "epoch": 2.1823646898297375,
+      "grad_norm": 1.4821454817183586,
+      "learning_rate": 2.0969670896523233e-06,
+      "loss": 0.6569,
+      "step": 55180
+    },
+    {
+      "epoch": 2.1827601890486266,
+      "grad_norm": 1.473021833103436,
+      "learning_rate": 2.095093964328053e-06,
+      "loss": 0.6665,
+      "step": 55190
+    },
+    {
+      "epoch": 2.1831556882675156,
+      "grad_norm": 1.5388775406837523,
+      "learning_rate": 2.0932214542131125e-06,
+      "loss": 0.6641,
+      "step": 55200
+    },
+    {
+      "epoch": 2.1835511874864046,
+      "grad_norm": 1.4147031653262039,
+      "learning_rate": 2.09134955970407e-06,
+      "loss": 0.6556,
+      "step": 55210
+    },
+    {
+      "epoch": 2.1839466867052937,
+      "grad_norm": 1.4589106048394442,
+      "learning_rate": 2.0894782811973574e-06,
+      "loss": 0.6588,
+      "step": 55220
+    },
+    {
+      "epoch": 2.1843421859241827,
+      "grad_norm": 1.4575392943643484,
+      "learning_rate": 2.087607619089283e-06,
+      "loss": 0.6524,
+      "step": 55230
+    },
+    {
+      "epoch": 2.1847376851430718,
+      "grad_norm": 1.4381784157098458,
+      "learning_rate": 2.0857375737760194e-06,
+      "loss": 0.6589,
+      "step": 55240
+    },
+    {
+      "epoch": 2.185133184361961,
+      "grad_norm": 1.815968991448052,
+      "learning_rate": 2.0838681456536096e-06,
+      "loss": 0.6614,
+      "step": 55250
+    },
+    {
+      "epoch": 2.18552868358085,
+      "grad_norm": 1.4321266325091235,
+      "learning_rate": 2.081999335117965e-06,
+      "loss": 0.6887,
+      "step": 55260
+    },
+    {
+      "epoch": 2.185924182799739,
+      "grad_norm": 1.5833569557253544,
+      "learning_rate": 2.0801311425648718e-06,
+      "loss": 0.6591,
+      "step": 55270
+    },
+    {
+      "epoch": 2.186319682018628,
+      "grad_norm": 1.349337713206238,
+      "learning_rate": 2.078263568389977e-06,
+      "loss": 0.6982,
+      "step": 55280
+    },
+    {
+      "epoch": 2.186715181237517,
+      "grad_norm": 1.28846024488585,
+      "learning_rate": 2.076396612988804e-06,
+      "loss": 0.6721,
+      "step": 55290
+    },
+    {
+      "epoch": 2.187110680456406,
+      "grad_norm": 1.7136283349944543,
+      "learning_rate": 2.074530276756739e-06,
+      "loss": 0.6627,
+      "step": 55300
+    },
+    {
+      "epoch": 2.187506179675295,
+      "grad_norm": 1.3837230247038177,
+      "learning_rate": 2.0726645600890438e-06,
+      "loss": 0.6371,
+      "step": 55310
+    },
+    {
+      "epoch": 2.187901678894184,
+      "grad_norm": 1.2763360790287788,
+      "learning_rate": 2.070799463380841e-06,
+      "loss": 0.6734,
+      "step": 55320
+    },
+    {
+      "epoch": 2.188297178113073,
+      "grad_norm": 1.1839243288028374,
+      "learning_rate": 2.0689349870271302e-06,
+      "loss": 0.6763,
+      "step": 55330
+    },
+    {
+      "epoch": 2.188692677331962,
+      "grad_norm": 1.3844961064365453,
+      "learning_rate": 2.0670711314227737e-06,
+      "loss": 0.6737,
+      "step": 55340
+    },
+    {
+      "epoch": 2.189088176550851,
+      "grad_norm": 1.3543975863261621,
+      "learning_rate": 2.065207896962502e-06,
+      "loss": 0.6876,
+      "step": 55350
+    },
+    {
+      "epoch": 2.1894836757697402,
+      "grad_norm": 1.6200709306981103,
+      "learning_rate": 2.0633452840409203e-06,
+      "loss": 0.6677,
+      "step": 55360
+    },
+    {
+      "epoch": 2.1898791749886293,
+      "grad_norm": 1.3088681856730384,
+      "learning_rate": 2.0614832930524956e-06,
+      "loss": 0.6628,
+      "step": 55370
+    },
+    {
+      "epoch": 2.1902746742075183,
+      "grad_norm": 1.585168510119543,
+      "learning_rate": 2.0596219243915648e-06,
+      "loss": 0.6618,
+      "step": 55380
+    },
+    {
+      "epoch": 2.1906701734264074,
+      "grad_norm": 1.415108320183506,
+      "learning_rate": 2.0577611784523376e-06,
+      "loss": 0.677,
+      "step": 55390
+    },
+    {
+      "epoch": 2.1910656726452964,
+      "grad_norm": 1.378881458503356,
+      "learning_rate": 2.0559010556288853e-06,
+      "loss": 0.6895,
+      "step": 55400
+    },
+    {
+      "epoch": 2.1914611718641854,
+      "grad_norm": 1.7175462402128503,
+      "learning_rate": 2.054041556315149e-06,
+      "loss": 0.6558,
+      "step": 55410
+    },
+    {
+      "epoch": 2.1918566710830745,
+      "grad_norm": 1.434742667447602,
+      "learning_rate": 2.0521826809049415e-06,
+      "loss": 0.6672,
+      "step": 55420
+    },
+    {
+      "epoch": 2.1922521703019635,
+      "grad_norm": 1.3923857888628648,
+      "learning_rate": 2.050324429791938e-06,
+      "loss": 0.6781,
+      "step": 55430
+    },
+    {
+      "epoch": 2.1926476695208525,
+      "grad_norm": 1.4864855189505777,
+      "learning_rate": 2.0484668033696887e-06,
+      "loss": 0.6445,
+      "step": 55440
+    },
+    {
+      "epoch": 2.1930431687397416,
+      "grad_norm": 1.514001876148184,
+      "learning_rate": 2.0466098020316017e-06,
+      "loss": 0.7041,
+      "step": 55450
+    },
+    {
+      "epoch": 2.1934386679586306,
+      "grad_norm": 1.4393693179062899,
+      "learning_rate": 2.0447534261709623e-06,
+      "loss": 0.6483,
+      "step": 55460
+    },
+    {
+      "epoch": 2.1938341671775197,
+      "grad_norm": 1.6471815359178719,
+      "learning_rate": 2.0428976761809156e-06,
+      "loss": 0.6333,
+      "step": 55470
+    },
+    {
+      "epoch": 2.1942296663964087,
+      "grad_norm": 1.50965200079764,
+      "learning_rate": 2.0410425524544813e-06,
+      "loss": 0.6456,
+      "step": 55480
+    },
+    {
+      "epoch": 2.1946251656152977,
+      "grad_norm": 1.4271593861899239,
+      "learning_rate": 2.0391880553845405e-06,
+      "loss": 0.6774,
+      "step": 55490
+    },
+    {
+      "epoch": 2.195020664834187,
+      "grad_norm": 1.5024065913579605,
+      "learning_rate": 2.037334185363845e-06,
+      "loss": 0.6267,
+      "step": 55500
+    },
+    {
+      "epoch": 2.195416164053076,
+      "grad_norm": 1.251720669120062,
+      "learning_rate": 2.03548094278501e-06,
+      "loss": 0.6988,
+      "step": 55510
+    },
+    {
+      "epoch": 2.195811663271965,
+      "grad_norm": 1.4577803130311289,
+      "learning_rate": 2.033628328040525e-06,
+      "loss": 0.6707,
+      "step": 55520
+    },
+    {
+      "epoch": 2.196207162490854,
+      "grad_norm": 1.733576936940708,
+      "learning_rate": 2.031776341522737e-06,
+      "loss": 0.6667,
+      "step": 55530
+    },
+    {
+      "epoch": 2.196602661709743,
+      "grad_norm": 1.5985478513808171,
+      "learning_rate": 2.0299249836238698e-06,
+      "loss": 0.6611,
+      "step": 55540
+    },
+    {
+      "epoch": 2.196998160928632,
+      "grad_norm": 1.373277732981005,
+      "learning_rate": 2.028074254736006e-06,
+      "loss": 0.6536,
+      "step": 55550
+    },
+    {
+      "epoch": 2.197393660147521,
+      "grad_norm": 1.4428927938146197,
+      "learning_rate": 2.026224155251101e-06,
+      "loss": 0.6344,
+      "step": 55560
+    },
+    {
+      "epoch": 2.19778915936641,
+      "grad_norm": 1.7125403072980094,
+      "learning_rate": 2.0243746855609705e-06,
+      "loss": 0.6641,
+      "step": 55570
+    },
+    {
+      "epoch": 2.198184658585299,
+      "grad_norm": 1.4616725632791163,
+      "learning_rate": 2.0225258460573044e-06,
+      "loss": 0.6744,
+      "step": 55580
+    },
+    {
+      "epoch": 2.198580157804188,
+      "grad_norm": 1.56280968591115,
+      "learning_rate": 2.020677637131653e-06,
+      "loss": 0.6586,
+      "step": 55590
+    },
+    {
+      "epoch": 2.198975657023077,
+      "grad_norm": 1.3897154816337431,
+      "learning_rate": 2.0188300591754353e-06,
+      "loss": 0.7077,
+      "step": 55600
+    },
+    {
+      "epoch": 2.199371156241966,
+      "grad_norm": 1.671967927327787,
+      "learning_rate": 2.0169831125799377e-06,
+      "loss": 0.6727,
+      "step": 55610
+    },
+    {
+      "epoch": 2.1997666554608553,
+      "grad_norm": 1.6562121360254696,
+      "learning_rate": 2.0151367977363117e-06,
+      "loss": 0.6716,
+      "step": 55620
+    },
+    {
+      "epoch": 2.2001621546797443,
+      "grad_norm": 1.2709020694628932,
+      "learning_rate": 2.0132911150355744e-06,
+      "loss": 0.6983,
+      "step": 55630
+    },
+    {
+      "epoch": 2.200557653898634,
+      "grad_norm": 1.4108853072245306,
+      "learning_rate": 2.0114460648686083e-06,
+      "loss": 0.6387,
+      "step": 55640
+    },
+    {
+      "epoch": 2.2009531531175224,
+      "grad_norm": 1.3144000303238608,
+      "learning_rate": 2.0096016476261678e-06,
+      "loss": 0.6377,
+      "step": 55650
+    },
+    {
+      "epoch": 2.201348652336412,
+      "grad_norm": 1.4126155160785583,
+      "learning_rate": 2.007757863698864e-06,
+      "loss": 0.6299,
+      "step": 55660
+    },
+    {
+      "epoch": 2.2017441515553005,
+      "grad_norm": 1.5177191937709549,
+      "learning_rate": 2.0059147134771824e-06,
+      "loss": 0.6471,
+      "step": 55670
+    },
+    {
+      "epoch": 2.20213965077419,
+      "grad_norm": 1.8833569262949779,
+      "learning_rate": 2.0040721973514677e-06,
+      "loss": 0.66,
+      "step": 55680
+    },
+    {
+      "epoch": 2.2025351499930785,
+      "grad_norm": 1.7941984966864863,
+      "learning_rate": 2.0022303157119367e-06,
+      "loss": 0.6219,
+      "step": 55690
+    },
+    {
+      "epoch": 2.202930649211968,
+      "grad_norm": 1.466832427685248,
+      "learning_rate": 2.0003890689486643e-06,
+      "loss": 0.6386,
+      "step": 55700
+    },
+    {
+      "epoch": 2.203326148430857,
+      "grad_norm": 1.4680888869112423,
+      "learning_rate": 1.9985484574515993e-06,
+      "loss": 0.6358,
+      "step": 55710
+    },
+    {
+      "epoch": 2.203721647649746,
+      "grad_norm": 1.826877541323233,
+      "learning_rate": 1.996708481610548e-06,
+      "loss": 0.6808,
+      "step": 55720
+    },
+    {
+      "epoch": 2.204117146868635,
+      "grad_norm": 1.2128174528183193,
+      "learning_rate": 1.9948691418151904e-06,
+      "loss": 0.6708,
+      "step": 55730
+    },
+    {
+      "epoch": 2.204512646087524,
+      "grad_norm": 1.7102703882369514,
+      "learning_rate": 1.9930304384550607e-06,
+      "loss": 0.6459,
+      "step": 55740
+    },
+    {
+      "epoch": 2.204908145306413,
+      "grad_norm": 1.3154355290699002,
+      "learning_rate": 1.9911923719195704e-06,
+      "loss": 0.6455,
+      "step": 55750
+    },
+    {
+      "epoch": 2.2053036445253023,
+      "grad_norm": 1.7461869371676682,
+      "learning_rate": 1.989354942597986e-06,
+      "loss": 0.6769,
+      "step": 55760
+    },
+    {
+      "epoch": 2.2056991437441913,
+      "grad_norm": 1.6766754589499235,
+      "learning_rate": 1.9875181508794476e-06,
+      "loss": 0.647,
+      "step": 55770
+    },
+    {
+      "epoch": 2.2060946429630803,
+      "grad_norm": 1.3733294435849956,
+      "learning_rate": 1.9856819971529527e-06,
+      "loss": 0.6793,
+      "step": 55780
+    },
+    {
+      "epoch": 2.2064901421819694,
+      "grad_norm": 1.350790783662417,
+      "learning_rate": 1.983846481807371e-06,
+      "loss": 0.679,
+      "step": 55790
+    },
+    {
+      "epoch": 2.2068856414008584,
+      "grad_norm": 1.511280442973517,
+      "learning_rate": 1.982011605231429e-06,
+      "loss": 0.6698,
+      "step": 55800
+    },
+    {
+      "epoch": 2.2072811406197475,
+      "grad_norm": 1.6302616143014053,
+      "learning_rate": 1.9801773678137266e-06,
+      "loss": 0.6756,
+      "step": 55810
+    },
+    {
+      "epoch": 2.2076766398386365,
+      "grad_norm": 1.6669755135186197,
+      "learning_rate": 1.978343769942721e-06,
+      "loss": 0.6687,
+      "step": 55820
+    },
+    {
+      "epoch": 2.2080721390575255,
+      "grad_norm": 1.419657251798545,
+      "learning_rate": 1.9765108120067355e-06,
+      "loss": 0.6697,
+      "step": 55830
+    },
+    {
+      "epoch": 2.2084676382764146,
+      "grad_norm": 1.9387774666590267,
+      "learning_rate": 1.9746784943939627e-06,
+      "loss": 0.6129,
+      "step": 55840
+    },
+    {
+      "epoch": 2.2088631374953036,
+      "grad_norm": 1.4085316120697977,
+      "learning_rate": 1.972846817492453e-06,
+      "loss": 0.6548,
+      "step": 55850
+    },
+    {
+      "epoch": 2.2092586367141926,
+      "grad_norm": 1.3703629194926459,
+      "learning_rate": 1.9710157816901275e-06,
+      "loss": 0.6797,
+      "step": 55860
+    },
+    {
+      "epoch": 2.2096541359330817,
+      "grad_norm": 1.6305671899887957,
+      "learning_rate": 1.969185387374766e-06,
+      "loss": 0.665,
+      "step": 55870
+    },
+    {
+      "epoch": 2.2100496351519707,
+      "grad_norm": 1.4197981325960134,
+      "learning_rate": 1.967355634934015e-06,
+      "loss": 0.6645,
+      "step": 55880
+    },
+    {
+      "epoch": 2.2104451343708598,
+      "grad_norm": 1.4597196294207555,
+      "learning_rate": 1.9655265247553833e-06,
+      "loss": 0.6616,
+      "step": 55890
+    },
+    {
+      "epoch": 2.210840633589749,
+      "grad_norm": 1.4349110107401246,
+      "learning_rate": 1.9636980572262476e-06,
+      "loss": 0.6833,
+      "step": 55900
+    },
+    {
+      "epoch": 2.211236132808638,
+      "grad_norm": 1.3285041627939984,
+      "learning_rate": 1.9618702327338433e-06,
+      "loss": 0.6419,
+      "step": 55910
+    },
+    {
+      "epoch": 2.211631632027527,
+      "grad_norm": 1.336458420742804,
+      "learning_rate": 1.9600430516652765e-06,
+      "loss": 0.6548,
+      "step": 55920
+    },
+    {
+      "epoch": 2.212027131246416,
+      "grad_norm": 1.3712226329795318,
+      "learning_rate": 1.9582165144075073e-06,
+      "loss": 0.6715,
+      "step": 55930
+    },
+    {
+      "epoch": 2.212422630465305,
+      "grad_norm": 1.289725068137227,
+      "learning_rate": 1.9563906213473705e-06,
+      "loss": 0.6793,
+      "step": 55940
+    },
+    {
+      "epoch": 2.212818129684194,
+      "grad_norm": 1.3684855015971884,
+      "learning_rate": 1.954565372871554e-06,
+      "loss": 0.637,
+      "step": 55950
+    },
+    {
+      "epoch": 2.213213628903083,
+      "grad_norm": 1.6592268240201962,
+      "learning_rate": 1.952740769366619e-06,
+      "loss": 0.6687,
+      "step": 55960
+    },
+    {
+      "epoch": 2.213609128121972,
+      "grad_norm": 1.4120912613364356,
+      "learning_rate": 1.9509168112189804e-06,
+      "loss": 0.6502,
+      "step": 55970
+    },
+    {
+      "epoch": 2.214004627340861,
+      "grad_norm": 1.5333297648380488,
+      "learning_rate": 1.9490934988149275e-06,
+      "loss": 0.6822,
+      "step": 55980
+    },
+    {
+      "epoch": 2.21440012655975,
+      "grad_norm": 1.6433836364096805,
+      "learning_rate": 1.9472708325405993e-06,
+      "loss": 0.6679,
+      "step": 55990
+    },
+    {
+      "epoch": 2.214795625778639,
+      "grad_norm": 1.6214430695055573,
+      "learning_rate": 1.945448812782011e-06,
+      "loss": 0.6845,
+      "step": 56000
+    },
+    {
+      "epoch": 2.2151911249975282,
+      "grad_norm": 1.3626081798981615,
+      "learning_rate": 1.9436274399250303e-06,
+      "loss": 0.6722,
+      "step": 56010
+    },
+    {
+      "epoch": 2.2155866242164173,
+      "grad_norm": 1.4780307921497446,
+      "learning_rate": 1.9418067143553983e-06,
+      "loss": 0.6636,
+      "step": 56020
+    },
+    {
+      "epoch": 2.2159821234353063,
+      "grad_norm": 1.4394379393799175,
+      "learning_rate": 1.9399866364587084e-06,
+      "loss": 0.6788,
+      "step": 56030
+    },
+    {
+      "epoch": 2.2163776226541954,
+      "grad_norm": 1.4512876050047627,
+      "learning_rate": 1.938167206620426e-06,
+      "loss": 0.6615,
+      "step": 56040
+    },
+    {
+      "epoch": 2.2167731218730844,
+      "grad_norm": 1.192293141807901,
+      "learning_rate": 1.936348425225873e-06,
+      "loss": 0.6657,
+      "step": 56050
+    },
+    {
+      "epoch": 2.2171686210919734,
+      "grad_norm": 1.307150319908702,
+      "learning_rate": 1.934530292660235e-06,
+      "loss": 0.675,
+      "step": 56060
+    },
+    {
+      "epoch": 2.2175641203108625,
+      "grad_norm": 1.3936515514615915,
+      "learning_rate": 1.9327128093085647e-06,
+      "loss": 0.6448,
+      "step": 56070
+    },
+    {
+      "epoch": 2.2179596195297515,
+      "grad_norm": 1.2604982469995145,
+      "learning_rate": 1.93089597555577e-06,
+      "loss": 0.6858,
+      "step": 56080
+    },
+    {
+      "epoch": 2.2183551187486406,
+      "grad_norm": 1.3388377138240866,
+      "learning_rate": 1.9290797917866293e-06,
+      "loss": 0.6614,
+      "step": 56090
+    },
+    {
+      "epoch": 2.2187506179675296,
+      "grad_norm": 1.5756046515903475,
+      "learning_rate": 1.927264258385777e-06,
+      "loss": 0.6348,
+      "step": 56100
+    },
+    {
+      "epoch": 2.2191461171864186,
+      "grad_norm": 1.266632090976251,
+      "learning_rate": 1.9254493757377118e-06,
+      "loss": 0.6523,
+      "step": 56110
+    },
+    {
+      "epoch": 2.2195416164053077,
+      "grad_norm": 1.3605980661976635,
+      "learning_rate": 1.9236351442267936e-06,
+      "loss": 0.6679,
+      "step": 56120
+    },
+    {
+      "epoch": 2.2199371156241967,
+      "grad_norm": 1.435802616820598,
+      "learning_rate": 1.9218215642372483e-06,
+      "loss": 0.6734,
+      "step": 56130
+    },
+    {
+      "epoch": 2.2203326148430858,
+      "grad_norm": 1.3487513921350804,
+      "learning_rate": 1.920008636153158e-06,
+      "loss": 0.6955,
+      "step": 56140
+    },
+    {
+      "epoch": 2.220728114061975,
+      "grad_norm": 1.3844409532429283,
+      "learning_rate": 1.918196360358474e-06,
+      "loss": 0.6598,
+      "step": 56150
+    },
+    {
+      "epoch": 2.221123613280864,
+      "grad_norm": 1.4692629930619787,
+      "learning_rate": 1.916384737237001e-06,
+      "loss": 0.6534,
+      "step": 56160
+    },
+    {
+      "epoch": 2.221519112499753,
+      "grad_norm": 1.568874716346847,
+      "learning_rate": 1.914573767172413e-06,
+      "loss": 0.6384,
+      "step": 56170
+    },
+    {
+      "epoch": 2.221914611718642,
+      "grad_norm": 1.2888907028910421,
+      "learning_rate": 1.9127634505482394e-06,
+      "loss": 0.6895,
+      "step": 56180
+    },
+    {
+      "epoch": 2.222310110937531,
+      "grad_norm": 1.4315007722405082,
+      "learning_rate": 1.9109537877478773e-06,
+      "loss": 0.6883,
+      "step": 56190
+    },
+    {
+      "epoch": 2.22270561015642,
+      "grad_norm": 1.4925370882725184,
+      "learning_rate": 1.9091447791545797e-06,
+      "loss": 0.7027,
+      "step": 56200
+    },
+    {
+      "epoch": 2.223101109375309,
+      "grad_norm": 1.434823166299685,
+      "learning_rate": 1.9073364251514658e-06,
+      "loss": 0.6679,
+      "step": 56210
+    },
+    {
+      "epoch": 2.223496608594198,
+      "grad_norm": 1.314422053543772,
+      "learning_rate": 1.9055287261215133e-06,
+      "loss": 0.6746,
+      "step": 56220
+    },
+    {
+      "epoch": 2.223892107813087,
+      "grad_norm": 1.4634406846981405,
+      "learning_rate": 1.9037216824475618e-06,
+      "loss": 0.6784,
+      "step": 56230
+    },
+    {
+      "epoch": 2.224287607031976,
+      "grad_norm": 1.7209427741649248,
+      "learning_rate": 1.9019152945123098e-06,
+      "loss": 0.6678,
+      "step": 56240
+    },
+    {
+      "epoch": 2.224683106250865,
+      "grad_norm": 1.3133593028830022,
+      "learning_rate": 1.900109562698323e-06,
+      "loss": 0.642,
+      "step": 56250
+    },
+    {
+      "epoch": 2.2250786054697542,
+      "grad_norm": 1.4217547369672734,
+      "learning_rate": 1.8983044873880213e-06,
+      "loss": 0.6278,
+      "step": 56260
+    },
+    {
+      "epoch": 2.2254741046886433,
+      "grad_norm": 1.2041896901158697,
+      "learning_rate": 1.8965000689636925e-06,
+      "loss": 0.6649,
+      "step": 56270
+    },
+    {
+      "epoch": 2.2258696039075323,
+      "grad_norm": 1.396883148452029,
+      "learning_rate": 1.8946963078074794e-06,
+      "loss": 0.6842,
+      "step": 56280
+    },
+    {
+      "epoch": 2.2262651031264213,
+      "grad_norm": 1.245185383960608,
+      "learning_rate": 1.8928932043013854e-06,
+      "loss": 0.662,
+      "step": 56290
+    },
+    {
+      "epoch": 2.2266606023453104,
+      "grad_norm": 1.4469023243975065,
+      "learning_rate": 1.891090758827281e-06,
+      "loss": 0.6654,
+      "step": 56300
+    },
+    {
+      "epoch": 2.2270561015641994,
+      "grad_norm": 1.3647389039208557,
+      "learning_rate": 1.88928897176689e-06,
+      "loss": 0.6447,
+      "step": 56310
+    },
+    {
+      "epoch": 2.2274516007830885,
+      "grad_norm": 1.258365695865808,
+      "learning_rate": 1.8874878435018028e-06,
+      "loss": 0.6634,
+      "step": 56320
+    },
+    {
+      "epoch": 2.2278471000019775,
+      "grad_norm": 1.2789613698259803,
+      "learning_rate": 1.8856873744134647e-06,
+      "loss": 0.6951,
+      "step": 56330
+    },
+    {
+      "epoch": 2.2282425992208665,
+      "grad_norm": 1.5063845721766689,
+      "learning_rate": 1.8838875648831874e-06,
+      "loss": 0.6632,
+      "step": 56340
+    },
+    {
+      "epoch": 2.2286380984397556,
+      "grad_norm": 1.343985022364594,
+      "learning_rate": 1.8820884152921382e-06,
+      "loss": 0.6727,
+      "step": 56350
+    },
+    {
+      "epoch": 2.2290335976586446,
+      "grad_norm": 1.3195762296427764,
+      "learning_rate": 1.8802899260213458e-06,
+      "loss": 0.6633,
+      "step": 56360
+    },
+    {
+      "epoch": 2.2294290968775337,
+      "grad_norm": 1.4487709596771288,
+      "learning_rate": 1.878492097451698e-06,
+      "loss": 0.6842,
+      "step": 56370
+    },
+    {
+      "epoch": 2.2298245960964227,
+      "grad_norm": 1.238861972100529,
+      "learning_rate": 1.8766949299639475e-06,
+      "loss": 0.634,
+      "step": 56380
+    },
+    {
+      "epoch": 2.2302200953153117,
+      "grad_norm": 1.265566653488306,
+      "learning_rate": 1.8748984239386996e-06,
+      "loss": 0.656,
+      "step": 56390
+    },
+    {
+      "epoch": 2.2306155945342008,
+      "grad_norm": 1.6392009645232655,
+      "learning_rate": 1.8731025797564278e-06,
+      "loss": 0.6135,
+      "step": 56400
+    },
+    {
+      "epoch": 2.23101109375309,
+      "grad_norm": 1.8560676463992838,
+      "learning_rate": 1.8713073977974572e-06,
+      "loss": 0.6358,
+      "step": 56410
+    },
+    {
+      "epoch": 2.231406592971979,
+      "grad_norm": 1.4270879044693834,
+      "learning_rate": 1.8695128784419803e-06,
+      "loss": 0.6647,
+      "step": 56420
+    },
+    {
+      "epoch": 2.231802092190868,
+      "grad_norm": 1.1775420223359605,
+      "learning_rate": 1.8677190220700419e-06,
+      "loss": 0.6688,
+      "step": 56430
+    },
+    {
+      "epoch": 2.232197591409757,
+      "grad_norm": 1.3969876657621385,
+      "learning_rate": 1.8659258290615535e-06,
+      "loss": 0.6525,
+      "step": 56440
+    },
+    {
+      "epoch": 2.232593090628646,
+      "grad_norm": 1.422047648398132,
+      "learning_rate": 1.8641332997962786e-06,
+      "loss": 0.681,
+      "step": 56450
+    },
+    {
+      "epoch": 2.232988589847535,
+      "grad_norm": 1.788866674564538,
+      "learning_rate": 1.8623414346538488e-06,
+      "loss": 0.6588,
+      "step": 56460
+    },
+    {
+      "epoch": 2.233384089066424,
+      "grad_norm": 1.4063625912303486,
+      "learning_rate": 1.8605502340137483e-06,
+      "loss": 0.6804,
+      "step": 56470
+    },
+    {
+      "epoch": 2.233779588285313,
+      "grad_norm": 1.3971012230148743,
+      "learning_rate": 1.8587596982553224e-06,
+      "loss": 0.6399,
+      "step": 56480
+    },
+    {
+      "epoch": 2.234175087504202,
+      "grad_norm": 1.5483857583153908,
+      "learning_rate": 1.8569698277577746e-06,
+      "loss": 0.6531,
+      "step": 56490
+    },
+    {
+      "epoch": 2.234570586723091,
+      "grad_norm": 1.233723600661161,
+      "learning_rate": 1.8551806229001718e-06,
+      "loss": 0.6842,
+      "step": 56500
+    },
+    {
+      "epoch": 2.23496608594198,
+      "grad_norm": 1.6947271254138232,
+      "learning_rate": 1.8533920840614334e-06,
+      "loss": 0.6198,
+      "step": 56510
+    },
+    {
+      "epoch": 2.2353615851608692,
+      "grad_norm": 1.4090932462271977,
+      "learning_rate": 1.8516042116203452e-06,
+      "loss": 0.6483,
+      "step": 56520
+    },
+    {
+      "epoch": 2.2357570843797583,
+      "grad_norm": 1.7536861712117378,
+      "learning_rate": 1.8498170059555466e-06,
+      "loss": 0.6525,
+      "step": 56530
+    },
+    {
+      "epoch": 2.2361525835986473,
+      "grad_norm": 1.4169051713145988,
+      "learning_rate": 1.8480304674455347e-06,
+      "loss": 0.6718,
+      "step": 56540
+    },
+    {
+      "epoch": 2.2365480828175364,
+      "grad_norm": 1.4518560967693332,
+      "learning_rate": 1.846244596468671e-06,
+      "loss": 0.66,
+      "step": 56550
+    },
+    {
+      "epoch": 2.2369435820364254,
+      "grad_norm": 1.321908633996863,
+      "learning_rate": 1.8444593934031695e-06,
+      "loss": 0.6525,
+      "step": 56560
+    },
+    {
+      "epoch": 2.2373390812553144,
+      "grad_norm": 1.4557489658831209,
+      "learning_rate": 1.8426748586271087e-06,
+      "loss": 0.6437,
+      "step": 56570
+    },
+    {
+      "epoch": 2.2377345804742035,
+      "grad_norm": 1.662164731970988,
+      "learning_rate": 1.8408909925184193e-06,
+      "loss": 0.6677,
+      "step": 56580
+    },
+    {
+      "epoch": 2.2381300796930925,
+      "grad_norm": 1.3354636469933834,
+      "learning_rate": 1.8391077954548992e-06,
+      "loss": 0.6425,
+      "step": 56590
+    },
+    {
+      "epoch": 2.2385255789119816,
+      "grad_norm": 2.0407428627736817,
+      "learning_rate": 1.8373252678141912e-06,
+      "loss": 0.6464,
+      "step": 56600
+    },
+    {
+      "epoch": 2.2389210781308706,
+      "grad_norm": 1.4228120371973003,
+      "learning_rate": 1.8355434099738095e-06,
+      "loss": 0.6433,
+      "step": 56610
+    },
+    {
+      "epoch": 2.2393165773497596,
+      "grad_norm": 1.459021274733276,
+      "learning_rate": 1.8337622223111178e-06,
+      "loss": 0.6855,
+      "step": 56620
+    },
+    {
+      "epoch": 2.2397120765686487,
+      "grad_norm": 1.500836335928012,
+      "learning_rate": 1.8319817052033445e-06,
+      "loss": 0.6963,
+      "step": 56630
+    },
+    {
+      "epoch": 2.2401075757875377,
+      "grad_norm": 1.3895081909386187,
+      "learning_rate": 1.8302018590275694e-06,
+      "loss": 0.6721,
+      "step": 56640
+    },
+    {
+      "epoch": 2.2405030750064268,
+      "grad_norm": 1.4772143176780497,
+      "learning_rate": 1.8284226841607366e-06,
+      "loss": 0.6814,
+      "step": 56650
+    },
+    {
+      "epoch": 2.240898574225316,
+      "grad_norm": 1.7482312266724933,
+      "learning_rate": 1.8266441809796414e-06,
+      "loss": 0.6662,
+      "step": 56660
+    },
+    {
+      "epoch": 2.241294073444205,
+      "grad_norm": 1.3033825177029148,
+      "learning_rate": 1.8248663498609443e-06,
+      "loss": 0.6581,
+      "step": 56670
+    },
+    {
+      "epoch": 2.241689572663094,
+      "grad_norm": 1.667824217317311,
+      "learning_rate": 1.8230891911811554e-06,
+      "loss": 0.6442,
+      "step": 56680
+    },
+    {
+      "epoch": 2.242085071881983,
+      "grad_norm": 1.5148632174043146,
+      "learning_rate": 1.8213127053166496e-06,
+      "loss": 0.6408,
+      "step": 56690
+    },
+    {
+      "epoch": 2.242480571100872,
+      "grad_norm": 1.3933994678234267,
+      "learning_rate": 1.8195368926436558e-06,
+      "loss": 0.6605,
+      "step": 56700
+    },
+    {
+      "epoch": 2.242876070319761,
+      "grad_norm": 1.5538457724383805,
+      "learning_rate": 1.8177617535382592e-06,
+      "loss": 0.6225,
+      "step": 56710
+    },
+    {
+      "epoch": 2.24327156953865,
+      "grad_norm": 1.6427098243480491,
+      "learning_rate": 1.815987288376403e-06,
+      "loss": 0.6678,
+      "step": 56720
+    },
+    {
+      "epoch": 2.243667068757539,
+      "grad_norm": 1.7193446250775357,
+      "learning_rate": 1.8142134975338915e-06,
+      "loss": 0.657,
+      "step": 56730
+    },
+    {
+      "epoch": 2.244062567976428,
+      "grad_norm": 1.4362532723815795,
+      "learning_rate": 1.81244038138638e-06,
+      "loss": 0.6628,
+      "step": 56740
+    },
+    {
+      "epoch": 2.244458067195317,
+      "grad_norm": 1.2615105301421554,
+      "learning_rate": 1.810667940309388e-06,
+      "loss": 0.6889,
+      "step": 56750
+    },
+    {
+      "epoch": 2.244853566414206,
+      "grad_norm": 1.4974413493231196,
+      "learning_rate": 1.8088961746782856e-06,
+      "loss": 0.6506,
+      "step": 56760
+    },
+    {
+      "epoch": 2.2452490656330952,
+      "grad_norm": 1.702283462940168,
+      "learning_rate": 1.8071250848683015e-06,
+      "loss": 0.6796,
+      "step": 56770
+    },
+    {
+      "epoch": 2.2456445648519843,
+      "grad_norm": 1.5010915981516817,
+      "learning_rate": 1.805354671254525e-06,
+      "loss": 0.6693,
+      "step": 56780
+    },
+    {
+      "epoch": 2.2460400640708733,
+      "grad_norm": 1.3987135652736051,
+      "learning_rate": 1.803584934211896e-06,
+      "loss": 0.6482,
+      "step": 56790
+    },
+    {
+      "epoch": 2.2464355632897623,
+      "grad_norm": 1.4714821305013848,
+      "learning_rate": 1.8018158741152181e-06,
+      "loss": 0.6612,
+      "step": 56800
+    },
+    {
+      "epoch": 2.2468310625086514,
+      "grad_norm": 1.4467126209922527,
+      "learning_rate": 1.8000474913391447e-06,
+      "loss": 0.6767,
+      "step": 56810
+    },
+    {
+      "epoch": 2.2472265617275404,
+      "grad_norm": 1.3764708445008866,
+      "learning_rate": 1.7982797862581919e-06,
+      "loss": 0.6771,
+      "step": 56820
+    },
+    {
+      "epoch": 2.2476220609464295,
+      "grad_norm": 1.4056222687071145,
+      "learning_rate": 1.7965127592467264e-06,
+      "loss": 0.6565,
+      "step": 56830
+    },
+    {
+      "epoch": 2.2480175601653185,
+      "grad_norm": 1.4447925842674119,
+      "learning_rate": 1.7947464106789786e-06,
+      "loss": 0.6598,
+      "step": 56840
+    },
+    {
+      "epoch": 2.2484130593842075,
+      "grad_norm": 1.6981502707829608,
+      "learning_rate": 1.7929807409290251e-06,
+      "loss": 0.6248,
+      "step": 56850
+    },
+    {
+      "epoch": 2.2488085586030966,
+      "grad_norm": 1.3381738169941404,
+      "learning_rate": 1.7912157503708089e-06,
+      "loss": 0.6636,
+      "step": 56860
+    },
+    {
+      "epoch": 2.2492040578219856,
+      "grad_norm": 1.7812758873685959,
+      "learning_rate": 1.789451439378122e-06,
+      "loss": 0.6624,
+      "step": 56870
+    },
+    {
+      "epoch": 2.2495995570408747,
+      "grad_norm": 1.6468211772174832,
+      "learning_rate": 1.787687808324618e-06,
+      "loss": 0.6651,
+      "step": 56880
+    },
+    {
+      "epoch": 2.2499950562597637,
+      "grad_norm": 1.7019329011960929,
+      "learning_rate": 1.7859248575838e-06,
+      "loss": 0.687,
+      "step": 56890
+    },
+    {
+      "epoch": 2.2503905554786527,
+      "grad_norm": 1.53289615213947,
+      "learning_rate": 1.7841625875290353e-06,
+      "loss": 0.6742,
+      "step": 56900
+    },
+    {
+      "epoch": 2.250786054697542,
+      "grad_norm": 1.442120115058658,
+      "learning_rate": 1.7824009985335383e-06,
+      "loss": 0.6698,
+      "step": 56910
+    },
+    {
+      "epoch": 2.251181553916431,
+      "grad_norm": 1.5389697924827754,
+      "learning_rate": 1.7806400909703875e-06,
+      "loss": 0.6705,
+      "step": 56920
+    },
+    {
+      "epoch": 2.2515770531353203,
+      "grad_norm": 1.4902695257279648,
+      "learning_rate": 1.77887986521251e-06,
+      "loss": 0.6564,
+      "step": 56930
+    },
+    {
+      "epoch": 2.251972552354209,
+      "grad_norm": 1.718848011671999,
+      "learning_rate": 1.777120321632691e-06,
+      "loss": 0.6556,
+      "step": 56940
+    },
+    {
+      "epoch": 2.2523680515730984,
+      "grad_norm": 1.5808295743640888,
+      "learning_rate": 1.7753614606035746e-06,
+      "loss": 0.6529,
+      "step": 56950
+    },
+    {
+      "epoch": 2.252763550791987,
+      "grad_norm": 1.4850794532445288,
+      "learning_rate": 1.773603282497655e-06,
+      "loss": 0.6489,
+      "step": 56960
+    },
+    {
+      "epoch": 2.2531590500108765,
+      "grad_norm": 1.5127981658388805,
+      "learning_rate": 1.7718457876872841e-06,
+      "loss": 0.6737,
+      "step": 56970
+    },
+    {
+      "epoch": 2.253554549229765,
+      "grad_norm": 1.670672164536921,
+      "learning_rate": 1.7700889765446717e-06,
+      "loss": 0.6632,
+      "step": 56980
+    },
+    {
+      "epoch": 2.2539500484486545,
+      "grad_norm": 1.2796211535244366,
+      "learning_rate": 1.7683328494418777e-06,
+      "loss": 0.6745,
+      "step": 56990
+    },
+    {
+      "epoch": 2.254345547667543,
+      "grad_norm": 1.737965518331303,
+      "learning_rate": 1.7665774067508201e-06,
+      "loss": 0.6584,
+      "step": 57000
+    },
+    {
+      "epoch": 2.2547410468864326,
+      "grad_norm": 1.3051597217878708,
+      "learning_rate": 1.764822648843273e-06,
+      "loss": 0.657,
+      "step": 57010
+    },
+    {
+      "epoch": 2.255136546105321,
+      "grad_norm": 1.416979843607973,
+      "learning_rate": 1.7630685760908623e-06,
+      "loss": 0.6965,
+      "step": 57020
+    },
+    {
+      "epoch": 2.2555320453242107,
+      "grad_norm": 1.2901936454592218,
+      "learning_rate": 1.7613151888650726e-06,
+      "loss": 0.6432,
+      "step": 57030
+    },
+    {
+      "epoch": 2.2559275445430993,
+      "grad_norm": 1.6155756378778239,
+      "learning_rate": 1.7595624875372385e-06,
+      "loss": 0.6664,
+      "step": 57040
+    },
+    {
+      "epoch": 2.256323043761989,
+      "grad_norm": 1.4291756886832887,
+      "learning_rate": 1.7578104724785556e-06,
+      "loss": 0.6671,
+      "step": 57050
+    },
+    {
+      "epoch": 2.2567185429808774,
+      "grad_norm": 1.9051673408208258,
+      "learning_rate": 1.7560591440600665e-06,
+      "loss": 0.6221,
+      "step": 57060
+    },
+    {
+      "epoch": 2.257114042199767,
+      "grad_norm": 1.4345013343982636,
+      "learning_rate": 1.7543085026526774e-06,
+      "loss": 0.6662,
+      "step": 57070
+    },
+    {
+      "epoch": 2.2575095414186555,
+      "grad_norm": 1.4066077448653886,
+      "learning_rate": 1.7525585486271412e-06,
+      "loss": 0.6867,
+      "step": 57080
+    },
+    {
+      "epoch": 2.257905040637545,
+      "grad_norm": 1.2310540951399676,
+      "learning_rate": 1.750809282354069e-06,
+      "loss": 0.6592,
+      "step": 57090
+    },
+    {
+      "epoch": 2.258300539856434,
+      "grad_norm": 1.479782097624296,
+      "learning_rate": 1.7490607042039226e-06,
+      "loss": 0.6496,
+      "step": 57100
+    },
+    {
+      "epoch": 2.258696039075323,
+      "grad_norm": 1.6279422057663602,
+      "learning_rate": 1.7473128145470258e-06,
+      "loss": 0.6322,
+      "step": 57110
+    },
+    {
+      "epoch": 2.259091538294212,
+      "grad_norm": 1.406948905517128,
+      "learning_rate": 1.7455656137535471e-06,
+      "loss": 0.6725,
+      "step": 57120
+    },
+    {
+      "epoch": 2.259487037513101,
+      "grad_norm": 1.2998861084656765,
+      "learning_rate": 1.743819102193518e-06,
+      "loss": 0.639,
+      "step": 57130
+    },
+    {
+      "epoch": 2.25988253673199,
+      "grad_norm": 1.4955724776244252,
+      "learning_rate": 1.742073280236815e-06,
+      "loss": 0.6693,
+      "step": 57140
+    },
+    {
+      "epoch": 2.260278035950879,
+      "grad_norm": 1.5301768579838604,
+      "learning_rate": 1.740328148253178e-06,
+      "loss": 0.6483,
+      "step": 57150
+    },
+    {
+      "epoch": 2.260673535169768,
+      "grad_norm": 1.4628432047613005,
+      "learning_rate": 1.7385837066121924e-06,
+      "loss": 0.6426,
+      "step": 57160
+    },
+    {
+      "epoch": 2.2610690343886573,
+      "grad_norm": 1.5267684020560457,
+      "learning_rate": 1.7368399556833043e-06,
+      "loss": 0.6649,
+      "step": 57170
+    },
+    {
+      "epoch": 2.2614645336075463,
+      "grad_norm": 1.1257282297719096,
+      "learning_rate": 1.7350968958358083e-06,
+      "loss": 0.6685,
+      "step": 57180
+    },
+    {
+      "epoch": 2.2618600328264353,
+      "grad_norm": 1.3845602174893739,
+      "learning_rate": 1.733354527438853e-06,
+      "loss": 0.6435,
+      "step": 57190
+    },
+    {
+      "epoch": 2.2622555320453244,
+      "grad_norm": 1.7651997036133584,
+      "learning_rate": 1.731612850861446e-06,
+      "loss": 0.6684,
+      "step": 57200
+    },
+    {
+      "epoch": 2.2626510312642134,
+      "grad_norm": 1.6026459719233057,
+      "learning_rate": 1.7298718664724423e-06,
+      "loss": 0.6374,
+      "step": 57210
+    },
+    {
+      "epoch": 2.2630465304831024,
+      "grad_norm": 1.4384708999583027,
+      "learning_rate": 1.7281315746405526e-06,
+      "loss": 0.6769,
+      "step": 57220
+    },
+    {
+      "epoch": 2.2634420297019915,
+      "grad_norm": 1.3515263856584332,
+      "learning_rate": 1.72639197573434e-06,
+      "loss": 0.6841,
+      "step": 57230
+    },
+    {
+      "epoch": 2.2638375289208805,
+      "grad_norm": 1.594954434061927,
+      "learning_rate": 1.724653070122224e-06,
+      "loss": 0.6755,
+      "step": 57240
+    },
+    {
+      "epoch": 2.2642330281397696,
+      "grad_norm": 1.5447594339387254,
+      "learning_rate": 1.7229148581724726e-06,
+      "loss": 0.6648,
+      "step": 57250
+    },
+    {
+      "epoch": 2.2646285273586586,
+      "grad_norm": 1.6356337120313031,
+      "learning_rate": 1.7211773402532123e-06,
+      "loss": 0.6459,
+      "step": 57260
+    },
+    {
+      "epoch": 2.2650240265775476,
+      "grad_norm": 1.250288578927964,
+      "learning_rate": 1.7194405167324156e-06,
+      "loss": 0.6581,
+      "step": 57270
+    },
+    {
+      "epoch": 2.2654195257964367,
+      "grad_norm": 1.6810977534904172,
+      "learning_rate": 1.7177043879779171e-06,
+      "loss": 0.6659,
+      "step": 57280
+    },
+    {
+      "epoch": 2.2658150250153257,
+      "grad_norm": 1.6086514165376045,
+      "learning_rate": 1.7159689543573937e-06,
+      "loss": 0.6561,
+      "step": 57290
+    },
+    {
+      "epoch": 2.2662105242342148,
+      "grad_norm": 1.2587524167102466,
+      "learning_rate": 1.7142342162383852e-06,
+      "loss": 0.6332,
+      "step": 57300
+    },
+    {
+      "epoch": 2.266606023453104,
+      "grad_norm": 1.5816611326839283,
+      "learning_rate": 1.7125001739882757e-06,
+      "loss": 0.6879,
+      "step": 57310
+    },
+    {
+      "epoch": 2.267001522671993,
+      "grad_norm": 1.5914852857365567,
+      "learning_rate": 1.7107668279743084e-06,
+      "loss": 0.6481,
+      "step": 57320
+    },
+    {
+      "epoch": 2.267397021890882,
+      "grad_norm": 1.2157033436451423,
+      "learning_rate": 1.7090341785635757e-06,
+      "loss": 0.6948,
+      "step": 57330
+    },
+    {
+      "epoch": 2.267792521109771,
+      "grad_norm": 1.7803246563676012,
+      "learning_rate": 1.7073022261230226e-06,
+      "loss": 0.6538,
+      "step": 57340
+    },
+    {
+      "epoch": 2.26818802032866,
+      "grad_norm": 1.3001154680395732,
+      "learning_rate": 1.7055709710194452e-06,
+      "loss": 0.6846,
+      "step": 57350
+    },
+    {
+      "epoch": 2.268583519547549,
+      "grad_norm": 1.6657573371342396,
+      "learning_rate": 1.7038404136194965e-06,
+      "loss": 0.6423,
+      "step": 57360
+    },
+    {
+      "epoch": 2.268979018766438,
+      "grad_norm": 1.2987266925352685,
+      "learning_rate": 1.7021105542896765e-06,
+      "loss": 0.6781,
+      "step": 57370
+    },
+    {
+      "epoch": 2.269374517985327,
+      "grad_norm": 1.7055843078501114,
+      "learning_rate": 1.7003813933963426e-06,
+      "loss": 0.6559,
+      "step": 57380
+    },
+    {
+      "epoch": 2.269770017204216,
+      "grad_norm": 1.3833483045606862,
+      "learning_rate": 1.6986529313056982e-06,
+      "loss": 0.6719,
+      "step": 57390
+    },
+    {
+      "epoch": 2.270165516423105,
+      "grad_norm": 1.3986479432025962,
+      "learning_rate": 1.6969251683838057e-06,
+      "loss": 0.671,
+      "step": 57400
+    },
+    {
+      "epoch": 2.270561015641994,
+      "grad_norm": 1.598064121143924,
+      "learning_rate": 1.6951981049965732e-06,
+      "loss": 0.662,
+      "step": 57410
+    },
+    {
+      "epoch": 2.2709565148608832,
+      "grad_norm": 1.3996721457511865,
+      "learning_rate": 1.6934717415097618e-06,
+      "loss": 0.6427,
+      "step": 57420
+    },
+    {
+      "epoch": 2.2713520140797723,
+      "grad_norm": 1.3100170458920979,
+      "learning_rate": 1.6917460782889893e-06,
+      "loss": 0.6628,
+      "step": 57430
+    },
+    {
+      "epoch": 2.2717475132986613,
+      "grad_norm": 1.7192907129857418,
+      "learning_rate": 1.6900211156997182e-06,
+      "loss": 0.6303,
+      "step": 57440
+    },
+    {
+      "epoch": 2.2721430125175504,
+      "grad_norm": 1.4497992112254119,
+      "learning_rate": 1.6882968541072698e-06,
+      "loss": 0.636,
+      "step": 57450
+    },
+    {
+      "epoch": 2.2725385117364394,
+      "grad_norm": 1.424517628367475,
+      "learning_rate": 1.6865732938768103e-06,
+      "loss": 0.6761,
+      "step": 57460
+    },
+    {
+      "epoch": 2.2729340109553284,
+      "grad_norm": 1.7028883652228886,
+      "learning_rate": 1.6848504353733607e-06,
+      "loss": 0.6329,
+      "step": 57470
+    },
+    {
+      "epoch": 2.2733295101742175,
+      "grad_norm": 1.3406169916259694,
+      "learning_rate": 1.683128278961792e-06,
+      "loss": 0.6452,
+      "step": 57480
+    },
+    {
+      "epoch": 2.2737250093931065,
+      "grad_norm": 1.4435983804456758,
+      "learning_rate": 1.68140682500683e-06,
+      "loss": 0.6393,
+      "step": 57490
+    },
+    {
+      "epoch": 2.2741205086119956,
+      "grad_norm": 1.7632535182341844,
+      "learning_rate": 1.679686073873046e-06,
+      "loss": 0.6426,
+      "step": 57500
+    },
+    {
+      "epoch": 2.2745160078308846,
+      "grad_norm": 1.4736549012581635,
+      "learning_rate": 1.6779660259248693e-06,
+      "loss": 0.6398,
+      "step": 57510
+    },
+    {
+      "epoch": 2.2749115070497736,
+      "grad_norm": 1.4868355896250138,
+      "learning_rate": 1.6762466815265722e-06,
+      "loss": 0.6165,
+      "step": 57520
+    },
+    {
+      "epoch": 2.2753070062686627,
+      "grad_norm": 1.3288081421276643,
+      "learning_rate": 1.674528041042287e-06,
+      "loss": 0.6285,
+      "step": 57530
+    },
+    {
+      "epoch": 2.2757025054875517,
+      "grad_norm": 1.5871980916283486,
+      "learning_rate": 1.6728101048359884e-06,
+      "loss": 0.6548,
+      "step": 57540
+    },
+    {
+      "epoch": 2.2760980047064407,
+      "grad_norm": 1.2992695669355179,
+      "learning_rate": 1.6710928732715093e-06,
+      "loss": 0.6624,
+      "step": 57550
+    },
+    {
+      "epoch": 2.27649350392533,
+      "grad_norm": 1.6214569682743214,
+      "learning_rate": 1.6693763467125262e-06,
+      "loss": 0.6733,
+      "step": 57560
+    },
+    {
+      "epoch": 2.276889003144219,
+      "grad_norm": 1.5715821537528092,
+      "learning_rate": 1.6676605255225753e-06,
+      "loss": 0.6703,
+      "step": 57570
+    },
+    {
+      "epoch": 2.277284502363108,
+      "grad_norm": 1.2825438695845668,
+      "learning_rate": 1.6659454100650318e-06,
+      "loss": 0.6578,
+      "step": 57580
+    },
+    {
+      "epoch": 2.277680001581997,
+      "grad_norm": 1.4912912567077334,
+      "learning_rate": 1.664231000703132e-06,
+      "loss": 0.6604,
+      "step": 57590
+    },
+    {
+      "epoch": 2.278075500800886,
+      "grad_norm": 1.3372721678849402,
+      "learning_rate": 1.662517297799956e-06,
+      "loss": 0.6379,
+      "step": 57600
+    },
+    {
+      "epoch": 2.278471000019775,
+      "grad_norm": 1.3510983600573596,
+      "learning_rate": 1.6608043017184395e-06,
+      "loss": 0.6624,
+      "step": 57610
+    },
+    {
+      "epoch": 2.278866499238664,
+      "grad_norm": 1.491593008341789,
+      "learning_rate": 1.6590920128213623e-06,
+      "loss": 0.6824,
+      "step": 57620
+    },
+    {
+      "epoch": 2.279261998457553,
+      "grad_norm": 1.2950374325888685,
+      "learning_rate": 1.6573804314713616e-06,
+      "loss": 0.6708,
+      "step": 57630
+    },
+    {
+      "epoch": 2.279657497676442,
+      "grad_norm": 1.414082697029235,
+      "learning_rate": 1.655669558030919e-06,
+      "loss": 0.688,
+      "step": 57640
+    },
+    {
+      "epoch": 2.280052996895331,
+      "grad_norm": 1.5270323552824139,
+      "learning_rate": 1.653959392862367e-06,
+      "loss": 0.6371,
+      "step": 57650
+    },
+    {
+      "epoch": 2.28044849611422,
+      "grad_norm": 1.6408441809043868,
+      "learning_rate": 1.6522499363278915e-06,
+      "loss": 0.654,
+      "step": 57660
+    },
+    {
+      "epoch": 2.280843995333109,
+      "grad_norm": 1.393915299510681,
+      "learning_rate": 1.6505411887895245e-06,
+      "loss": 0.6597,
+      "step": 57670
+    },
+    {
+      "epoch": 2.2812394945519983,
+      "grad_norm": 1.567641139918831,
+      "learning_rate": 1.648833150609151e-06,
+      "loss": 0.6796,
+      "step": 57680
+    },
+    {
+      "epoch": 2.2816349937708873,
+      "grad_norm": 1.4602377625263734,
+      "learning_rate": 1.6471258221485037e-06,
+      "loss": 0.653,
+      "step": 57690
+    },
+    {
+      "epoch": 2.2820304929897763,
+      "grad_norm": 1.6635297503480375,
+      "learning_rate": 1.6454192037691653e-06,
+      "loss": 0.6897,
+      "step": 57700
+    },
+    {
+      "epoch": 2.2824259922086654,
+      "grad_norm": 1.2434893150116195,
+      "learning_rate": 1.6437132958325663e-06,
+      "loss": 0.66,
+      "step": 57710
+    },
+    {
+      "epoch": 2.2828214914275544,
+      "grad_norm": 1.1860223731327566,
+      "learning_rate": 1.6420080986999925e-06,
+      "loss": 0.6566,
+      "step": 57720
+    },
+    {
+      "epoch": 2.2832169906464435,
+      "grad_norm": 1.5192886544249655,
+      "learning_rate": 1.6403036127325723e-06,
+      "loss": 0.6671,
+      "step": 57730
+    },
+    {
+      "epoch": 2.2836124898653325,
+      "grad_norm": 1.4677915420538692,
+      "learning_rate": 1.6385998382912892e-06,
+      "loss": 0.6392,
+      "step": 57740
+    },
+    {
+      "epoch": 2.2840079890842215,
+      "grad_norm": 1.2732380959663254,
+      "learning_rate": 1.6368967757369708e-06,
+      "loss": 0.6447,
+      "step": 57750
+    },
+    {
+      "epoch": 2.2844034883031106,
+      "grad_norm": 1.2439017697047998,
+      "learning_rate": 1.6351944254302993e-06,
+      "loss": 0.6778,
+      "step": 57760
+    },
+    {
+      "epoch": 2.2847989875219996,
+      "grad_norm": 1.7164507940755784,
+      "learning_rate": 1.6334927877318008e-06,
+      "loss": 0.6557,
+      "step": 57770
+    },
+    {
+      "epoch": 2.2851944867408887,
+      "grad_norm": 1.6023879301931152,
+      "learning_rate": 1.6317918630018552e-06,
+      "loss": 0.6529,
+      "step": 57780
+    },
+    {
+      "epoch": 2.2855899859597777,
+      "grad_norm": 1.3174525669716117,
+      "learning_rate": 1.6300916516006871e-06,
+      "loss": 0.6571,
+      "step": 57790
+    },
+    {
+      "epoch": 2.2859854851786667,
+      "grad_norm": 1.5634580441480015,
+      "learning_rate": 1.628392153888375e-06,
+      "loss": 0.6656,
+      "step": 57800
+    },
+    {
+      "epoch": 2.2863809843975558,
+      "grad_norm": 1.5002028902256408,
+      "learning_rate": 1.62669337022484e-06,
+      "loss": 0.6628,
+      "step": 57810
+    },
+    {
+      "epoch": 2.286776483616445,
+      "grad_norm": 1.653627348827789,
+      "learning_rate": 1.62499530096986e-06,
+      "loss": 0.6397,
+      "step": 57820
+    },
+    {
+      "epoch": 2.287171982835334,
+      "grad_norm": 1.3228551588943012,
+      "learning_rate": 1.6232979464830512e-06,
+      "loss": 0.6952,
+      "step": 57830
+    },
+    {
+      "epoch": 2.287567482054223,
+      "grad_norm": 1.5451789610980418,
+      "learning_rate": 1.6216013071238884e-06,
+      "loss": 0.678,
+      "step": 57840
+    },
+    {
+      "epoch": 2.287962981273112,
+      "grad_norm": 1.6273381667535372,
+      "learning_rate": 1.6199053832516875e-06,
+      "loss": 0.634,
+      "step": 57850
+    },
+    {
+      "epoch": 2.288358480492001,
+      "grad_norm": 1.1599876504475686,
+      "learning_rate": 1.6182101752256201e-06,
+      "loss": 0.6506,
+      "step": 57860
+    },
+    {
+      "epoch": 2.28875397971089,
+      "grad_norm": 1.5176658895387827,
+      "learning_rate": 1.6165156834046996e-06,
+      "loss": 0.6808,
+      "step": 57870
+    },
+    {
+      "epoch": 2.289149478929779,
+      "grad_norm": 1.6738101398943044,
+      "learning_rate": 1.6148219081477901e-06,
+      "loss": 0.6563,
+      "step": 57880
+    },
+    {
+      "epoch": 2.289544978148668,
+      "grad_norm": 1.484106754214158,
+      "learning_rate": 1.613128849813606e-06,
+      "loss": 0.6912,
+      "step": 57890
+    },
+    {
+      "epoch": 2.289940477367557,
+      "grad_norm": 2.1509959343783,
+      "learning_rate": 1.6114365087607053e-06,
+      "loss": 0.6071,
+      "step": 57900
+    },
+    {
+      "epoch": 2.290335976586446,
+      "grad_norm": 1.323748398522322,
+      "learning_rate": 1.6097448853475e-06,
+      "loss": 0.6647,
+      "step": 57910
+    },
+    {
+      "epoch": 2.290731475805335,
+      "grad_norm": 1.821699853039601,
+      "learning_rate": 1.6080539799322442e-06,
+      "loss": 0.6669,
+      "step": 57920
+    },
+    {
+      "epoch": 2.2911269750242242,
+      "grad_norm": 1.4426798718781264,
+      "learning_rate": 1.6063637928730457e-06,
+      "loss": 0.6681,
+      "step": 57930
+    },
+    {
+      "epoch": 2.2915224742431133,
+      "grad_norm": 1.6176028663896787,
+      "learning_rate": 1.6046743245278556e-06,
+      "loss": 0.6643,
+      "step": 57940
+    },
+    {
+      "epoch": 2.2919179734620023,
+      "grad_norm": 1.5368376892843394,
+      "learning_rate": 1.6029855752544737e-06,
+      "loss": 0.6568,
+      "step": 57950
+    },
+    {
+      "epoch": 2.2923134726808914,
+      "grad_norm": 1.6495477891397017,
+      "learning_rate": 1.6012975454105472e-06,
+      "loss": 0.6708,
+      "step": 57960
+    },
+    {
+      "epoch": 2.2927089718997804,
+      "grad_norm": 1.675339368626984,
+      "learning_rate": 1.5996102353535753e-06,
+      "loss": 0.6371,
+      "step": 57970
+    },
+    {
+      "epoch": 2.2931044711186694,
+      "grad_norm": 1.418447469175257,
+      "learning_rate": 1.5979236454408975e-06,
+      "loss": 0.6489,
+      "step": 57980
+    },
+    {
+      "epoch": 2.2934999703375585,
+      "grad_norm": 1.863394765318512,
+      "learning_rate": 1.5962377760297083e-06,
+      "loss": 0.6536,
+      "step": 57990
+    },
+    {
+      "epoch": 2.2938954695564475,
+      "grad_norm": 1.5632786137165733,
+      "learning_rate": 1.5945526274770423e-06,
+      "loss": 0.6569,
+      "step": 58000
+    },
+    {
+      "epoch": 2.2942909687753366,
+      "grad_norm": 1.664471452019867,
+      "learning_rate": 1.592868200139789e-06,
+      "loss": 0.623,
+      "step": 58010
+    },
+    {
+      "epoch": 2.2946864679942256,
+      "grad_norm": 1.3245194241777019,
+      "learning_rate": 1.5911844943746774e-06,
+      "loss": 0.6561,
+      "step": 58020
+    },
+    {
+      "epoch": 2.2950819672131146,
+      "grad_norm": 1.3490462723433019,
+      "learning_rate": 1.5895015105382915e-06,
+      "loss": 0.6353,
+      "step": 58030
+    },
+    {
+      "epoch": 2.2954774664320037,
+      "grad_norm": 1.3904326446403537,
+      "learning_rate": 1.5878192489870543e-06,
+      "loss": 0.6616,
+      "step": 58040
+    },
+    {
+      "epoch": 2.2958729656508927,
+      "grad_norm": 1.561928048385546,
+      "learning_rate": 1.586137710077244e-06,
+      "loss": 0.6663,
+      "step": 58050
+    },
+    {
+      "epoch": 2.2962684648697818,
+      "grad_norm": 1.2986181394502438,
+      "learning_rate": 1.5844568941649795e-06,
+      "loss": 0.6742,
+      "step": 58060
+    },
+    {
+      "epoch": 2.296663964088671,
+      "grad_norm": 1.6383648008205771,
+      "learning_rate": 1.5827768016062295e-06,
+      "loss": 0.6476,
+      "step": 58070
+    },
+    {
+      "epoch": 2.29705946330756,
+      "grad_norm": 1.2337991535244286,
+      "learning_rate": 1.5810974327568064e-06,
+      "loss": 0.6498,
+      "step": 58080
+    },
+    {
+      "epoch": 2.297454962526449,
+      "grad_norm": 1.3983250667609106,
+      "learning_rate": 1.5794187879723755e-06,
+      "loss": 0.6523,
+      "step": 58090
+    },
+    {
+      "epoch": 2.297850461745338,
+      "grad_norm": 1.3251364210358583,
+      "learning_rate": 1.5777408676084416e-06,
+      "loss": 0.6714,
+      "step": 58100
+    },
+    {
+      "epoch": 2.298245960964227,
+      "grad_norm": 1.485177517136633,
+      "learning_rate": 1.5760636720203626e-06,
+      "loss": 0.6551,
+      "step": 58110
+    },
+    {
+      "epoch": 2.298641460183116,
+      "grad_norm": 1.457459592180781,
+      "learning_rate": 1.5743872015633383e-06,
+      "loss": 0.6576,
+      "step": 58120
+    },
+    {
+      "epoch": 2.299036959402005,
+      "grad_norm": 1.519833056103309,
+      "learning_rate": 1.572711456592415e-06,
+      "loss": 0.6706,
+      "step": 58130
+    },
+    {
+      "epoch": 2.299432458620894,
+      "grad_norm": 1.2310258239208047,
+      "learning_rate": 1.5710364374624897e-06,
+      "loss": 0.6795,
+      "step": 58140
+    },
+    {
+      "epoch": 2.299827957839783,
+      "grad_norm": 1.6219898840985205,
+      "learning_rate": 1.5693621445283002e-06,
+      "loss": 0.6621,
+      "step": 58150
+    },
+    {
+      "epoch": 2.300223457058672,
+      "grad_norm": 1.72127109905613,
+      "learning_rate": 1.5676885781444357e-06,
+      "loss": 0.6595,
+      "step": 58160
+    },
+    {
+      "epoch": 2.300618956277561,
+      "grad_norm": 1.2784419660467872,
+      "learning_rate": 1.5660157386653252e-06,
+      "loss": 0.6898,
+      "step": 58170
+    },
+    {
+      "epoch": 2.3010144554964502,
+      "grad_norm": 1.4028041658903252,
+      "learning_rate": 1.5643436264452527e-06,
+      "loss": 0.6905,
+      "step": 58180
+    },
+    {
+      "epoch": 2.3014099547153393,
+      "grad_norm": 1.4509910281962437,
+      "learning_rate": 1.5626722418383372e-06,
+      "loss": 0.6315,
+      "step": 58190
+    },
+    {
+      "epoch": 2.3018054539342283,
+      "grad_norm": 1.4389464337031619,
+      "learning_rate": 1.5610015851985533e-06,
+      "loss": 0.6589,
+      "step": 58200
+    },
+    {
+      "epoch": 2.3022009531531173,
+      "grad_norm": 1.6162702061779972,
+      "learning_rate": 1.5593316568797145e-06,
+      "loss": 0.6435,
+      "step": 58210
+    },
+    {
+      "epoch": 2.3025964523720064,
+      "grad_norm": 1.3017251205365583,
+      "learning_rate": 1.557662457235486e-06,
+      "loss": 0.6703,
+      "step": 58220
+    },
+    {
+      "epoch": 2.3029919515908954,
+      "grad_norm": 1.6342843100300628,
+      "learning_rate": 1.555993986619373e-06,
+      "loss": 0.6601,
+      "step": 58230
+    },
+    {
+      "epoch": 2.3033874508097845,
+      "grad_norm": 1.4379683509508339,
+      "learning_rate": 1.5543262453847318e-06,
+      "loss": 0.6612,
+      "step": 58240
+    },
+    {
+      "epoch": 2.3037829500286735,
+      "grad_norm": 1.4207268132884536,
+      "learning_rate": 1.5526592338847579e-06,
+      "loss": 0.6337,
+      "step": 58250
+    },
+    {
+      "epoch": 2.3041784492475625,
+      "grad_norm": 1.7506847451268952,
+      "learning_rate": 1.5509929524724999e-06,
+      "loss": 0.6606,
+      "step": 58260
+    },
+    {
+      "epoch": 2.3045739484664516,
+      "grad_norm": 1.5339848231245854,
+      "learning_rate": 1.5493274015008435e-06,
+      "loss": 0.6499,
+      "step": 58270
+    },
+    {
+      "epoch": 2.304969447685341,
+      "grad_norm": 1.554093932889167,
+      "learning_rate": 1.5476625813225276e-06,
+      "loss": 0.6519,
+      "step": 58280
+    },
+    {
+      "epoch": 2.3053649469042297,
+      "grad_norm": 1.2985812733157804,
+      "learning_rate": 1.5459984922901312e-06,
+      "loss": 0.6443,
+      "step": 58290
+    },
+    {
+      "epoch": 2.305760446123119,
+      "grad_norm": 1.4940357814146108,
+      "learning_rate": 1.5443351347560777e-06,
+      "loss": 0.6694,
+      "step": 58300
+    },
+    {
+      "epoch": 2.3061559453420077,
+      "grad_norm": 1.5596465786415064,
+      "learning_rate": 1.5426725090726407e-06,
+      "loss": 0.6423,
+      "step": 58310
+    },
+    {
+      "epoch": 2.3065514445608972,
+      "grad_norm": 1.4237617653192283,
+      "learning_rate": 1.5410106155919352e-06,
+      "loss": 0.6754,
+      "step": 58320
+    },
+    {
+      "epoch": 2.306946943779786,
+      "grad_norm": 1.2302332473431614,
+      "learning_rate": 1.53934945466592e-06,
+      "loss": 0.6792,
+      "step": 58330
+    },
+    {
+      "epoch": 2.3073424429986753,
+      "grad_norm": 1.4841925212686782,
+      "learning_rate": 1.537689026646403e-06,
+      "loss": 0.6579,
+      "step": 58340
+    },
+    {
+      "epoch": 2.307737942217564,
+      "grad_norm": 1.5674232813307791,
+      "learning_rate": 1.5360293318850327e-06,
+      "loss": 0.674,
+      "step": 58350
+    },
+    {
+      "epoch": 2.3081334414364534,
+      "grad_norm": 1.3277182227356745,
+      "learning_rate": 1.5343703707333035e-06,
+      "loss": 0.6645,
+      "step": 58360
+    },
+    {
+      "epoch": 2.308528940655342,
+      "grad_norm": 1.7565370101921338,
+      "learning_rate": 1.5327121435425573e-06,
+      "loss": 0.6122,
+      "step": 58370
+    },
+    {
+      "epoch": 2.3089244398742315,
+      "grad_norm": 1.443959377358891,
+      "learning_rate": 1.5310546506639756e-06,
+      "loss": 0.6371,
+      "step": 58380
+    },
+    {
+      "epoch": 2.30931993909312,
+      "grad_norm": 1.2468314994063057,
+      "learning_rate": 1.5293978924485898e-06,
+      "loss": 0.6684,
+      "step": 58390
+    },
+    {
+      "epoch": 2.3097154383120095,
+      "grad_norm": 1.533073051614103,
+      "learning_rate": 1.5277418692472696e-06,
+      "loss": 0.6752,
+      "step": 58400
+    },
+    {
+      "epoch": 2.310110937530898,
+      "grad_norm": 1.6903457432094442,
+      "learning_rate": 1.5260865814107356e-06,
+      "loss": 0.6848,
+      "step": 58410
+    },
+    {
+      "epoch": 2.3105064367497876,
+      "grad_norm": 1.3797874101974819,
+      "learning_rate": 1.5244320292895466e-06,
+      "loss": 0.6438,
+      "step": 58420
+    },
+    {
+      "epoch": 2.310901935968676,
+      "grad_norm": 1.3366210554745637,
+      "learning_rate": 1.5227782132341124e-06,
+      "loss": 0.6638,
+      "step": 58430
+    },
+    {
+      "epoch": 2.3112974351875657,
+      "grad_norm": 1.4089313087924513,
+      "learning_rate": 1.5211251335946774e-06,
+      "loss": 0.6623,
+      "step": 58440
+    },
+    {
+      "epoch": 2.3116929344064547,
+      "grad_norm": 1.564403536692619,
+      "learning_rate": 1.5194727907213396e-06,
+      "loss": 0.6699,
+      "step": 58450
+    },
+    {
+      "epoch": 2.3120884336253438,
+      "grad_norm": 1.3470041831455608,
+      "learning_rate": 1.5178211849640345e-06,
+      "loss": 0.6321,
+      "step": 58460
+    },
+    {
+      "epoch": 2.312483932844233,
+      "grad_norm": 1.5340558770808561,
+      "learning_rate": 1.5161703166725466e-06,
+      "loss": 0.6513,
+      "step": 58470
+    },
+    {
+      "epoch": 2.312879432063122,
+      "grad_norm": 1.5683140626013419,
+      "learning_rate": 1.5145201861964988e-06,
+      "loss": 0.6419,
+      "step": 58480
+    },
+    {
+      "epoch": 2.313274931282011,
+      "grad_norm": 1.8521633623477716,
+      "learning_rate": 1.5128707938853627e-06,
+      "loss": 0.6584,
+      "step": 58490
+    },
+    {
+      "epoch": 2.3136704305009,
+      "grad_norm": 1.5309877105310665,
+      "learning_rate": 1.5112221400884485e-06,
+      "loss": 0.6071,
+      "step": 58500
+    },
+    {
+      "epoch": 2.314065929719789,
+      "grad_norm": 1.345806768643149,
+      "learning_rate": 1.5095742251549167e-06,
+      "loss": 0.6742,
+      "step": 58510
+    },
+    {
+      "epoch": 2.314461428938678,
+      "grad_norm": 1.6424722452691336,
+      "learning_rate": 1.507927049433765e-06,
+      "loss": 0.6737,
+      "step": 58520
+    },
+    {
+      "epoch": 2.314856928157567,
+      "grad_norm": 1.395727716804011,
+      "learning_rate": 1.5062806132738362e-06,
+      "loss": 0.7003,
+      "step": 58530
+    },
+    {
+      "epoch": 2.315252427376456,
+      "grad_norm": 1.777091670701276,
+      "learning_rate": 1.5046349170238195e-06,
+      "loss": 0.6382,
+      "step": 58540
+    },
+    {
+      "epoch": 2.315647926595345,
+      "grad_norm": 1.4701249565898737,
+      "learning_rate": 1.5029899610322446e-06,
+      "loss": 0.6884,
+      "step": 58550
+    },
+    {
+      "epoch": 2.316043425814234,
+      "grad_norm": 1.4692638398286286,
+      "learning_rate": 1.5013457456474827e-06,
+      "loss": 0.6692,
+      "step": 58560
+    },
+    {
+      "epoch": 2.316438925033123,
+      "grad_norm": 1.4041738955726866,
+      "learning_rate": 1.4997022712177538e-06,
+      "loss": 0.6642,
+      "step": 58570
+    },
+    {
+      "epoch": 2.3168344242520122,
+      "grad_norm": 1.3469118053050946,
+      "learning_rate": 1.4980595380911167e-06,
+      "loss": 0.6968,
+      "step": 58580
+    },
+    {
+      "epoch": 2.3172299234709013,
+      "grad_norm": 1.426927940296544,
+      "learning_rate": 1.4964175466154712e-06,
+      "loss": 0.6568,
+      "step": 58590
+    },
+    {
+      "epoch": 2.3176254226897903,
+      "grad_norm": 1.351325834893919,
+      "learning_rate": 1.4947762971385671e-06,
+      "loss": 0.6443,
+      "step": 58600
+    },
+    {
+      "epoch": 2.3180209219086794,
+      "grad_norm": 1.571600262129865,
+      "learning_rate": 1.4931357900079896e-06,
+      "loss": 0.6538,
+      "step": 58610
+    },
+    {
+      "epoch": 2.3184164211275684,
+      "grad_norm": 1.4764750593585865,
+      "learning_rate": 1.491496025571173e-06,
+      "loss": 0.6424,
+      "step": 58620
+    },
+    {
+      "epoch": 2.3188119203464574,
+      "grad_norm": 1.578660614384939,
+      "learning_rate": 1.4898570041753886e-06,
+      "loss": 0.6435,
+      "step": 58630
+    },
+    {
+      "epoch": 2.3192074195653465,
+      "grad_norm": 1.2492216998705292,
+      "learning_rate": 1.4882187261677555e-06,
+      "loss": 0.6515,
+      "step": 58640
+    },
+    {
+      "epoch": 2.3196029187842355,
+      "grad_norm": 1.699200157935068,
+      "learning_rate": 1.48658119189523e-06,
+      "loss": 0.6402,
+      "step": 58650
+    },
+    {
+      "epoch": 2.3199984180031246,
+      "grad_norm": 1.3847294515883004,
+      "learning_rate": 1.4849444017046173e-06,
+      "loss": 0.6513,
+      "step": 58660
+    },
+    {
+      "epoch": 2.3203939172220136,
+      "grad_norm": 1.6170081171977473,
+      "learning_rate": 1.4833083559425598e-06,
+      "loss": 0.6748,
+      "step": 58670
+    },
+    {
+      "epoch": 2.3207894164409026,
+      "grad_norm": 1.4320738633561334,
+      "learning_rate": 1.4816730549555436e-06,
+      "loss": 0.653,
+      "step": 58680
+    },
+    {
+      "epoch": 2.3211849156597917,
+      "grad_norm": 1.3464218011610256,
+      "learning_rate": 1.4800384990898965e-06,
+      "loss": 0.6594,
+      "step": 58690
+    },
+    {
+      "epoch": 2.3215804148786807,
+      "grad_norm": 1.2664219459404893,
+      "learning_rate": 1.4784046886917919e-06,
+      "loss": 0.6888,
+      "step": 58700
+    },
+    {
+      "epoch": 2.3219759140975698,
+      "grad_norm": 1.64262450382083,
+      "learning_rate": 1.47677162410724e-06,
+      "loss": 0.6772,
+      "step": 58710
+    },
+    {
+      "epoch": 2.322371413316459,
+      "grad_norm": 1.4687884249862226,
+      "learning_rate": 1.4751393056820996e-06,
+      "loss": 0.6725,
+      "step": 58720
+    },
+    {
+      "epoch": 2.322766912535348,
+      "grad_norm": 1.526372832984667,
+      "learning_rate": 1.4735077337620634e-06,
+      "loss": 0.6737,
+      "step": 58730
+    },
+    {
+      "epoch": 2.323162411754237,
+      "grad_norm": 1.2434984411986378,
+      "learning_rate": 1.4718769086926742e-06,
+      "loss": 0.6559,
+      "step": 58740
+    },
+    {
+      "epoch": 2.323557910973126,
+      "grad_norm": 1.5748942810141886,
+      "learning_rate": 1.4702468308193102e-06,
+      "loss": 0.6794,
+      "step": 58750
+    },
+    {
+      "epoch": 2.323953410192015,
+      "grad_norm": 1.6068805492861729,
+      "learning_rate": 1.4686175004871966e-06,
+      "loss": 0.6632,
+      "step": 58760
+    },
+    {
+      "epoch": 2.324348909410904,
+      "grad_norm": 1.3607180031735213,
+      "learning_rate": 1.466988918041396e-06,
+      "loss": 0.6356,
+      "step": 58770
+    },
+    {
+      "epoch": 2.324744408629793,
+      "grad_norm": 1.5275811133382595,
+      "learning_rate": 1.465361083826813e-06,
+      "loss": 0.6906,
+      "step": 58780
+    },
+    {
+      "epoch": 2.325139907848682,
+      "grad_norm": 1.3645446397267935,
+      "learning_rate": 1.463733998188197e-06,
+      "loss": 0.6585,
+      "step": 58790
+    },
+    {
+      "epoch": 2.325535407067571,
+      "grad_norm": 1.3757428764176545,
+      "learning_rate": 1.4621076614701368e-06,
+      "loss": 0.6788,
+      "step": 58800
+    },
+    {
+      "epoch": 2.32593090628646,
+      "grad_norm": 1.4943658670214794,
+      "learning_rate": 1.4604820740170622e-06,
+      "loss": 0.6397,
+      "step": 58810
+    },
+    {
+      "epoch": 2.326326405505349,
+      "grad_norm": 1.3099215406675848,
+      "learning_rate": 1.4588572361732428e-06,
+      "loss": 0.6353,
+      "step": 58820
+    },
+    {
+      "epoch": 2.3267219047242382,
+      "grad_norm": 1.4080421032469292,
+      "learning_rate": 1.457233148282795e-06,
+      "loss": 0.6246,
+      "step": 58830
+    },
+    {
+      "epoch": 2.3271174039431273,
+      "grad_norm": 1.5910063067952556,
+      "learning_rate": 1.4556098106896698e-06,
+      "loss": 0.6693,
+      "step": 58840
+    },
+    {
+      "epoch": 2.3275129031620163,
+      "grad_norm": 1.273247061745168,
+      "learning_rate": 1.4539872237376646e-06,
+      "loss": 0.6395,
+      "step": 58850
+    },
+    {
+      "epoch": 2.3279084023809054,
+      "grad_norm": 1.4800588249259536,
+      "learning_rate": 1.452365387770413e-06,
+      "loss": 0.6492,
+      "step": 58860
+    },
+    {
+      "epoch": 2.3283039015997944,
+      "grad_norm": 1.5803342815483064,
+      "learning_rate": 1.450744303131395e-06,
+      "loss": 0.6418,
+      "step": 58870
+    },
+    {
+      "epoch": 2.3286994008186834,
+      "grad_norm": 1.5702078922429623,
+      "learning_rate": 1.449123970163926e-06,
+      "loss": 0.6649,
+      "step": 58880
+    },
+    {
+      "epoch": 2.3290949000375725,
+      "grad_norm": 1.1016771399149003,
+      "learning_rate": 1.4475043892111668e-06,
+      "loss": 0.6483,
+      "step": 58890
+    },
+    {
+      "epoch": 2.3294903992564615,
+      "grad_norm": 1.4473672733994998,
+      "learning_rate": 1.4458855606161143e-06,
+      "loss": 0.6572,
+      "step": 58900
+    },
+    {
+      "epoch": 2.3298858984753505,
+      "grad_norm": 1.543195619997971,
+      "learning_rate": 1.4442674847216127e-06,
+      "loss": 0.6655,
+      "step": 58910
+    },
+    {
+      "epoch": 2.3302813976942396,
+      "grad_norm": 1.4567336401416626,
+      "learning_rate": 1.4426501618703392e-06,
+      "loss": 0.6488,
+      "step": 58920
+    },
+    {
+      "epoch": 2.3306768969131286,
+      "grad_norm": 1.6509552237970642,
+      "learning_rate": 1.4410335924048169e-06,
+      "loss": 0.6308,
+      "step": 58930
+    },
+    {
+      "epoch": 2.3310723961320177,
+      "grad_norm": 1.653008636869871,
+      "learning_rate": 1.4394177766674055e-06,
+      "loss": 0.6773,
+      "step": 58940
+    },
+    {
+      "epoch": 2.3314678953509067,
+      "grad_norm": 1.2070845138616273,
+      "learning_rate": 1.4378027150003094e-06,
+      "loss": 0.656,
+      "step": 58950
+    },
+    {
+      "epoch": 2.3318633945697957,
+      "grad_norm": 1.643602810364153,
+      "learning_rate": 1.436188407745569e-06,
+      "loss": 0.6305,
+      "step": 58960
+    },
+    {
+      "epoch": 2.332258893788685,
+      "grad_norm": 1.2533714564833973,
+      "learning_rate": 1.4345748552450694e-06,
+      "loss": 0.6171,
+      "step": 58970
+    },
+    {
+      "epoch": 2.332654393007574,
+      "grad_norm": 1.4598709890067572,
+      "learning_rate": 1.43296205784053e-06,
+      "loss": 0.6843,
+      "step": 58980
+    },
+    {
+      "epoch": 2.333049892226463,
+      "grad_norm": 1.795831372239305,
+      "learning_rate": 1.4313500158735171e-06,
+      "loss": 0.6219,
+      "step": 58990
+    },
+    {
+      "epoch": 2.333445391445352,
+      "grad_norm": 1.3257915496488566,
+      "learning_rate": 1.4297387296854327e-06,
+      "loss": 0.652,
+      "step": 59000
+    },
+    {
+      "epoch": 2.333840890664241,
+      "grad_norm": 1.349164934271811,
+      "learning_rate": 1.4281281996175167e-06,
+      "loss": 0.6411,
+      "step": 59010
+    },
+    {
+      "epoch": 2.33423638988313,
+      "grad_norm": 1.3571014733250697,
+      "learning_rate": 1.4265184260108562e-06,
+      "loss": 0.6529,
+      "step": 59020
+    },
+    {
+      "epoch": 2.334631889102019,
+      "grad_norm": 1.9726802540595698,
+      "learning_rate": 1.4249094092063697e-06,
+      "loss": 0.6692,
+      "step": 59030
+    },
+    {
+      "epoch": 2.335027388320908,
+      "grad_norm": 1.517215322016344,
+      "learning_rate": 1.4233011495448228e-06,
+      "loss": 0.6193,
+      "step": 59040
+    },
+    {
+      "epoch": 2.335422887539797,
+      "grad_norm": 1.476802965089533,
+      "learning_rate": 1.4216936473668159e-06,
+      "loss": 0.6836,
+      "step": 59050
+    },
+    {
+      "epoch": 2.335818386758686,
+      "grad_norm": 1.535635423176671,
+      "learning_rate": 1.4200869030127896e-06,
+      "loss": 0.6241,
+      "step": 59060
+    },
+    {
+      "epoch": 2.336213885977575,
+      "grad_norm": 1.4791019565349466,
+      "learning_rate": 1.4184809168230245e-06,
+      "loss": 0.6802,
+      "step": 59070
+    },
+    {
+      "epoch": 2.336609385196464,
+      "grad_norm": 1.422139344051872,
+      "learning_rate": 1.4168756891376434e-06,
+      "loss": 0.6485,
+      "step": 59080
+    },
+    {
+      "epoch": 2.3370048844153533,
+      "grad_norm": 1.250139060542618,
+      "learning_rate": 1.415271220296603e-06,
+      "loss": 0.6561,
+      "step": 59090
+    },
+    {
+      "epoch": 2.3374003836342423,
+      "grad_norm": 1.730120130093888,
+      "learning_rate": 1.4136675106397051e-06,
+      "loss": 0.631,
+      "step": 59100
+    },
+    {
+      "epoch": 2.3377958828531313,
+      "grad_norm": 1.171299157037247,
+      "learning_rate": 1.4120645605065858e-06,
+      "loss": 0.68,
+      "step": 59110
+    },
+    {
+      "epoch": 2.3381913820720204,
+      "grad_norm": 1.256204715171071,
+      "learning_rate": 1.410462370236725e-06,
+      "loss": 0.6685,
+      "step": 59120
+    },
+    {
+      "epoch": 2.3385868812909094,
+      "grad_norm": 1.4909767608205295,
+      "learning_rate": 1.4088609401694353e-06,
+      "loss": 0.6415,
+      "step": 59130
+    },
+    {
+      "epoch": 2.3389823805097985,
+      "grad_norm": 1.464839828771158,
+      "learning_rate": 1.4072602706438765e-06,
+      "loss": 0.6355,
+      "step": 59140
+    },
+    {
+      "epoch": 2.3393778797286875,
+      "grad_norm": 1.407550713872548,
+      "learning_rate": 1.405660361999039e-06,
+      "loss": 0.6491,
+      "step": 59150
+    },
+    {
+      "epoch": 2.3397733789475765,
+      "grad_norm": 1.483199208642359,
+      "learning_rate": 1.4040612145737608e-06,
+      "loss": 0.6586,
+      "step": 59160
+    },
+    {
+      "epoch": 2.3401688781664656,
+      "grad_norm": 1.4822118430210998,
+      "learning_rate": 1.4024628287067088e-06,
+      "loss": 0.6817,
+      "step": 59170
+    },
+    {
+      "epoch": 2.3405643773853546,
+      "grad_norm": 1.2607731426652578,
+      "learning_rate": 1.4008652047363969e-06,
+      "loss": 0.6718,
+      "step": 59180
+    },
+    {
+      "epoch": 2.3409598766042437,
+      "grad_norm": 1.4850271041830017,
+      "learning_rate": 1.3992683430011722e-06,
+      "loss": 0.6518,
+      "step": 59190
+    },
+    {
+      "epoch": 2.3413553758231327,
+      "grad_norm": 1.4277620829199973,
+      "learning_rate": 1.3976722438392254e-06,
+      "loss": 0.65,
+      "step": 59200
+    },
+    {
+      "epoch": 2.3417508750420217,
+      "grad_norm": 1.4754328545544362,
+      "learning_rate": 1.39607690758858e-06,
+      "loss": 0.6861,
+      "step": 59210
+    },
+    {
+      "epoch": 2.3421463742609108,
+      "grad_norm": 1.2848009604767283,
+      "learning_rate": 1.3944823345871044e-06,
+      "loss": 0.6786,
+      "step": 59220
+    },
+    {
+      "epoch": 2.3425418734798,
+      "grad_norm": 1.3427506303437031,
+      "learning_rate": 1.3928885251725e-06,
+      "loss": 0.6461,
+      "step": 59230
+    },
+    {
+      "epoch": 2.342937372698689,
+      "grad_norm": 1.8386443146206752,
+      "learning_rate": 1.3912954796823064e-06,
+      "loss": 0.6896,
+      "step": 59240
+    },
+    {
+      "epoch": 2.343332871917578,
+      "grad_norm": 1.6415513179502648,
+      "learning_rate": 1.3897031984539067e-06,
+      "loss": 0.6465,
+      "step": 59250
+    },
+    {
+      "epoch": 2.343728371136467,
+      "grad_norm": 1.5105472299716078,
+      "learning_rate": 1.3881116818245154e-06,
+      "loss": 0.681,
+      "step": 59260
+    },
+    {
+      "epoch": 2.344123870355356,
+      "grad_norm": 1.411179528838079,
+      "learning_rate": 1.3865209301311928e-06,
+      "loss": 0.6655,
+      "step": 59270
+    },
+    {
+      "epoch": 2.344519369574245,
+      "grad_norm": 1.565916006096879,
+      "learning_rate": 1.3849309437108283e-06,
+      "loss": 0.6457,
+      "step": 59280
+    },
+    {
+      "epoch": 2.344914868793134,
+      "grad_norm": 1.6985129995597834,
+      "learning_rate": 1.383341722900159e-06,
+      "loss": 0.6068,
+      "step": 59290
+    },
+    {
+      "epoch": 2.345310368012023,
+      "grad_norm": 1.7523342050438377,
+      "learning_rate": 1.3817532680357481e-06,
+      "loss": 0.6535,
+      "step": 59300
+    },
+    {
+      "epoch": 2.345705867230912,
+      "grad_norm": 1.481222936280899,
+      "learning_rate": 1.3801655794540087e-06,
+      "loss": 0.6382,
+      "step": 59310
+    },
+    {
+      "epoch": 2.346101366449801,
+      "grad_norm": 1.2719378815407312,
+      "learning_rate": 1.378578657491182e-06,
+      "loss": 0.6826,
+      "step": 59320
+    },
+    {
+      "epoch": 2.34649686566869,
+      "grad_norm": 1.5770208930064684,
+      "learning_rate": 1.376992502483354e-06,
+      "loss": 0.6397,
+      "step": 59330
+    },
+    {
+      "epoch": 2.3468923648875792,
+      "grad_norm": 1.4328905492841875,
+      "learning_rate": 1.3754071147664432e-06,
+      "loss": 0.6459,
+      "step": 59340
+    },
+    {
+      "epoch": 2.3472878641064683,
+      "grad_norm": 1.6700468378830629,
+      "learning_rate": 1.373822494676209e-06,
+      "loss": 0.661,
+      "step": 59350
+    },
+    {
+      "epoch": 2.3476833633253573,
+      "grad_norm": 1.2650984169596182,
+      "learning_rate": 1.3722386425482454e-06,
+      "loss": 0.6646,
+      "step": 59360
+    },
+    {
+      "epoch": 2.3480788625442464,
+      "grad_norm": 1.342052914416015,
+      "learning_rate": 1.3706555587179864e-06,
+      "loss": 0.6574,
+      "step": 59370
+    },
+    {
+      "epoch": 2.3484743617631354,
+      "grad_norm": 1.4259567661685697,
+      "learning_rate": 1.3690732435207006e-06,
+      "loss": 0.6549,
+      "step": 59380
+    },
+    {
+      "epoch": 2.3488698609820244,
+      "grad_norm": 1.6446463839117533,
+      "learning_rate": 1.3674916972914976e-06,
+      "loss": 0.6469,
+      "step": 59390
+    },
+    {
+      "epoch": 2.3492653602009135,
+      "grad_norm": 1.558085067492573,
+      "learning_rate": 1.365910920365318e-06,
+      "loss": 0.6661,
+      "step": 59400
+    },
+    {
+      "epoch": 2.3496608594198025,
+      "grad_norm": 1.2039210028900755,
+      "learning_rate": 1.3643309130769494e-06,
+      "loss": 0.6767,
+      "step": 59410
+    },
+    {
+      "epoch": 2.3500563586386916,
+      "grad_norm": 1.5176618877166277,
+      "learning_rate": 1.3627516757610032e-06,
+      "loss": 0.6629,
+      "step": 59420
+    },
+    {
+      "epoch": 2.3504518578575806,
+      "grad_norm": 1.4430169207270038,
+      "learning_rate": 1.3611732087519397e-06,
+      "loss": 0.6797,
+      "step": 59430
+    },
+    {
+      "epoch": 2.3508473570764696,
+      "grad_norm": 1.9358104877105615,
+      "learning_rate": 1.3595955123840476e-06,
+      "loss": 0.6688,
+      "step": 59440
+    },
+    {
+      "epoch": 2.3512428562953587,
+      "grad_norm": 1.2966112649450476,
+      "learning_rate": 1.3580185869914597e-06,
+      "loss": 0.7071,
+      "step": 59450
+    },
+    {
+      "epoch": 2.3516383555142477,
+      "grad_norm": 1.442660163286812,
+      "learning_rate": 1.3564424329081398e-06,
+      "loss": 0.6737,
+      "step": 59460
+    },
+    {
+      "epoch": 2.3520338547331368,
+      "grad_norm": 1.614874860885327,
+      "learning_rate": 1.354867050467889e-06,
+      "loss": 0.647,
+      "step": 59470
+    },
+    {
+      "epoch": 2.352429353952026,
+      "grad_norm": 1.39062380619087,
+      "learning_rate": 1.3532924400043496e-06,
+      "loss": 0.6854,
+      "step": 59480
+    },
+    {
+      "epoch": 2.352824853170915,
+      "grad_norm": 1.6372282377873661,
+      "learning_rate": 1.3517186018509936e-06,
+      "loss": 0.6313,
+      "step": 59490
+    },
+    {
+      "epoch": 2.353220352389804,
+      "grad_norm": 1.4939086310379999,
+      "learning_rate": 1.3501455363411364e-06,
+      "loss": 0.6641,
+      "step": 59500
+    },
+    {
+      "epoch": 2.353615851608693,
+      "grad_norm": 1.7605222814424728,
+      "learning_rate": 1.348573243807923e-06,
+      "loss": 0.613,
+      "step": 59510
+    },
+    {
+      "epoch": 2.354011350827582,
+      "grad_norm": 1.834555047285748,
+      "learning_rate": 1.3470017245843408e-06,
+      "loss": 0.6586,
+      "step": 59520
+    },
+    {
+      "epoch": 2.354406850046471,
+      "grad_norm": 1.4537880489905297,
+      "learning_rate": 1.3454309790032093e-06,
+      "loss": 0.6798,
+      "step": 59530
+    },
+    {
+      "epoch": 2.35480234926536,
+      "grad_norm": 1.8650290561875345,
+      "learning_rate": 1.3438610073971863e-06,
+      "loss": 0.6182,
+      "step": 59540
+    },
+    {
+      "epoch": 2.355197848484249,
+      "grad_norm": 1.3792549427498257,
+      "learning_rate": 1.3422918100987625e-06,
+      "loss": 0.6406,
+      "step": 59550
+    },
+    {
+      "epoch": 2.355593347703138,
+      "grad_norm": 1.3926591348105821,
+      "learning_rate": 1.3407233874402703e-06,
+      "loss": 0.6793,
+      "step": 59560
+    },
+    {
+      "epoch": 2.355988846922027,
+      "grad_norm": 1.3892070418840154,
+      "learning_rate": 1.339155739753872e-06,
+      "loss": 0.638,
+      "step": 59570
+    },
+    {
+      "epoch": 2.356384346140916,
+      "grad_norm": 1.473607972554162,
+      "learning_rate": 1.337588867371571e-06,
+      "loss": 0.692,
+      "step": 59580
+    },
+    {
+      "epoch": 2.3567798453598052,
+      "grad_norm": 1.5750437920578224,
+      "learning_rate": 1.336022770625201e-06,
+      "loss": 0.6317,
+      "step": 59590
+    },
+    {
+      "epoch": 2.3571753445786943,
+      "grad_norm": 1.3371570634090677,
+      "learning_rate": 1.334457449846438e-06,
+      "loss": 0.6472,
+      "step": 59600
+    },
+    {
+      "epoch": 2.3575708437975837,
+      "grad_norm": 1.3035560578142398,
+      "learning_rate": 1.3328929053667866e-06,
+      "loss": 0.6458,
+      "step": 59610
+    },
+    {
+      "epoch": 2.3579663430164723,
+      "grad_norm": 1.3944212438036032,
+      "learning_rate": 1.331329137517594e-06,
+      "loss": 0.6382,
+      "step": 59620
+    },
+    {
+      "epoch": 2.358361842235362,
+      "grad_norm": 1.5005476053868148,
+      "learning_rate": 1.3297661466300366e-06,
+      "loss": 0.6576,
+      "step": 59630
+    },
+    {
+      "epoch": 2.3587573414542504,
+      "grad_norm": 1.5264474475420733,
+      "learning_rate": 1.328203933035131e-06,
+      "loss": 0.6349,
+      "step": 59640
+    },
+    {
+      "epoch": 2.35915284067314,
+      "grad_norm": 1.5970556218124319,
+      "learning_rate": 1.326642497063727e-06,
+      "loss": 0.6584,
+      "step": 59650
+    },
+    {
+      "epoch": 2.3595483398920285,
+      "grad_norm": 1.3766326130069657,
+      "learning_rate": 1.3250818390465092e-06,
+      "loss": 0.6685,
+      "step": 59660
+    },
+    {
+      "epoch": 2.359943839110918,
+      "grad_norm": 1.4963677502903887,
+      "learning_rate": 1.323521959313997e-06,
+      "loss": 0.6605,
+      "step": 59670
+    },
+    {
+      "epoch": 2.3603393383298066,
+      "grad_norm": 1.3302813930397257,
+      "learning_rate": 1.3219628581965493e-06,
+      "loss": 0.6655,
+      "step": 59680
+    },
+    {
+      "epoch": 2.360734837548696,
+      "grad_norm": 1.8136802835330335,
+      "learning_rate": 1.320404536024354e-06,
+      "loss": 0.6365,
+      "step": 59690
+    },
+    {
+      "epoch": 2.3611303367675847,
+      "grad_norm": 1.3834212107444948,
+      "learning_rate": 1.3188469931274394e-06,
+      "loss": 0.6787,
+      "step": 59700
+    },
+    {
+      "epoch": 2.361525835986474,
+      "grad_norm": 1.6115000671482202,
+      "learning_rate": 1.317290229835666e-06,
+      "loss": 0.6794,
+      "step": 59710
+    },
+    {
+      "epoch": 2.3619213352053627,
+      "grad_norm": 1.70829693992852,
+      "learning_rate": 1.315734246478727e-06,
+      "loss": 0.6583,
+      "step": 59720
+    },
+    {
+      "epoch": 2.362316834424252,
+      "grad_norm": 1.7795299826314106,
+      "learning_rate": 1.3141790433861574e-06,
+      "loss": 0.6541,
+      "step": 59730
+    },
+    {
+      "epoch": 2.362712333643141,
+      "grad_norm": 1.576112936163215,
+      "learning_rate": 1.3126246208873177e-06,
+      "loss": 0.6401,
+      "step": 59740
+    },
+    {
+      "epoch": 2.3631078328620303,
+      "grad_norm": 1.2382669149117873,
+      "learning_rate": 1.3110709793114128e-06,
+      "loss": 0.6478,
+      "step": 59750
+    },
+    {
+      "epoch": 2.363503332080919,
+      "grad_norm": 1.4180050814353615,
+      "learning_rate": 1.3095181189874733e-06,
+      "loss": 0.6622,
+      "step": 59760
+    },
+    {
+      "epoch": 2.3638988312998084,
+      "grad_norm": 1.353602984572126,
+      "learning_rate": 1.3079660402443716e-06,
+      "loss": 0.6654,
+      "step": 59770
+    },
+    {
+      "epoch": 2.3642943305186974,
+      "grad_norm": 1.4635536115259493,
+      "learning_rate": 1.3064147434108098e-06,
+      "loss": 0.6476,
+      "step": 59780
+    },
+    {
+      "epoch": 2.3646898297375865,
+      "grad_norm": 1.5019205941410168,
+      "learning_rate": 1.3048642288153268e-06,
+      "loss": 0.6444,
+      "step": 59790
+    },
+    {
+      "epoch": 2.3650853289564755,
+      "grad_norm": 1.1635429701984492,
+      "learning_rate": 1.3033144967862922e-06,
+      "loss": 0.6659,
+      "step": 59800
+    },
+    {
+      "epoch": 2.3654808281753645,
+      "grad_norm": 1.316139659650491,
+      "learning_rate": 1.3017655476519164e-06,
+      "loss": 0.6793,
+      "step": 59810
+    },
+    {
+      "epoch": 2.3658763273942536,
+      "grad_norm": 1.3870046542069292,
+      "learning_rate": 1.3002173817402375e-06,
+      "loss": 0.6723,
+      "step": 59820
+    },
+    {
+      "epoch": 2.3662718266131426,
+      "grad_norm": 1.3315779130439447,
+      "learning_rate": 1.2986699993791335e-06,
+      "loss": 0.662,
+      "step": 59830
+    },
+    {
+      "epoch": 2.3666673258320317,
+      "grad_norm": 1.3196653182571463,
+      "learning_rate": 1.2971234008963095e-06,
+      "loss": 0.6742,
+      "step": 59840
+    },
+    {
+      "epoch": 2.3670628250509207,
+      "grad_norm": 1.644057338081116,
+      "learning_rate": 1.2955775866193132e-06,
+      "loss": 0.645,
+      "step": 59850
+    },
+    {
+      "epoch": 2.3674583242698097,
+      "grad_norm": 1.4693549054439152,
+      "learning_rate": 1.294032556875517e-06,
+      "loss": 0.6549,
+      "step": 59860
+    },
+    {
+      "epoch": 2.3678538234886988,
+      "grad_norm": 1.3506758541748658,
+      "learning_rate": 1.2924883119921356e-06,
+      "loss": 0.6493,
+      "step": 59870
+    },
+    {
+      "epoch": 2.368249322707588,
+      "grad_norm": 1.779826698792813,
+      "learning_rate": 1.290944852296212e-06,
+      "loss": 0.6557,
+      "step": 59880
+    },
+    {
+      "epoch": 2.368644821926477,
+      "grad_norm": 1.4617164377679117,
+      "learning_rate": 1.2894021781146232e-06,
+      "loss": 0.7036,
+      "step": 59890
+    },
+    {
+      "epoch": 2.369040321145366,
+      "grad_norm": 1.5982077919923772,
+      "learning_rate": 1.2878602897740833e-06,
+      "loss": 0.6526,
+      "step": 59900
+    },
+    {
+      "epoch": 2.369435820364255,
+      "grad_norm": 1.5418488278722802,
+      "learning_rate": 1.2863191876011377e-06,
+      "loss": 0.6785,
+      "step": 59910
+    },
+    {
+      "epoch": 2.369831319583144,
+      "grad_norm": 1.4351145576462119,
+      "learning_rate": 1.2847788719221627e-06,
+      "loss": 0.6286,
+      "step": 59920
+    },
+    {
+      "epoch": 2.370226818802033,
+      "grad_norm": 1.3561329025592253,
+      "learning_rate": 1.2832393430633743e-06,
+      "loss": 0.6332,
+      "step": 59930
+    },
+    {
+      "epoch": 2.370622318020922,
+      "grad_norm": 1.610307683543711,
+      "learning_rate": 1.2817006013508166e-06,
+      "loss": 0.6688,
+      "step": 59940
+    },
+    {
+      "epoch": 2.371017817239811,
+      "grad_norm": 1.7967913675678822,
+      "learning_rate": 1.280162647110368e-06,
+      "loss": 0.6873,
+      "step": 59950
+    },
+    {
+      "epoch": 2.3714133164587,
+      "grad_norm": 1.3650903564260914,
+      "learning_rate": 1.2786254806677428e-06,
+      "loss": 0.7004,
+      "step": 59960
+    },
+    {
+      "epoch": 2.371808815677589,
+      "grad_norm": 1.7176979559601464,
+      "learning_rate": 1.2770891023484838e-06,
+      "loss": 0.6545,
+      "step": 59970
+    },
+    {
+      "epoch": 2.372204314896478,
+      "grad_norm": 1.3125911757228816,
+      "learning_rate": 1.2755535124779733e-06,
+      "loss": 0.6429,
+      "step": 59980
+    },
+    {
+      "epoch": 2.3725998141153672,
+      "grad_norm": 1.488890907232398,
+      "learning_rate": 1.2740187113814196e-06,
+      "loss": 0.6575,
+      "step": 59990
+    },
+    {
+      "epoch": 2.3729953133342563,
+      "grad_norm": 1.4188619088579277,
+      "learning_rate": 1.2724846993838696e-06,
+      "loss": 0.6166,
+      "step": 60000
+    },
+    {
+      "epoch": 2.3733908125531453,
+      "grad_norm": 1.5869439201226596,
+      "learning_rate": 1.2709514768101982e-06,
+      "loss": 0.6272,
+      "step": 60010
+    },
+    {
+      "epoch": 2.3737863117720344,
+      "grad_norm": 1.5208063386082633,
+      "learning_rate": 1.2694190439851205e-06,
+      "loss": 0.6505,
+      "step": 60020
+    },
+    {
+      "epoch": 2.3741818109909234,
+      "grad_norm": 1.4023165930533983,
+      "learning_rate": 1.2678874012331732e-06,
+      "loss": 0.6659,
+      "step": 60030
+    },
+    {
+      "epoch": 2.3745773102098124,
+      "grad_norm": 1.55656971084094,
+      "learning_rate": 1.2663565488787365e-06,
+      "loss": 0.625,
+      "step": 60040
+    },
+    {
+      "epoch": 2.3749728094287015,
+      "grad_norm": 1.280198765448565,
+      "learning_rate": 1.2648264872460165e-06,
+      "loss": 0.6823,
+      "step": 60050
+    },
+    {
+      "epoch": 2.3753683086475905,
+      "grad_norm": 1.282086373868942,
+      "learning_rate": 1.2632972166590557e-06,
+      "loss": 0.6827,
+      "step": 60060
+    },
+    {
+      "epoch": 2.3757638078664796,
+      "grad_norm": 1.6183942811456713,
+      "learning_rate": 1.2617687374417248e-06,
+      "loss": 0.6401,
+      "step": 60070
+    },
+    {
+      "epoch": 2.3761593070853686,
+      "grad_norm": 1.290320486427383,
+      "learning_rate": 1.260241049917733e-06,
+      "loss": 0.6903,
+      "step": 60080
+    },
+    {
+      "epoch": 2.3765548063042576,
+      "grad_norm": 1.393264511520163,
+      "learning_rate": 1.2587141544106147e-06,
+      "loss": 0.645,
+      "step": 60090
+    },
+    {
+      "epoch": 2.3769503055231467,
+      "grad_norm": 1.3693027720380315,
+      "learning_rate": 1.257188051243744e-06,
+      "loss": 0.6482,
+      "step": 60100
+    },
+    {
+      "epoch": 2.3773458047420357,
+      "grad_norm": 1.3737721789055617,
+      "learning_rate": 1.25566274074032e-06,
+      "loss": 0.6419,
+      "step": 60110
+    },
+    {
+      "epoch": 2.3777413039609248,
+      "grad_norm": 1.5808172562717315,
+      "learning_rate": 1.25413822322338e-06,
+      "loss": 0.6758,
+      "step": 60120
+    },
+    {
+      "epoch": 2.378136803179814,
+      "grad_norm": 1.4300721312341145,
+      "learning_rate": 1.2526144990157896e-06,
+      "loss": 0.6479,
+      "step": 60130
+    },
+    {
+      "epoch": 2.378532302398703,
+      "grad_norm": 1.5589774132785974,
+      "learning_rate": 1.2510915684402476e-06,
+      "loss": 0.6321,
+      "step": 60140
+    },
+    {
+      "epoch": 2.378927801617592,
+      "grad_norm": 1.5109835817451736,
+      "learning_rate": 1.2495694318192825e-06,
+      "loss": 0.6749,
+      "step": 60150
+    },
+    {
+      "epoch": 2.379323300836481,
+      "grad_norm": 1.3193461829025568,
+      "learning_rate": 1.2480480894752607e-06,
+      "loss": 0.6608,
+      "step": 60160
+    },
+    {
+      "epoch": 2.37971880005537,
+      "grad_norm": 1.7361197051816402,
+      "learning_rate": 1.246527541730374e-06,
+      "loss": 0.6349,
+      "step": 60170
+    },
+    {
+      "epoch": 2.380114299274259,
+      "grad_norm": 1.3659520000201715,
+      "learning_rate": 1.2450077889066476e-06,
+      "loss": 0.6568,
+      "step": 60180
+    },
+    {
+      "epoch": 2.380509798493148,
+      "grad_norm": 1.3582579488056623,
+      "learning_rate": 1.2434888313259413e-06,
+      "loss": 0.6418,
+      "step": 60190
+    },
+    {
+      "epoch": 2.380905297712037,
+      "grad_norm": 1.3786555784983439,
+      "learning_rate": 1.241970669309942e-06,
+      "loss": 0.6776,
+      "step": 60200
+    },
+    {
+      "epoch": 2.381300796930926,
+      "grad_norm": 1.5338849168254205,
+      "learning_rate": 1.240453303180173e-06,
+      "loss": 0.6728,
+      "step": 60210
+    },
+    {
+      "epoch": 2.381696296149815,
+      "grad_norm": 1.506702217549218,
+      "learning_rate": 1.2389367332579839e-06,
+      "loss": 0.6693,
+      "step": 60220
+    },
+    {
+      "epoch": 2.382091795368704,
+      "grad_norm": 1.700159884829986,
+      "learning_rate": 1.237420959864561e-06,
+      "loss": 0.6345,
+      "step": 60230
+    },
+    {
+      "epoch": 2.3824872945875932,
+      "grad_norm": 1.3334601416749623,
+      "learning_rate": 1.235905983320916e-06,
+      "loss": 0.641,
+      "step": 60240
+    },
+    {
+      "epoch": 2.3828827938064823,
+      "grad_norm": 1.361524541931307,
+      "learning_rate": 1.234391803947898e-06,
+      "loss": 0.6521,
+      "step": 60250
+    },
+    {
+      "epoch": 2.3832782930253713,
+      "grad_norm": 1.302166779138101,
+      "learning_rate": 1.232878422066181e-06,
+      "loss": 0.6603,
+      "step": 60260
+    },
+    {
+      "epoch": 2.3836737922442603,
+      "grad_norm": 1.3929624457021772,
+      "learning_rate": 1.2313658379962785e-06,
+      "loss": 0.6468,
+      "step": 60270
+    },
+    {
+      "epoch": 2.3840692914631494,
+      "grad_norm": 1.5646784580041087,
+      "learning_rate": 1.2298540520585239e-06,
+      "loss": 0.66,
+      "step": 60280
+    },
+    {
+      "epoch": 2.3844647906820384,
+      "grad_norm": 1.453340753328277,
+      "learning_rate": 1.228343064573091e-06,
+      "loss": 0.6768,
+      "step": 60290
+    },
+    {
+      "epoch": 2.3848602899009275,
+      "grad_norm": 1.2710447433472736,
+      "learning_rate": 1.226832875859979e-06,
+      "loss": 0.6625,
+      "step": 60300
+    },
+    {
+      "epoch": 2.3852557891198165,
+      "grad_norm": 1.5179303983822905,
+      "learning_rate": 1.225323486239023e-06,
+      "loss": 0.6475,
+      "step": 60310
+    },
+    {
+      "epoch": 2.3856512883387055,
+      "grad_norm": 1.4390768115991506,
+      "learning_rate": 1.223814896029883e-06,
+      "loss": 0.6869,
+      "step": 60320
+    },
+    {
+      "epoch": 2.3860467875575946,
+      "grad_norm": 1.2879109948492395,
+      "learning_rate": 1.2223071055520548e-06,
+      "loss": 0.644,
+      "step": 60330
+    },
+    {
+      "epoch": 2.3864422867764836,
+      "grad_norm": 1.4446520488917953,
+      "learning_rate": 1.2208001151248593e-06,
+      "loss": 0.6621,
+      "step": 60340
+    },
+    {
+      "epoch": 2.3868377859953727,
+      "grad_norm": 1.7177699519167628,
+      "learning_rate": 1.2192939250674556e-06,
+      "loss": 0.6374,
+      "step": 60350
+    },
+    {
+      "epoch": 2.3872332852142617,
+      "grad_norm": 1.5110958571884754,
+      "learning_rate": 1.2177885356988272e-06,
+      "loss": 0.6776,
+      "step": 60360
+    },
+    {
+      "epoch": 2.3876287844331507,
+      "grad_norm": 1.8865416778637636,
+      "learning_rate": 1.2162839473377874e-06,
+      "loss": 0.6536,
+      "step": 60370
+    },
+    {
+      "epoch": 2.38802428365204,
+      "grad_norm": 1.2466058794887338,
+      "learning_rate": 1.2147801603029858e-06,
+      "loss": 0.6691,
+      "step": 60380
+    },
+    {
+      "epoch": 2.388419782870929,
+      "grad_norm": 1.399261052596627,
+      "learning_rate": 1.2132771749128968e-06,
+      "loss": 0.6368,
+      "step": 60390
+    },
+    {
+      "epoch": 2.388815282089818,
+      "grad_norm": 1.5294209359199646,
+      "learning_rate": 1.2117749914858278e-06,
+      "loss": 0.6576,
+      "step": 60400
+    },
+    {
+      "epoch": 2.389210781308707,
+      "grad_norm": 1.1541573965797718,
+      "learning_rate": 1.2102736103399131e-06,
+      "loss": 0.628,
+      "step": 60410
+    },
+    {
+      "epoch": 2.389606280527596,
+      "grad_norm": 1.5562020024295269,
+      "learning_rate": 1.2087730317931234e-06,
+      "loss": 0.6788,
+      "step": 60420
+    },
+    {
+      "epoch": 2.390001779746485,
+      "grad_norm": 1.3303838930464016,
+      "learning_rate": 1.2072732561632517e-06,
+      "loss": 0.6592,
+      "step": 60430
+    },
+    {
+      "epoch": 2.390397278965374,
+      "grad_norm": 1.4643209353778344,
+      "learning_rate": 1.205774283767928e-06,
+      "loss": 0.6533,
+      "step": 60440
+    },
+    {
+      "epoch": 2.390792778184263,
+      "grad_norm": 1.9301841576090892,
+      "learning_rate": 1.2042761149246068e-06,
+      "loss": 0.6692,
+      "step": 60450
+    },
+    {
+      "epoch": 2.391188277403152,
+      "grad_norm": 1.3581607500107369,
+      "learning_rate": 1.202778749950576e-06,
+      "loss": 0.6754,
+      "step": 60460
+    },
+    {
+      "epoch": 2.391583776622041,
+      "grad_norm": 1.4465304591485115,
+      "learning_rate": 1.2012821891629506e-06,
+      "loss": 0.6277,
+      "step": 60470
+    },
+    {
+      "epoch": 2.39197927584093,
+      "grad_norm": 1.4712006554120876,
+      "learning_rate": 1.199786432878678e-06,
+      "loss": 0.6284,
+      "step": 60480
+    },
+    {
+      "epoch": 2.392374775059819,
+      "grad_norm": 1.5467911394235117,
+      "learning_rate": 1.198291481414532e-06,
+      "loss": 0.6247,
+      "step": 60490
+    },
+    {
+      "epoch": 2.3927702742787083,
+      "grad_norm": 1.4062238072676685,
+      "learning_rate": 1.1967973350871198e-06,
+      "loss": 0.636,
+      "step": 60500
+    },
+    {
+      "epoch": 2.3931657734975973,
+      "grad_norm": 1.3316811234444652,
+      "learning_rate": 1.1953039942128747e-06,
+      "loss": 0.6328,
+      "step": 60510
+    },
+    {
+      "epoch": 2.3935612727164863,
+      "grad_norm": 1.3839751745652498,
+      "learning_rate": 1.1938114591080614e-06,
+      "loss": 0.6461,
+      "step": 60520
+    },
+    {
+      "epoch": 2.3939567719353754,
+      "grad_norm": 1.5571631658497918,
+      "learning_rate": 1.1923197300887707e-06,
+      "loss": 0.6327,
+      "step": 60530
+    },
+    {
+      "epoch": 2.3943522711542644,
+      "grad_norm": 1.6334308492010725,
+      "learning_rate": 1.190828807470929e-06,
+      "loss": 0.6564,
+      "step": 60540
+    },
+    {
+      "epoch": 2.3947477703731535,
+      "grad_norm": 1.418491763997048,
+      "learning_rate": 1.1893386915702849e-06,
+      "loss": 0.6595,
+      "step": 60550
+    },
+    {
+      "epoch": 2.3951432695920425,
+      "grad_norm": 1.48611872633573,
+      "learning_rate": 1.1878493827024223e-06,
+      "loss": 0.6649,
+      "step": 60560
+    },
+    {
+      "epoch": 2.3955387688109315,
+      "grad_norm": 1.2897192077821347,
+      "learning_rate": 1.1863608811827487e-06,
+      "loss": 0.6429,
+      "step": 60570
+    },
+    {
+      "epoch": 2.3959342680298206,
+      "grad_norm": 1.4684912959462901,
+      "learning_rate": 1.1848731873265057e-06,
+      "loss": 0.661,
+      "step": 60580
+    },
+    {
+      "epoch": 2.3963297672487096,
+      "grad_norm": 1.491397517901672,
+      "learning_rate": 1.1833863014487601e-06,
+      "loss": 0.6491,
+      "step": 60590
+    },
+    {
+      "epoch": 2.3967252664675986,
+      "grad_norm": 1.439643725719278,
+      "learning_rate": 1.1819002238644078e-06,
+      "loss": 0.6123,
+      "step": 60600
+    },
+    {
+      "epoch": 2.3971207656864877,
+      "grad_norm": 1.50555570261791,
+      "learning_rate": 1.1804149548881771e-06,
+      "loss": 0.648,
+      "step": 60610
+    },
+    {
+      "epoch": 2.3975162649053767,
+      "grad_norm": 1.493759672470509,
+      "learning_rate": 1.1789304948346192e-06,
+      "loss": 0.6467,
+      "step": 60620
+    },
+    {
+      "epoch": 2.3979117641242658,
+      "grad_norm": 1.2590367263133964,
+      "learning_rate": 1.1774468440181215e-06,
+      "loss": 0.6686,
+      "step": 60630
+    },
+    {
+      "epoch": 2.398307263343155,
+      "grad_norm": 1.7154889530827062,
+      "learning_rate": 1.1759640027528923e-06,
+      "loss": 0.6636,
+      "step": 60640
+    },
+    {
+      "epoch": 2.398702762562044,
+      "grad_norm": 1.3137260105772055,
+      "learning_rate": 1.1744819713529742e-06,
+      "loss": 0.6737,
+      "step": 60650
+    },
+    {
+      "epoch": 2.399098261780933,
+      "grad_norm": 1.2598564465211248,
+      "learning_rate": 1.1730007501322333e-06,
+      "loss": 0.6689,
+      "step": 60660
+    },
+    {
+      "epoch": 2.399493760999822,
+      "grad_norm": 1.1638866359656532,
+      "learning_rate": 1.17152033940437e-06,
+      "loss": 0.6432,
+      "step": 60670
+    },
+    {
+      "epoch": 2.399889260218711,
+      "grad_norm": 1.3151691657067,
+      "learning_rate": 1.1700407394829071e-06,
+      "loss": 0.6492,
+      "step": 60680
+    },
+    {
+      "epoch": 2.4002847594376,
+      "grad_norm": 1.544299212709401,
+      "learning_rate": 1.1685619506812019e-06,
+      "loss": 0.6345,
+      "step": 60690
+    },
+    {
+      "epoch": 2.400680258656489,
+      "grad_norm": 1.5101256898393642,
+      "learning_rate": 1.1670839733124328e-06,
+      "loss": 0.6272,
+      "step": 60700
+    },
+    {
+      "epoch": 2.401075757875378,
+      "grad_norm": 1.629622232170866,
+      "learning_rate": 1.1656068076896133e-06,
+      "loss": 0.6542,
+      "step": 60710
+    },
+    {
+      "epoch": 2.401471257094267,
+      "grad_norm": 1.362979777159695,
+      "learning_rate": 1.1641304541255793e-06,
+      "loss": 0.6729,
+      "step": 60720
+    },
+    {
+      "epoch": 2.401866756313156,
+      "grad_norm": 1.4582497685187814,
+      "learning_rate": 1.1626549129329994e-06,
+      "loss": 0.6578,
+      "step": 60730
+    },
+    {
+      "epoch": 2.402262255532045,
+      "grad_norm": 1.4198343937352749,
+      "learning_rate": 1.1611801844243653e-06,
+      "loss": 0.6474,
+      "step": 60740
+    },
+    {
+      "epoch": 2.4026577547509342,
+      "grad_norm": 1.5561261182820643,
+      "learning_rate": 1.1597062689120025e-06,
+      "loss": 0.6665,
+      "step": 60750
+    },
+    {
+      "epoch": 2.4030532539698233,
+      "grad_norm": 1.6049471813565488,
+      "learning_rate": 1.1582331667080592e-06,
+      "loss": 0.6206,
+      "step": 60760
+    },
+    {
+      "epoch": 2.4034487531887123,
+      "grad_norm": 1.6786217835036092,
+      "learning_rate": 1.1567608781245133e-06,
+      "loss": 0.6113,
+      "step": 60770
+    },
+    {
+      "epoch": 2.4038442524076014,
+      "grad_norm": 1.488190374089625,
+      "learning_rate": 1.1552894034731687e-06,
+      "loss": 0.6632,
+      "step": 60780
+    },
+    {
+      "epoch": 2.4042397516264904,
+      "grad_norm": 1.4246333959101352,
+      "learning_rate": 1.1538187430656618e-06,
+      "loss": 0.633,
+      "step": 60790
+    },
+    {
+      "epoch": 2.4046352508453794,
+      "grad_norm": 1.3032663001725917,
+      "learning_rate": 1.15234889721345e-06,
+      "loss": 0.6375,
+      "step": 60800
+    },
+    {
+      "epoch": 2.4050307500642685,
+      "grad_norm": 1.3086106189047355,
+      "learning_rate": 1.1508798662278248e-06,
+      "loss": 0.6678,
+      "step": 60810
+    },
+    {
+      "epoch": 2.4054262492831575,
+      "grad_norm": 1.7276415892437251,
+      "learning_rate": 1.1494116504199e-06,
+      "loss": 0.6619,
+      "step": 60820
+    },
+    {
+      "epoch": 2.4058217485020466,
+      "grad_norm": 1.1340889323379753,
+      "learning_rate": 1.1479442501006166e-06,
+      "loss": 0.6405,
+      "step": 60830
+    },
+    {
+      "epoch": 2.4062172477209356,
+      "grad_norm": 1.6662959419954302,
+      "learning_rate": 1.1464776655807492e-06,
+      "loss": 0.6409,
+      "step": 60840
+    },
+    {
+      "epoch": 2.4066127469398246,
+      "grad_norm": 1.3467360463469853,
+      "learning_rate": 1.1450118971708907e-06,
+      "loss": 0.6383,
+      "step": 60850
+    },
+    {
+      "epoch": 2.4070082461587137,
+      "grad_norm": 1.3629825280027281,
+      "learning_rate": 1.1435469451814701e-06,
+      "loss": 0.6386,
+      "step": 60860
+    },
+    {
+      "epoch": 2.4074037453776027,
+      "grad_norm": 1.493295078801304,
+      "learning_rate": 1.1420828099227355e-06,
+      "loss": 0.6505,
+      "step": 60870
+    },
+    {
+      "epoch": 2.4077992445964918,
+      "grad_norm": 1.4834088359161504,
+      "learning_rate": 1.1406194917047698e-06,
+      "loss": 0.6762,
+      "step": 60880
+    },
+    {
+      "epoch": 2.408194743815381,
+      "grad_norm": 1.6251605261775033,
+      "learning_rate": 1.1391569908374734e-06,
+      "loss": 0.6691,
+      "step": 60890
+    },
+    {
+      "epoch": 2.40859024303427,
+      "grad_norm": 1.3862890862619546,
+      "learning_rate": 1.1376953076305825e-06,
+      "loss": 0.6311,
+      "step": 60900
+    },
+    {
+      "epoch": 2.408985742253159,
+      "grad_norm": 1.2292694053280526,
+      "learning_rate": 1.136234442393655e-06,
+      "loss": 0.6381,
+      "step": 60910
+    },
+    {
+      "epoch": 2.409381241472048,
+      "grad_norm": 1.4940990584477818,
+      "learning_rate": 1.1347743954360791e-06,
+      "loss": 0.6486,
+      "step": 60920
+    },
+    {
+      "epoch": 2.409776740690937,
+      "grad_norm": 1.6119023869121893,
+      "learning_rate": 1.133315167067065e-06,
+      "loss": 0.6494,
+      "step": 60930
+    },
+    {
+      "epoch": 2.410172239909826,
+      "grad_norm": 1.4466785758021523,
+      "learning_rate": 1.1318567575956552e-06,
+      "loss": 0.6359,
+      "step": 60940
+    },
+    {
+      "epoch": 2.410567739128715,
+      "grad_norm": 1.4157370381367196,
+      "learning_rate": 1.130399167330713e-06,
+      "loss": 0.6719,
+      "step": 60950
+    },
+    {
+      "epoch": 2.4109632383476045,
+      "grad_norm": 1.4081379226485597,
+      "learning_rate": 1.1289423965809337e-06,
+      "loss": 0.6438,
+      "step": 60960
+    },
+    {
+      "epoch": 2.411358737566493,
+      "grad_norm": 1.6047245390174836,
+      "learning_rate": 1.1274864456548334e-06,
+      "loss": 0.6719,
+      "step": 60970
+    },
+    {
+      "epoch": 2.4117542367853826,
+      "grad_norm": 1.5985440151479797,
+      "learning_rate": 1.1260313148607616e-06,
+      "loss": 0.6383,
+      "step": 60980
+    },
+    {
+      "epoch": 2.412149736004271,
+      "grad_norm": 1.8051044827715523,
+      "learning_rate": 1.1245770045068855e-06,
+      "loss": 0.654,
+      "step": 60990
+    },
+    {
+      "epoch": 2.4125452352231607,
+      "grad_norm": 1.5348047869595263,
+      "learning_rate": 1.1231235149012082e-06,
+      "loss": 0.6216,
+      "step": 61000
+    },
+    {
+      "epoch": 2.4129407344420493,
+      "grad_norm": 1.2468646891487871,
+      "learning_rate": 1.1216708463515486e-06,
+      "loss": 0.6605,
+      "step": 61010
+    },
+    {
+      "epoch": 2.4133362336609387,
+      "grad_norm": 1.5928298124860365,
+      "learning_rate": 1.120218999165561e-06,
+      "loss": 0.6642,
+      "step": 61020
+    },
+    {
+      "epoch": 2.4137317328798273,
+      "grad_norm": 1.3940623844004774,
+      "learning_rate": 1.1187679736507184e-06,
+      "loss": 0.6448,
+      "step": 61030
+    },
+    {
+      "epoch": 2.414127232098717,
+      "grad_norm": 1.3053382297503529,
+      "learning_rate": 1.1173177701143267e-06,
+      "loss": 0.6602,
+      "step": 61040
+    },
+    {
+      "epoch": 2.4145227313176054,
+      "grad_norm": 1.5173553184792226,
+      "learning_rate": 1.1158683888635108e-06,
+      "loss": 0.6653,
+      "step": 61050
+    },
+    {
+      "epoch": 2.414918230536495,
+      "grad_norm": 1.4179613876805257,
+      "learning_rate": 1.1144198302052285e-06,
+      "loss": 0.663,
+      "step": 61060
+    },
+    {
+      "epoch": 2.4153137297553835,
+      "grad_norm": 1.4964988085060997,
+      "learning_rate": 1.112972094446257e-06,
+      "loss": 0.6601,
+      "step": 61070
+    },
+    {
+      "epoch": 2.415709228974273,
+      "grad_norm": 1.5579015889614098,
+      "learning_rate": 1.111525181893201e-06,
+      "loss": 0.5985,
+      "step": 61080
+    },
+    {
+      "epoch": 2.4161047281931616,
+      "grad_norm": 1.5543731641027563,
+      "learning_rate": 1.1100790928524952e-06,
+      "loss": 0.6759,
+      "step": 61090
+    },
+    {
+      "epoch": 2.416500227412051,
+      "grad_norm": 1.5533371778260152,
+      "learning_rate": 1.1086338276303937e-06,
+      "loss": 0.6424,
+      "step": 61100
+    },
+    {
+      "epoch": 2.4168957266309397,
+      "grad_norm": 1.640508402683737,
+      "learning_rate": 1.1071893865329809e-06,
+      "loss": 0.6546,
+      "step": 61110
+    },
+    {
+      "epoch": 2.417291225849829,
+      "grad_norm": 1.922679998780759,
+      "learning_rate": 1.1057457698661634e-06,
+      "loss": 0.6288,
+      "step": 61120
+    },
+    {
+      "epoch": 2.417686725068718,
+      "grad_norm": 1.464637869938643,
+      "learning_rate": 1.1043029779356746e-06,
+      "loss": 0.6788,
+      "step": 61130
+    },
+    {
+      "epoch": 2.418082224287607,
+      "grad_norm": 1.4836737500505095,
+      "learning_rate": 1.1028610110470721e-06,
+      "loss": 0.6867,
+      "step": 61140
+    },
+    {
+      "epoch": 2.4184777235064963,
+      "grad_norm": 1.5008039641123663,
+      "learning_rate": 1.1014198695057425e-06,
+      "loss": 0.684,
+      "step": 61150
+    },
+    {
+      "epoch": 2.4188732227253853,
+      "grad_norm": 1.7285466124624471,
+      "learning_rate": 1.0999795536168923e-06,
+      "loss": 0.6598,
+      "step": 61160
+    },
+    {
+      "epoch": 2.4192687219442743,
+      "grad_norm": 1.5317446327926802,
+      "learning_rate": 1.0985400636855575e-06,
+      "loss": 0.6441,
+      "step": 61170
+    },
+    {
+      "epoch": 2.4196642211631634,
+      "grad_norm": 1.439633524673699,
+      "learning_rate": 1.0971014000165953e-06,
+      "loss": 0.6655,
+      "step": 61180
+    },
+    {
+      "epoch": 2.4200597203820524,
+      "grad_norm": 1.3834204559553223,
+      "learning_rate": 1.0956635629146928e-06,
+      "loss": 0.6558,
+      "step": 61190
+    },
+    {
+      "epoch": 2.4204552196009415,
+      "grad_norm": 1.4429096140548068,
+      "learning_rate": 1.0942265526843565e-06,
+      "loss": 0.646,
+      "step": 61200
+    },
+    {
+      "epoch": 2.4208507188198305,
+      "grad_norm": 1.2789180166857825,
+      "learning_rate": 1.0927903696299236e-06,
+      "loss": 0.6421,
+      "step": 61210
+    },
+    {
+      "epoch": 2.4212462180387195,
+      "grad_norm": 1.4565206704965734,
+      "learning_rate": 1.0913550140555496e-06,
+      "loss": 0.6717,
+      "step": 61220
+    },
+    {
+      "epoch": 2.4216417172576086,
+      "grad_norm": 1.4279653826072902,
+      "learning_rate": 1.0899204862652218e-06,
+      "loss": 0.6622,
+      "step": 61230
+    },
+    {
+      "epoch": 2.4220372164764976,
+      "grad_norm": 1.338945014598067,
+      "learning_rate": 1.0884867865627462e-06,
+      "loss": 0.675,
+      "step": 61240
+    },
+    {
+      "epoch": 2.4224327156953867,
+      "grad_norm": 1.4670619474772613,
+      "learning_rate": 1.0870539152517568e-06,
+      "loss": 0.6419,
+      "step": 61250
+    },
+    {
+      "epoch": 2.4228282149142757,
+      "grad_norm": 1.530382368666391,
+      "learning_rate": 1.0856218726357092e-06,
+      "loss": 0.6496,
+      "step": 61260
+    },
+    {
+      "epoch": 2.4232237141331647,
+      "grad_norm": 1.7566544375640354,
+      "learning_rate": 1.0841906590178885e-06,
+      "loss": 0.6739,
+      "step": 61270
+    },
+    {
+      "epoch": 2.4236192133520538,
+      "grad_norm": 1.802849502370634,
+      "learning_rate": 1.0827602747013987e-06,
+      "loss": 0.6801,
+      "step": 61280
+    },
+    {
+      "epoch": 2.424014712570943,
+      "grad_norm": 1.5131869114513883,
+      "learning_rate": 1.0813307199891732e-06,
+      "loss": 0.6405,
+      "step": 61290
+    },
+    {
+      "epoch": 2.424410211789832,
+      "grad_norm": 1.4696017429840822,
+      "learning_rate": 1.0799019951839656e-06,
+      "loss": 0.644,
+      "step": 61300
+    },
+    {
+      "epoch": 2.424805711008721,
+      "grad_norm": 1.1960487304474925,
+      "learning_rate": 1.0784741005883542e-06,
+      "loss": 0.6396,
+      "step": 61310
+    },
+    {
+      "epoch": 2.42520121022761,
+      "grad_norm": 1.3779615554428748,
+      "learning_rate": 1.0770470365047452e-06,
+      "loss": 0.6472,
+      "step": 61320
+    },
+    {
+      "epoch": 2.425596709446499,
+      "grad_norm": 1.3969139885976096,
+      "learning_rate": 1.0756208032353643e-06,
+      "loss": 0.6578,
+      "step": 61330
+    },
+    {
+      "epoch": 2.425992208665388,
+      "grad_norm": 1.6232009471995144,
+      "learning_rate": 1.0741954010822653e-06,
+      "loss": 0.6766,
+      "step": 61340
+    },
+    {
+      "epoch": 2.426387707884277,
+      "grad_norm": 1.3685099763164983,
+      "learning_rate": 1.0727708303473212e-06,
+      "loss": 0.6633,
+      "step": 61350
+    },
+    {
+      "epoch": 2.426783207103166,
+      "grad_norm": 1.5502785356278703,
+      "learning_rate": 1.0713470913322343e-06,
+      "loss": 0.6273,
+      "step": 61360
+    },
+    {
+      "epoch": 2.427178706322055,
+      "grad_norm": 1.549311524200009,
+      "learning_rate": 1.0699241843385273e-06,
+      "loss": 0.6679,
+      "step": 61370
+    },
+    {
+      "epoch": 2.427574205540944,
+      "grad_norm": 1.51978344787089,
+      "learning_rate": 1.0685021096675468e-06,
+      "loss": 0.6354,
+      "step": 61380
+    },
+    {
+      "epoch": 2.427969704759833,
+      "grad_norm": 1.4134533321743648,
+      "learning_rate": 1.0670808676204636e-06,
+      "loss": 0.6347,
+      "step": 61390
+    },
+    {
+      "epoch": 2.4283652039787222,
+      "grad_norm": 1.3841824161300906,
+      "learning_rate": 1.0656604584982743e-06,
+      "loss": 0.6463,
+      "step": 61400
+    },
+    {
+      "epoch": 2.4287607031976113,
+      "grad_norm": 1.7679971822631761,
+      "learning_rate": 1.0642408826017947e-06,
+      "loss": 0.6494,
+      "step": 61410
+    },
+    {
+      "epoch": 2.4291562024165003,
+      "grad_norm": 1.5882746010635962,
+      "learning_rate": 1.062822140231669e-06,
+      "loss": 0.6734,
+      "step": 61420
+    },
+    {
+      "epoch": 2.4295517016353894,
+      "grad_norm": 1.4039767642203624,
+      "learning_rate": 1.0614042316883605e-06,
+      "loss": 0.6578,
+      "step": 61430
+    },
+    {
+      "epoch": 2.4299472008542784,
+      "grad_norm": 1.4293413099723287,
+      "learning_rate": 1.0599871572721605e-06,
+      "loss": 0.6457,
+      "step": 61440
+    },
+    {
+      "epoch": 2.4303427000731674,
+      "grad_norm": 1.5983790144291692,
+      "learning_rate": 1.0585709172831777e-06,
+      "loss": 0.6257,
+      "step": 61450
+    },
+    {
+      "epoch": 2.4307381992920565,
+      "grad_norm": 1.285780355011274,
+      "learning_rate": 1.0571555120213505e-06,
+      "loss": 0.6815,
+      "step": 61460
+    },
+    {
+      "epoch": 2.4311336985109455,
+      "grad_norm": 1.1432723233815385,
+      "learning_rate": 1.0557409417864367e-06,
+      "loss": 0.6545,
+      "step": 61470
+    },
+    {
+      "epoch": 2.4315291977298346,
+      "grad_norm": 1.4830195332034621,
+      "learning_rate": 1.0543272068780164e-06,
+      "loss": 0.6535,
+      "step": 61480
+    },
+    {
+      "epoch": 2.4319246969487236,
+      "grad_norm": 1.4665287621063017,
+      "learning_rate": 1.0529143075954968e-06,
+      "loss": 0.6605,
+      "step": 61490
+    },
+    {
+      "epoch": 2.4323201961676126,
+      "grad_norm": 1.4245650550237787,
+      "learning_rate": 1.051502244238105e-06,
+      "loss": 0.6601,
+      "step": 61500
+    },
+    {
+      "epoch": 2.4327156953865017,
+      "grad_norm": 1.4538133840707317,
+      "learning_rate": 1.0500910171048894e-06,
+      "loss": 0.6726,
+      "step": 61510
+    },
+    {
+      "epoch": 2.4331111946053907,
+      "grad_norm": 1.5405563254994379,
+      "learning_rate": 1.048680626494728e-06,
+      "loss": 0.6572,
+      "step": 61520
+    },
+    {
+      "epoch": 2.4335066938242798,
+      "grad_norm": 1.5526006017400207,
+      "learning_rate": 1.0472710727063145e-06,
+      "loss": 0.6522,
+      "step": 61530
+    },
+    {
+      "epoch": 2.433902193043169,
+      "grad_norm": 1.6113840757233477,
+      "learning_rate": 1.0458623560381682e-06,
+      "loss": 0.6011,
+      "step": 61540
+    },
+    {
+      "epoch": 2.434297692262058,
+      "grad_norm": 1.3529848016486252,
+      "learning_rate": 1.0444544767886323e-06,
+      "loss": 0.687,
+      "step": 61550
+    },
+    {
+      "epoch": 2.434693191480947,
+      "grad_norm": 1.7203540451056272,
+      "learning_rate": 1.04304743525587e-06,
+      "loss": 0.6242,
+      "step": 61560
+    },
+    {
+      "epoch": 2.435088690699836,
+      "grad_norm": 1.5137338493701369,
+      "learning_rate": 1.0416412317378716e-06,
+      "loss": 0.6326,
+      "step": 61570
+    },
+    {
+      "epoch": 2.435484189918725,
+      "grad_norm": 1.5154935200710762,
+      "learning_rate": 1.040235866532443e-06,
+      "loss": 0.6585,
+      "step": 61580
+    },
+    {
+      "epoch": 2.435879689137614,
+      "grad_norm": 1.3313570789298665,
+      "learning_rate": 1.0388313399372201e-06,
+      "loss": 0.6935,
+      "step": 61590
+    },
+    {
+      "epoch": 2.436275188356503,
+      "grad_norm": 1.5742679874702534,
+      "learning_rate": 1.0374276522496546e-06,
+      "loss": 0.6518,
+      "step": 61600
+    },
+    {
+      "epoch": 2.436670687575392,
+      "grad_norm": 1.484978954781372,
+      "learning_rate": 1.0360248037670273e-06,
+      "loss": 0.6419,
+      "step": 61610
+    },
+    {
+      "epoch": 2.437066186794281,
+      "grad_norm": 1.6230100547349011,
+      "learning_rate": 1.0346227947864323e-06,
+      "loss": 0.6335,
+      "step": 61620
+    },
+    {
+      "epoch": 2.43746168601317,
+      "grad_norm": 1.508863979277988,
+      "learning_rate": 1.033221625604795e-06,
+      "loss": 0.6358,
+      "step": 61630
+    },
+    {
+      "epoch": 2.437857185232059,
+      "grad_norm": 1.3207688631361822,
+      "learning_rate": 1.0318212965188568e-06,
+      "loss": 0.659,
+      "step": 61640
+    },
+    {
+      "epoch": 2.4382526844509482,
+      "grad_norm": 1.2422122200091579,
+      "learning_rate": 1.0304218078251853e-06,
+      "loss": 0.6612,
+      "step": 61650
+    },
+    {
+      "epoch": 2.4386481836698373,
+      "grad_norm": 1.290823321606612,
+      "learning_rate": 1.0290231598201666e-06,
+      "loss": 0.6836,
+      "step": 61660
+    },
+    {
+      "epoch": 2.4390436828887263,
+      "grad_norm": 1.3802244898816645,
+      "learning_rate": 1.0276253528000118e-06,
+      "loss": 0.6474,
+      "step": 61670
+    },
+    {
+      "epoch": 2.4394391821076153,
+      "grad_norm": 1.3739471336168543,
+      "learning_rate": 1.0262283870607509e-06,
+      "loss": 0.6883,
+      "step": 61680
+    },
+    {
+      "epoch": 2.4398346813265044,
+      "grad_norm": 1.4467098933249398,
+      "learning_rate": 1.0248322628982393e-06,
+      "loss": 0.6872,
+      "step": 61690
+    },
+    {
+      "epoch": 2.4402301805453934,
+      "grad_norm": 1.1580199054388167,
+      "learning_rate": 1.02343698060815e-06,
+      "loss": 0.6613,
+      "step": 61700
+    },
+    {
+      "epoch": 2.4406256797642825,
+      "grad_norm": 1.6253976706663749,
+      "learning_rate": 1.0220425404859825e-06,
+      "loss": 0.625,
+      "step": 61710
+    },
+    {
+      "epoch": 2.4410211789831715,
+      "grad_norm": 1.7512048321027551,
+      "learning_rate": 1.0206489428270532e-06,
+      "loss": 0.6518,
+      "step": 61720
+    },
+    {
+      "epoch": 2.4414166782020605,
+      "grad_norm": 1.520503957582064,
+      "learning_rate": 1.0192561879265017e-06,
+      "loss": 0.6383,
+      "step": 61730
+    },
+    {
+      "epoch": 2.4418121774209496,
+      "grad_norm": 1.4878472454208505,
+      "learning_rate": 1.0178642760792923e-06,
+      "loss": 0.6408,
+      "step": 61740
+    },
+    {
+      "epoch": 2.4422076766398386,
+      "grad_norm": 1.4681928952925842,
+      "learning_rate": 1.0164732075802064e-06,
+      "loss": 0.6744,
+      "step": 61750
+    },
+    {
+      "epoch": 2.4426031758587277,
+      "grad_norm": 1.5560241461750843,
+      "learning_rate": 1.0150829827238484e-06,
+      "loss": 0.6343,
+      "step": 61760
+    },
+    {
+      "epoch": 2.4429986750776167,
+      "grad_norm": 1.6122360279641574,
+      "learning_rate": 1.0136936018046433e-06,
+      "loss": 0.6319,
+      "step": 61770
+    },
+    {
+      "epoch": 2.4433941742965057,
+      "grad_norm": 1.8002739722284244,
+      "learning_rate": 1.0123050651168404e-06,
+      "loss": 0.6488,
+      "step": 61780
+    },
+    {
+      "epoch": 2.443789673515395,
+      "grad_norm": 1.5156827878034502,
+      "learning_rate": 1.0109173729545058e-06,
+      "loss": 0.6635,
+      "step": 61790
+    },
+    {
+      "epoch": 2.444185172734284,
+      "grad_norm": 1.480946256554298,
+      "learning_rate": 1.0095305256115306e-06,
+      "loss": 0.673,
+      "step": 61800
+    },
+    {
+      "epoch": 2.444580671953173,
+      "grad_norm": 1.6729230238763309,
+      "learning_rate": 1.0081445233816235e-06,
+      "loss": 0.6687,
+      "step": 61810
+    },
+    {
+      "epoch": 2.444976171172062,
+      "grad_norm": 1.4790354983141905,
+      "learning_rate": 1.006759366558318e-06,
+      "loss": 0.6674,
+      "step": 61820
+    },
+    {
+      "epoch": 2.445371670390951,
+      "grad_norm": 1.4564880887960199,
+      "learning_rate": 1.0053750554349652e-06,
+      "loss": 0.6431,
+      "step": 61830
+    },
+    {
+      "epoch": 2.44576716960984,
+      "grad_norm": 1.679158130216397,
+      "learning_rate": 1.0039915903047398e-06,
+      "loss": 0.6514,
+      "step": 61840
+    },
+    {
+      "epoch": 2.446162668828729,
+      "grad_norm": 1.6241238693990425,
+      "learning_rate": 1.002608971460634e-06,
+      "loss": 0.6454,
+      "step": 61850
+    },
+    {
+      "epoch": 2.446558168047618,
+      "grad_norm": 1.4976577718811317,
+      "learning_rate": 1.001227199195467e-06,
+      "loss": 0.6832,
+      "step": 61860
+    },
+    {
+      "epoch": 2.446953667266507,
+      "grad_norm": 1.306892079381734,
+      "learning_rate": 9.998462738018683e-07,
+      "loss": 0.674,
+      "step": 61870
+    },
+    {
+      "epoch": 2.447349166485396,
+      "grad_norm": 1.5629046344378226,
+      "learning_rate": 9.98466195572299e-07,
+      "loss": 0.6227,
+      "step": 61880
+    },
+    {
+      "epoch": 2.447744665704285,
+      "grad_norm": 1.4508346253659112,
+      "learning_rate": 9.970869647990327e-07,
+      "loss": 0.6469,
+      "step": 61890
+    },
+    {
+      "epoch": 2.448140164923174,
+      "grad_norm": 1.9290465565852426,
+      "learning_rate": 9.957085817741707e-07,
+      "loss": 0.6317,
+      "step": 61900
+    },
+    {
+      "epoch": 2.4485356641420633,
+      "grad_norm": 1.7856544279104343,
+      "learning_rate": 9.943310467896267e-07,
+      "loss": 0.6417,
+      "step": 61910
+    },
+    {
+      "epoch": 2.4489311633609523,
+      "grad_norm": 1.55771514639698,
+      "learning_rate": 9.929543601371428e-07,
+      "loss": 0.6557,
+      "step": 61920
+    },
+    {
+      "epoch": 2.4493266625798413,
+      "grad_norm": 1.463084215873033,
+      "learning_rate": 9.915785221082747e-07,
+      "loss": 0.6924,
+      "step": 61930
+    },
+    {
+      "epoch": 2.4497221617987304,
+      "grad_norm": 1.5546262033243476,
+      "learning_rate": 9.902035329944037e-07,
+      "loss": 0.6655,
+      "step": 61940
+    },
+    {
+      "epoch": 2.4501176610176194,
+      "grad_norm": 1.6910192612695814,
+      "learning_rate": 9.888293930867283e-07,
+      "loss": 0.6671,
+      "step": 61950
+    },
+    {
+      "epoch": 2.4505131602365084,
+      "grad_norm": 1.5081076597464609,
+      "learning_rate": 9.874561026762653e-07,
+      "loss": 0.6414,
+      "step": 61960
+    },
+    {
+      "epoch": 2.4509086594553975,
+      "grad_norm": 1.5947135474911969,
+      "learning_rate": 9.860836620538577e-07,
+      "loss": 0.6263,
+      "step": 61970
+    },
+    {
+      "epoch": 2.4513041586742865,
+      "grad_norm": 1.5982495936199257,
+      "learning_rate": 9.847120715101632e-07,
+      "loss": 0.6456,
+      "step": 61980
+    },
+    {
+      "epoch": 2.4516996578931756,
+      "grad_norm": 1.393370621178838,
+      "learning_rate": 9.833413313356604e-07,
+      "loss": 0.6419,
+      "step": 61990
+    },
+    {
+      "epoch": 2.4520951571120646,
+      "grad_norm": 1.4594935501580708,
+      "learning_rate": 9.819714418206506e-07,
+      "loss": 0.6573,
+      "step": 62000
+    },
+    {
+      "epoch": 2.4524906563309536,
+      "grad_norm": 1.3440150645443825,
+      "learning_rate": 9.806024032552514e-07,
+      "loss": 0.6674,
+      "step": 62010
+    },
+    {
+      "epoch": 2.4528861555498427,
+      "grad_norm": 1.503560926056615,
+      "learning_rate": 9.79234215929401e-07,
+      "loss": 0.6362,
+      "step": 62020
+    },
+    {
+      "epoch": 2.4532816547687317,
+      "grad_norm": 1.6815575487442866,
+      "learning_rate": 9.778668801328606e-07,
+      "loss": 0.677,
+      "step": 62030
+    },
+    {
+      "epoch": 2.4536771539876208,
+      "grad_norm": 1.3585129209049398,
+      "learning_rate": 9.765003961552055e-07,
+      "loss": 0.6324,
+      "step": 62040
+    },
+    {
+      "epoch": 2.45407265320651,
+      "grad_norm": 1.2943114263110564,
+      "learning_rate": 9.751347642858367e-07,
+      "loss": 0.6516,
+      "step": 62050
+    },
+    {
+      "epoch": 2.454468152425399,
+      "grad_norm": 1.5810697418849364,
+      "learning_rate": 9.737699848139687e-07,
+      "loss": 0.6421,
+      "step": 62060
+    },
+    {
+      "epoch": 2.454863651644288,
+      "grad_norm": 1.3837798597260171,
+      "learning_rate": 9.724060580286409e-07,
+      "loss": 0.6578,
+      "step": 62070
+    },
+    {
+      "epoch": 2.455259150863177,
+      "grad_norm": 1.4030850214162944,
+      "learning_rate": 9.710429842187075e-07,
+      "loss": 0.6635,
+      "step": 62080
+    },
+    {
+      "epoch": 2.455654650082066,
+      "grad_norm": 1.2935157562658848,
+      "learning_rate": 9.696807636728461e-07,
+      "loss": 0.6587,
+      "step": 62090
+    },
+    {
+      "epoch": 2.456050149300955,
+      "grad_norm": 1.4275532812321332,
+      "learning_rate": 9.683193966795512e-07,
+      "loss": 0.6443,
+      "step": 62100
+    },
+    {
+      "epoch": 2.456445648519844,
+      "grad_norm": 1.363085813590042,
+      "learning_rate": 9.669588835271366e-07,
+      "loss": 0.6697,
+      "step": 62110
+    },
+    {
+      "epoch": 2.456841147738733,
+      "grad_norm": 1.3131016348851743,
+      "learning_rate": 9.655992245037337e-07,
+      "loss": 0.6923,
+      "step": 62120
+    },
+    {
+      "epoch": 2.457236646957622,
+      "grad_norm": 1.4559672738106249,
+      "learning_rate": 9.642404198972988e-07,
+      "loss": 0.6379,
+      "step": 62130
+    },
+    {
+      "epoch": 2.457632146176511,
+      "grad_norm": 1.4758187857817757,
+      "learning_rate": 9.628824699956002e-07,
+      "loss": 0.6303,
+      "step": 62140
+    },
+    {
+      "epoch": 2.4580276453954,
+      "grad_norm": 1.4862585199584466,
+      "learning_rate": 9.615253750862308e-07,
+      "loss": 0.6242,
+      "step": 62150
+    },
+    {
+      "epoch": 2.4584231446142892,
+      "grad_norm": 1.66444901556112,
+      "learning_rate": 9.601691354565979e-07,
+      "loss": 0.6308,
+      "step": 62160
+    },
+    {
+      "epoch": 2.4588186438331783,
+      "grad_norm": 1.1451174595820228,
+      "learning_rate": 9.588137513939322e-07,
+      "loss": 0.6826,
+      "step": 62170
+    },
+    {
+      "epoch": 2.4592141430520673,
+      "grad_norm": 1.445211288315692,
+      "learning_rate": 9.574592231852797e-07,
+      "loss": 0.6555,
+      "step": 62180
+    },
+    {
+      "epoch": 2.4596096422709564,
+      "grad_norm": 1.3547554940248092,
+      "learning_rate": 9.561055511175048e-07,
+      "loss": 0.6102,
+      "step": 62190
+    },
+    {
+      "epoch": 2.4600051414898454,
+      "grad_norm": 1.5317981706398662,
+      "learning_rate": 9.547527354772951e-07,
+      "loss": 0.6432,
+      "step": 62200
+    },
+    {
+      "epoch": 2.4604006407087344,
+      "grad_norm": 1.5952520238558352,
+      "learning_rate": 9.534007765511505e-07,
+      "loss": 0.6552,
+      "step": 62210
+    },
+    {
+      "epoch": 2.4607961399276235,
+      "grad_norm": 1.2482111505444335,
+      "learning_rate": 9.520496746253955e-07,
+      "loss": 0.6057,
+      "step": 62220
+    },
+    {
+      "epoch": 2.4611916391465125,
+      "grad_norm": 1.3345106448942112,
+      "learning_rate": 9.506994299861693e-07,
+      "loss": 0.6436,
+      "step": 62230
+    },
+    {
+      "epoch": 2.4615871383654016,
+      "grad_norm": 1.2860768302922536,
+      "learning_rate": 9.493500429194302e-07,
+      "loss": 0.66,
+      "step": 62240
+    },
+    {
+      "epoch": 2.4619826375842906,
+      "grad_norm": 1.7214272374023878,
+      "learning_rate": 9.480015137109544e-07,
+      "loss": 0.6282,
+      "step": 62250
+    },
+    {
+      "epoch": 2.4623781368031796,
+      "grad_norm": 1.153778145211568,
+      "learning_rate": 9.466538426463395e-07,
+      "loss": 0.6429,
+      "step": 62260
+    },
+    {
+      "epoch": 2.4627736360220687,
+      "grad_norm": 1.377254642963884,
+      "learning_rate": 9.453070300109968e-07,
+      "loss": 0.6412,
+      "step": 62270
+    },
+    {
+      "epoch": 2.4631691352409577,
+      "grad_norm": 1.3144104816162459,
+      "learning_rate": 9.439610760901608e-07,
+      "loss": 0.6704,
+      "step": 62280
+    },
+    {
+      "epoch": 2.463564634459847,
+      "grad_norm": 1.410960223425946,
+      "learning_rate": 9.42615981168879e-07,
+      "loss": 0.6663,
+      "step": 62290
+    },
+    {
+      "epoch": 2.463960133678736,
+      "grad_norm": 1.548955934356688,
+      "learning_rate": 9.412717455320214e-07,
+      "loss": 0.6599,
+      "step": 62300
+    },
+    {
+      "epoch": 2.4643556328976253,
+      "grad_norm": 1.8830095633276083,
+      "learning_rate": 9.399283694642719e-07,
+      "loss": 0.6237,
+      "step": 62310
+    },
+    {
+      "epoch": 2.464751132116514,
+      "grad_norm": 1.4858962781557385,
+      "learning_rate": 9.385858532501374e-07,
+      "loss": 0.6399,
+      "step": 62320
+    },
+    {
+      "epoch": 2.4651466313354033,
+      "grad_norm": 1.1758318344683238,
+      "learning_rate": 9.372441971739371e-07,
+      "loss": 0.6445,
+      "step": 62330
+    },
+    {
+      "epoch": 2.465542130554292,
+      "grad_norm": 1.51863280205433,
+      "learning_rate": 9.359034015198126e-07,
+      "loss": 0.6541,
+      "step": 62340
+    },
+    {
+      "epoch": 2.4659376297731814,
+      "grad_norm": 1.6331341703058395,
+      "learning_rate": 9.345634665717212e-07,
+      "loss": 0.6306,
+      "step": 62350
+    },
+    {
+      "epoch": 2.46633312899207,
+      "grad_norm": 1.5850832986333157,
+      "learning_rate": 9.332243926134377e-07,
+      "loss": 0.6371,
+      "step": 62360
+    },
+    {
+      "epoch": 2.4667286282109595,
+      "grad_norm": 1.4918165819158464,
+      "learning_rate": 9.318861799285539e-07,
+      "loss": 0.6556,
+      "step": 62370
+    },
+    {
+      "epoch": 2.467124127429848,
+      "grad_norm": 1.729307261313895,
+      "learning_rate": 9.305488288004827e-07,
+      "loss": 0.659,
+      "step": 62380
+    },
+    {
+      "epoch": 2.4675196266487376,
+      "grad_norm": 1.2589755586999247,
+      "learning_rate": 9.292123395124498e-07,
+      "loss": 0.6528,
+      "step": 62390
+    },
+    {
+      "epoch": 2.467915125867626,
+      "grad_norm": 1.4258865980902158,
+      "learning_rate": 9.27876712347503e-07,
+      "loss": 0.6558,
+      "step": 62400
+    },
+    {
+      "epoch": 2.4683106250865157,
+      "grad_norm": 1.5996889358058768,
+      "learning_rate": 9.26541947588504e-07,
+      "loss": 0.6538,
+      "step": 62410
+    },
+    {
+      "epoch": 2.4687061243054043,
+      "grad_norm": 1.7470968860875724,
+      "learning_rate": 9.252080455181323e-07,
+      "loss": 0.6403,
+      "step": 62420
+    },
+    {
+      "epoch": 2.4691016235242937,
+      "grad_norm": 1.5989633140410042,
+      "learning_rate": 9.238750064188873e-07,
+      "loss": 0.658,
+      "step": 62430
+    },
+    {
+      "epoch": 2.4694971227431823,
+      "grad_norm": 1.4565099791856557,
+      "learning_rate": 9.225428305730822e-07,
+      "loss": 0.6592,
+      "step": 62440
+    },
+    {
+      "epoch": 2.469892621962072,
+      "grad_norm": 1.4378637988652545,
+      "learning_rate": 9.212115182628506e-07,
+      "loss": 0.637,
+      "step": 62450
+    },
+    {
+      "epoch": 2.470288121180961,
+      "grad_norm": 1.5468024163108722,
+      "learning_rate": 9.1988106977014e-07,
+      "loss": 0.6547,
+      "step": 62460
+    },
+    {
+      "epoch": 2.47068362039985,
+      "grad_norm": 1.3753385287803068,
+      "learning_rate": 9.185514853767197e-07,
+      "loss": 0.6751,
+      "step": 62470
+    },
+    {
+      "epoch": 2.471079119618739,
+      "grad_norm": 1.1817099948942396,
+      "learning_rate": 9.172227653641686e-07,
+      "loss": 0.6297,
+      "step": 62480
+    },
+    {
+      "epoch": 2.471474618837628,
+      "grad_norm": 1.5115803225740767,
+      "learning_rate": 9.158949100138897e-07,
+      "loss": 0.6344,
+      "step": 62490
+    },
+    {
+      "epoch": 2.471870118056517,
+      "grad_norm": 1.6701593994570985,
+      "learning_rate": 9.145679196070978e-07,
+      "loss": 0.6627,
+      "step": 62500
+    },
+    {
+      "epoch": 2.472265617275406,
+      "grad_norm": 1.3432859419678334,
+      "learning_rate": 9.132417944248295e-07,
+      "loss": 0.658,
+      "step": 62510
+    },
+    {
+      "epoch": 2.472661116494295,
+      "grad_norm": 1.7223608956543923,
+      "learning_rate": 9.119165347479331e-07,
+      "loss": 0.6302,
+      "step": 62520
+    },
+    {
+      "epoch": 2.473056615713184,
+      "grad_norm": 1.3084200109269424,
+      "learning_rate": 9.105921408570778e-07,
+      "loss": 0.6594,
+      "step": 62530
+    },
+    {
+      "epoch": 2.473452114932073,
+      "grad_norm": 1.737087609201176,
+      "learning_rate": 9.092686130327455e-07,
+      "loss": 0.6528,
+      "step": 62540
+    },
+    {
+      "epoch": 2.473847614150962,
+      "grad_norm": 1.4305464176140446,
+      "learning_rate": 9.079459515552385e-07,
+      "loss": 0.6803,
+      "step": 62550
+    },
+    {
+      "epoch": 2.4742431133698513,
+      "grad_norm": 1.429248974699674,
+      "learning_rate": 9.066241567046724e-07,
+      "loss": 0.6401,
+      "step": 62560
+    },
+    {
+      "epoch": 2.4746386125887403,
+      "grad_norm": 1.4025526118836549,
+      "learning_rate": 9.053032287609825e-07,
+      "loss": 0.6517,
+      "step": 62570
+    },
+    {
+      "epoch": 2.4750341118076293,
+      "grad_norm": 1.667657266035966,
+      "learning_rate": 9.039831680039162e-07,
+      "loss": 0.6217,
+      "step": 62580
+    },
+    {
+      "epoch": 2.4754296110265184,
+      "grad_norm": 1.6871116170448464,
+      "learning_rate": 9.026639747130444e-07,
+      "loss": 0.6623,
+      "step": 62590
+    },
+    {
+      "epoch": 2.4758251102454074,
+      "grad_norm": 1.552351408301868,
+      "learning_rate": 9.013456491677431e-07,
+      "loss": 0.6752,
+      "step": 62600
+    },
+    {
+      "epoch": 2.4762206094642965,
+      "grad_norm": 1.5074626328497924,
+      "learning_rate": 9.000281916472159e-07,
+      "loss": 0.6335,
+      "step": 62610
+    },
+    {
+      "epoch": 2.4766161086831855,
+      "grad_norm": 1.3484547178956428,
+      "learning_rate": 8.987116024304754e-07,
+      "loss": 0.6698,
+      "step": 62620
+    },
+    {
+      "epoch": 2.4770116079020745,
+      "grad_norm": 1.3605788799841034,
+      "learning_rate": 8.973958817963541e-07,
+      "loss": 0.6492,
+      "step": 62630
+    },
+    {
+      "epoch": 2.4774071071209636,
+      "grad_norm": 1.4256248434597538,
+      "learning_rate": 8.960810300234973e-07,
+      "loss": 0.6488,
+      "step": 62640
+    },
+    {
+      "epoch": 2.4778026063398526,
+      "grad_norm": 1.5857225928372,
+      "learning_rate": 8.947670473903708e-07,
+      "loss": 0.678,
+      "step": 62650
+    },
+    {
+      "epoch": 2.4781981055587416,
+      "grad_norm": 1.3916001682373047,
+      "learning_rate": 8.934539341752518e-07,
+      "loss": 0.6202,
+      "step": 62660
+    },
+    {
+      "epoch": 2.4785936047776307,
+      "grad_norm": 1.5879844648146813,
+      "learning_rate": 8.921416906562342e-07,
+      "loss": 0.6839,
+      "step": 62670
+    },
+    {
+      "epoch": 2.4789891039965197,
+      "grad_norm": 1.610584180477446,
+      "learning_rate": 8.90830317111231e-07,
+      "loss": 0.657,
+      "step": 62680
+    },
+    {
+      "epoch": 2.4793846032154088,
+      "grad_norm": 1.462989022813284,
+      "learning_rate": 8.895198138179662e-07,
+      "loss": 0.661,
+      "step": 62690
+    },
+    {
+      "epoch": 2.479780102434298,
+      "grad_norm": 1.346729610438674,
+      "learning_rate": 8.88210181053985e-07,
+      "loss": 0.6347,
+      "step": 62700
+    },
+    {
+      "epoch": 2.480175601653187,
+      "grad_norm": 1.404694002954589,
+      "learning_rate": 8.869014190966419e-07,
+      "loss": 0.6651,
+      "step": 62710
+    },
+    {
+      "epoch": 2.480571100872076,
+      "grad_norm": 1.4416693004426953,
+      "learning_rate": 8.855935282231148e-07,
+      "loss": 0.6374,
+      "step": 62720
+    },
+    {
+      "epoch": 2.480966600090965,
+      "grad_norm": 1.3913758857778433,
+      "learning_rate": 8.842865087103863e-07,
+      "loss": 0.6239,
+      "step": 62730
+    },
+    {
+      "epoch": 2.481362099309854,
+      "grad_norm": 1.440279853761296,
+      "learning_rate": 8.82980360835266e-07,
+      "loss": 0.6681,
+      "step": 62740
+    },
+    {
+      "epoch": 2.481757598528743,
+      "grad_norm": 1.5505994266108365,
+      "learning_rate": 8.816750848743705e-07,
+      "loss": 0.6532,
+      "step": 62750
+    },
+    {
+      "epoch": 2.482153097747632,
+      "grad_norm": 2.1636942681583813,
+      "learning_rate": 8.803706811041373e-07,
+      "loss": 0.6201,
+      "step": 62760
+    },
+    {
+      "epoch": 2.482548596966521,
+      "grad_norm": 1.7695730589459897,
+      "learning_rate": 8.790671498008141e-07,
+      "loss": 0.6165,
+      "step": 62770
+    },
+    {
+      "epoch": 2.48294409618541,
+      "grad_norm": 1.4994018571311274,
+      "learning_rate": 8.777644912404693e-07,
+      "loss": 0.6417,
+      "step": 62780
+    },
+    {
+      "epoch": 2.483339595404299,
+      "grad_norm": 1.176276158836899,
+      "learning_rate": 8.76462705698981e-07,
+      "loss": 0.6257,
+      "step": 62790
+    },
+    {
+      "epoch": 2.483735094623188,
+      "grad_norm": 1.4198950469951166,
+      "learning_rate": 8.751617934520479e-07,
+      "loss": 0.633,
+      "step": 62800
+    },
+    {
+      "epoch": 2.4841305938420772,
+      "grad_norm": 1.5267975687121929,
+      "learning_rate": 8.738617547751776e-07,
+      "loss": 0.6494,
+      "step": 62810
+    },
+    {
+      "epoch": 2.4845260930609663,
+      "grad_norm": 1.4114562111610234,
+      "learning_rate": 8.725625899436996e-07,
+      "loss": 0.6267,
+      "step": 62820
+    },
+    {
+      "epoch": 2.4849215922798553,
+      "grad_norm": 1.7341174176053717,
+      "learning_rate": 8.71264299232753e-07,
+      "loss": 0.6501,
+      "step": 62830
+    },
+    {
+      "epoch": 2.4853170914987444,
+      "grad_norm": 1.5297476215036843,
+      "learning_rate": 8.699668829172936e-07,
+      "loss": 0.6792,
+      "step": 62840
+    },
+    {
+      "epoch": 2.4857125907176334,
+      "grad_norm": 1.585368319975606,
+      "learning_rate": 8.686703412720904e-07,
+      "loss": 0.6261,
+      "step": 62850
+    },
+    {
+      "epoch": 2.4861080899365224,
+      "grad_norm": 1.430888819363717,
+      "learning_rate": 8.673746745717321e-07,
+      "loss": 0.6668,
+      "step": 62860
+    },
+    {
+      "epoch": 2.4865035891554115,
+      "grad_norm": 1.309847223506252,
+      "learning_rate": 8.660798830906148e-07,
+      "loss": 0.6486,
+      "step": 62870
+    },
+    {
+      "epoch": 2.4868990883743005,
+      "grad_norm": 1.6277582586387795,
+      "learning_rate": 8.647859671029568e-07,
+      "loss": 0.6608,
+      "step": 62880
+    },
+    {
+      "epoch": 2.4872945875931896,
+      "grad_norm": 1.5101654507004232,
+      "learning_rate": 8.634929268827852e-07,
+      "loss": 0.6679,
+      "step": 62890
+    },
+    {
+      "epoch": 2.4876900868120786,
+      "grad_norm": 1.3435015396226426,
+      "learning_rate": 8.622007627039436e-07,
+      "loss": 0.6634,
+      "step": 62900
+    },
+    {
+      "epoch": 2.4880855860309676,
+      "grad_norm": 1.6139767103014016,
+      "learning_rate": 8.60909474840092e-07,
+      "loss": 0.6746,
+      "step": 62910
+    },
+    {
+      "epoch": 2.4884810852498567,
+      "grad_norm": 1.3277232870043423,
+      "learning_rate": 8.596190635647006e-07,
+      "loss": 0.682,
+      "step": 62920
+    },
+    {
+      "epoch": 2.4888765844687457,
+      "grad_norm": 1.458653472983833,
+      "learning_rate": 8.583295291510591e-07,
+      "loss": 0.638,
+      "step": 62930
+    },
+    {
+      "epoch": 2.4892720836876348,
+      "grad_norm": 1.2215171501564577,
+      "learning_rate": 8.570408718722656e-07,
+      "loss": 0.6495,
+      "step": 62940
+    },
+    {
+      "epoch": 2.489667582906524,
+      "grad_norm": 1.5068448611067728,
+      "learning_rate": 8.557530920012396e-07,
+      "loss": 0.6619,
+      "step": 62950
+    },
+    {
+      "epoch": 2.490063082125413,
+      "grad_norm": 1.3211681548482772,
+      "learning_rate": 8.544661898107081e-07,
+      "loss": 0.6537,
+      "step": 62960
+    },
+    {
+      "epoch": 2.490458581344302,
+      "grad_norm": 1.4480668111754622,
+      "learning_rate": 8.531801655732158e-07,
+      "loss": 0.6267,
+      "step": 62970
+    },
+    {
+      "epoch": 2.490854080563191,
+      "grad_norm": 1.5213505338685824,
+      "learning_rate": 8.518950195611192e-07,
+      "loss": 0.6482,
+      "step": 62980
+    },
+    {
+      "epoch": 2.49124957978208,
+      "grad_norm": 1.4109406820508017,
+      "learning_rate": 8.506107520465923e-07,
+      "loss": 0.679,
+      "step": 62990
+    },
+    {
+      "epoch": 2.491645079000969,
+      "grad_norm": 1.7065792918642062,
+      "learning_rate": 8.493273633016186e-07,
+      "loss": 0.6331,
+      "step": 63000
+    },
+    {
+      "epoch": 2.492040578219858,
+      "grad_norm": 1.238981061339666,
+      "learning_rate": 8.480448535980008e-07,
+      "loss": 0.6905,
+      "step": 63010
+    },
+    {
+      "epoch": 2.492436077438747,
+      "grad_norm": 1.4103121127250444,
+      "learning_rate": 8.467632232073497e-07,
+      "loss": 0.6741,
+      "step": 63020
+    },
+    {
+      "epoch": 2.492831576657636,
+      "grad_norm": 1.474332623979177,
+      "learning_rate": 8.454824724010951e-07,
+      "loss": 0.6304,
+      "step": 63030
+    },
+    {
+      "epoch": 2.493227075876525,
+      "grad_norm": 1.6107543437688416,
+      "learning_rate": 8.442026014504756e-07,
+      "loss": 0.6067,
+      "step": 63040
+    },
+    {
+      "epoch": 2.493622575095414,
+      "grad_norm": 1.2994985351397115,
+      "learning_rate": 8.429236106265482e-07,
+      "loss": 0.6561,
+      "step": 63050
+    },
+    {
+      "epoch": 2.4940180743143032,
+      "grad_norm": 1.2811967071546007,
+      "learning_rate": 8.416455002001806e-07,
+      "loss": 0.6239,
+      "step": 63060
+    },
+    {
+      "epoch": 2.4944135735331923,
+      "grad_norm": 1.6154029237172847,
+      "learning_rate": 8.40368270442053e-07,
+      "loss": 0.6649,
+      "step": 63070
+    },
+    {
+      "epoch": 2.4948090727520813,
+      "grad_norm": 1.430999136350645,
+      "learning_rate": 8.390919216226634e-07,
+      "loss": 0.6348,
+      "step": 63080
+    },
+    {
+      "epoch": 2.4952045719709703,
+      "grad_norm": 1.4004336370578108,
+      "learning_rate": 8.378164540123191e-07,
+      "loss": 0.6403,
+      "step": 63090
+    },
+    {
+      "epoch": 2.4956000711898594,
+      "grad_norm": 1.6829120560389035,
+      "learning_rate": 8.365418678811416e-07,
+      "loss": 0.6494,
+      "step": 63100
+    },
+    {
+      "epoch": 2.4959955704087484,
+      "grad_norm": 1.8855314343538596,
+      "learning_rate": 8.352681634990683e-07,
+      "loss": 0.6319,
+      "step": 63110
+    },
+    {
+      "epoch": 2.4963910696276375,
+      "grad_norm": 1.6200870333688646,
+      "learning_rate": 8.339953411358471e-07,
+      "loss": 0.6278,
+      "step": 63120
+    },
+    {
+      "epoch": 2.4967865688465265,
+      "grad_norm": 1.421846388191181,
+      "learning_rate": 8.327234010610385e-07,
+      "loss": 0.6179,
+      "step": 63130
+    },
+    {
+      "epoch": 2.4971820680654155,
+      "grad_norm": 1.5421255745157936,
+      "learning_rate": 8.314523435440203e-07,
+      "loss": 0.6267,
+      "step": 63140
+    },
+    {
+      "epoch": 2.4975775672843046,
+      "grad_norm": 1.6931529358451676,
+      "learning_rate": 8.301821688539785e-07,
+      "loss": 0.6251,
+      "step": 63150
+    },
+    {
+      "epoch": 2.4979730665031936,
+      "grad_norm": 1.4438046039011787,
+      "learning_rate": 8.289128772599159e-07,
+      "loss": 0.6507,
+      "step": 63160
+    },
+    {
+      "epoch": 2.4983685657220827,
+      "grad_norm": 1.5983460025928997,
+      "learning_rate": 8.276444690306451e-07,
+      "loss": 0.6626,
+      "step": 63170
+    },
+    {
+      "epoch": 2.4987640649409717,
+      "grad_norm": 1.7811642513131678,
+      "learning_rate": 8.263769444347957e-07,
+      "loss": 0.62,
+      "step": 63180
+    },
+    {
+      "epoch": 2.4991595641598607,
+      "grad_norm": 1.420844575349242,
+      "learning_rate": 8.251103037408043e-07,
+      "loss": 0.6636,
+      "step": 63190
+    },
+    {
+      "epoch": 2.4995550633787498,
+      "grad_norm": 1.895786180603178,
+      "learning_rate": 8.238445472169271e-07,
+      "loss": 0.6418,
+      "step": 63200
+    },
+    {
+      "epoch": 2.499950562597639,
+      "grad_norm": 1.3154920560413872,
+      "learning_rate": 8.225796751312282e-07,
+      "loss": 0.6348,
+      "step": 63210
+    },
+    {
+      "epoch": 2.500346061816528,
+      "grad_norm": 1.5507445316264594,
+      "learning_rate": 8.21315687751586e-07,
+      "loss": 0.6517,
+      "step": 63220
+    },
+    {
+      "epoch": 2.500741561035417,
+      "grad_norm": 1.3246659817953341,
+      "learning_rate": 8.200525853456892e-07,
+      "loss": 0.6591,
+      "step": 63230
+    },
+    {
+      "epoch": 2.501137060254306,
+      "grad_norm": 1.5009986996407279,
+      "learning_rate": 8.187903681810449e-07,
+      "loss": 0.6518,
+      "step": 63240
+    },
+    {
+      "epoch": 2.501532559473195,
+      "grad_norm": 1.3898452371231547,
+      "learning_rate": 8.17529036524966e-07,
+      "loss": 0.6702,
+      "step": 63250
+    },
+    {
+      "epoch": 2.501928058692084,
+      "grad_norm": 1.3773557734199384,
+      "learning_rate": 8.162685906445833e-07,
+      "loss": 0.636,
+      "step": 63260
+    },
+    {
+      "epoch": 2.502323557910973,
+      "grad_norm": 1.6405808013267522,
+      "learning_rate": 8.150090308068359e-07,
+      "loss": 0.678,
+      "step": 63270
+    },
+    {
+      "epoch": 2.502719057129862,
+      "grad_norm": 1.870128765345886,
+      "learning_rate": 8.137503572784782e-07,
+      "loss": 0.6503,
+      "step": 63280
+    },
+    {
+      "epoch": 2.503114556348751,
+      "grad_norm": 1.491973614542632,
+      "learning_rate": 8.12492570326075e-07,
+      "loss": 0.6489,
+      "step": 63290
+    },
+    {
+      "epoch": 2.50351005556764,
+      "grad_norm": 1.647248289622151,
+      "learning_rate": 8.112356702160046e-07,
+      "loss": 0.667,
+      "step": 63300
+    },
+    {
+      "epoch": 2.503905554786529,
+      "grad_norm": 1.4382096851185189,
+      "learning_rate": 8.09979657214457e-07,
+      "loss": 0.6597,
+      "step": 63310
+    },
+    {
+      "epoch": 2.5043010540054182,
+      "grad_norm": 1.4127706331466097,
+      "learning_rate": 8.08724531587432e-07,
+      "loss": 0.6322,
+      "step": 63320
+    },
+    {
+      "epoch": 2.5046965532243073,
+      "grad_norm": 1.8051513070023004,
+      "learning_rate": 8.074702936007472e-07,
+      "loss": 0.6568,
+      "step": 63330
+    },
+    {
+      "epoch": 2.5050920524431963,
+      "grad_norm": 1.8887772380015515,
+      "learning_rate": 8.062169435200267e-07,
+      "loss": 0.6395,
+      "step": 63340
+    },
+    {
+      "epoch": 2.5054875516620854,
+      "grad_norm": 1.5912545429694345,
+      "learning_rate": 8.049644816107094e-07,
+      "loss": 0.5802,
+      "step": 63350
+    },
+    {
+      "epoch": 2.5058830508809744,
+      "grad_norm": 1.6595292247544349,
+      "learning_rate": 8.037129081380429e-07,
+      "loss": 0.6536,
+      "step": 63360
+    },
+    {
+      "epoch": 2.5062785500998634,
+      "grad_norm": 1.4623974840906868,
+      "learning_rate": 8.024622233670926e-07,
+      "loss": 0.6474,
+      "step": 63370
+    },
+    {
+      "epoch": 2.5066740493187525,
+      "grad_norm": 1.4673617443372873,
+      "learning_rate": 8.012124275627292e-07,
+      "loss": 0.6596,
+      "step": 63380
+    },
+    {
+      "epoch": 2.5070695485376415,
+      "grad_norm": 1.7589369671854493,
+      "learning_rate": 7.9996352098964e-07,
+      "loss": 0.6719,
+      "step": 63390
+    },
+    {
+      "epoch": 2.5074650477565306,
+      "grad_norm": 1.216763752073658,
+      "learning_rate": 7.987155039123201e-07,
+      "loss": 0.6581,
+      "step": 63400
+    },
+    {
+      "epoch": 2.5078605469754196,
+      "grad_norm": 1.6247504919023326,
+      "learning_rate": 7.974683765950808e-07,
+      "loss": 0.6476,
+      "step": 63410
+    },
+    {
+      "epoch": 2.5082560461943086,
+      "grad_norm": 1.6316927824068348,
+      "learning_rate": 7.962221393020392e-07,
+      "loss": 0.6482,
+      "step": 63420
+    },
+    {
+      "epoch": 2.5086515454131977,
+      "grad_norm": 1.523106900589352,
+      "learning_rate": 7.949767922971302e-07,
+      "loss": 0.6467,
+      "step": 63430
+    },
+    {
+      "epoch": 2.5090470446320867,
+      "grad_norm": 1.5718266179477145,
+      "learning_rate": 7.937323358440935e-07,
+      "loss": 0.6564,
+      "step": 63440
+    },
+    {
+      "epoch": 2.5094425438509758,
+      "grad_norm": 1.6197619057117363,
+      "learning_rate": 7.924887702064882e-07,
+      "loss": 0.6044,
+      "step": 63450
+    },
+    {
+      "epoch": 2.509838043069865,
+      "grad_norm": 1.6228221150341298,
+      "learning_rate": 7.912460956476753e-07,
+      "loss": 0.6674,
+      "step": 63460
+    },
+    {
+      "epoch": 2.510233542288754,
+      "grad_norm": 1.4876497457779427,
+      "learning_rate": 7.900043124308354e-07,
+      "loss": 0.6816,
+      "step": 63470
+    },
+    {
+      "epoch": 2.510629041507643,
+      "grad_norm": 1.653460457016791,
+      "learning_rate": 7.887634208189543e-07,
+      "loss": 0.654,
+      "step": 63480
+    },
+    {
+      "epoch": 2.511024540726532,
+      "grad_norm": 1.3788636207437148,
+      "learning_rate": 7.875234210748339e-07,
+      "loss": 0.6551,
+      "step": 63490
+    },
+    {
+      "epoch": 2.511420039945421,
+      "grad_norm": 1.3921040566769074,
+      "learning_rate": 7.862843134610832e-07,
+      "loss": 0.6709,
+      "step": 63500
+    },
+    {
+      "epoch": 2.51181553916431,
+      "grad_norm": 1.469479419151567,
+      "learning_rate": 7.850460982401259e-07,
+      "loss": 0.6693,
+      "step": 63510
+    },
+    {
+      "epoch": 2.512211038383199,
+      "grad_norm": 1.8575011969455981,
+      "learning_rate": 7.838087756741919e-07,
+      "loss": 0.6254,
+      "step": 63520
+    },
+    {
+      "epoch": 2.512606537602088,
+      "grad_norm": 1.4649251746976057,
+      "learning_rate": 7.825723460253282e-07,
+      "loss": 0.6306,
+      "step": 63530
+    },
+    {
+      "epoch": 2.513002036820977,
+      "grad_norm": 1.6339690068494417,
+      "learning_rate": 7.81336809555388e-07,
+      "loss": 0.6609,
+      "step": 63540
+    },
+    {
+      "epoch": 2.513397536039866,
+      "grad_norm": 1.1678016073328654,
+      "learning_rate": 7.801021665260355e-07,
+      "loss": 0.6617,
+      "step": 63550
+    },
+    {
+      "epoch": 2.5137930352587556,
+      "grad_norm": 1.5747655237636606,
+      "learning_rate": 7.788684171987487e-07,
+      "loss": 0.6253,
+      "step": 63560
+    },
+    {
+      "epoch": 2.5141885344776442,
+      "grad_norm": 1.5584210004059487,
+      "learning_rate": 7.776355618348142e-07,
+      "loss": 0.6566,
+      "step": 63570
+    },
+    {
+      "epoch": 2.5145840336965337,
+      "grad_norm": 1.3341122680843094,
+      "learning_rate": 7.764036006953285e-07,
+      "loss": 0.6715,
+      "step": 63580
+    },
+    {
+      "epoch": 2.5149795329154223,
+      "grad_norm": 1.5556587834951154,
+      "learning_rate": 7.751725340412014e-07,
+      "loss": 0.6418,
+      "step": 63590
+    },
+    {
+      "epoch": 2.515375032134312,
+      "grad_norm": 1.3525840074132167,
+      "learning_rate": 7.73942362133151e-07,
+      "loss": 0.6608,
+      "step": 63600
+    },
+    {
+      "epoch": 2.5157705313532004,
+      "grad_norm": 1.4780082058622266,
+      "learning_rate": 7.72713085231706e-07,
+      "loss": 0.6422,
+      "step": 63610
+    },
+    {
+      "epoch": 2.51616603057209,
+      "grad_norm": 1.2975374943721156,
+      "learning_rate": 7.714847035972078e-07,
+      "loss": 0.6751,
+      "step": 63620
+    },
+    {
+      "epoch": 2.5165615297909785,
+      "grad_norm": 1.2297419535072853,
+      "learning_rate": 7.702572174898043e-07,
+      "loss": 0.6747,
+      "step": 63630
+    },
+    {
+      "epoch": 2.516957029009868,
+      "grad_norm": 1.7581706604975569,
+      "learning_rate": 7.690306271694587e-07,
+      "loss": 0.6347,
+      "step": 63640
+    },
+    {
+      "epoch": 2.5173525282287565,
+      "grad_norm": 1.3210786059057584,
+      "learning_rate": 7.678049328959391e-07,
+      "loss": 0.644,
+      "step": 63650
+    },
+    {
+      "epoch": 2.517748027447646,
+      "grad_norm": 1.4356136901919918,
+      "learning_rate": 7.665801349288294e-07,
+      "loss": 0.6156,
+      "step": 63660
+    },
+    {
+      "epoch": 2.5181435266665346,
+      "grad_norm": 1.855447833296287,
+      "learning_rate": 7.653562335275183e-07,
+      "loss": 0.6433,
+      "step": 63670
+    },
+    {
+      "epoch": 2.518539025885424,
+      "grad_norm": 1.4234518359546702,
+      "learning_rate": 7.641332289512094e-07,
+      "loss": 0.6532,
+      "step": 63680
+    },
+    {
+      "epoch": 2.5189345251043127,
+      "grad_norm": 1.606622494532379,
+      "learning_rate": 7.629111214589114e-07,
+      "loss": 0.6306,
+      "step": 63690
+    },
+    {
+      "epoch": 2.519330024323202,
+      "grad_norm": 1.3417351965958764,
+      "learning_rate": 7.616899113094495e-07,
+      "loss": 0.6412,
+      "step": 63700
+    },
+    {
+      "epoch": 2.519725523542091,
+      "grad_norm": 1.4463619080999224,
+      "learning_rate": 7.604695987614508e-07,
+      "loss": 0.6473,
+      "step": 63710
+    },
+    {
+      "epoch": 2.5201210227609803,
+      "grad_norm": 1.5176804577662928,
+      "learning_rate": 7.592501840733602e-07,
+      "loss": 0.6089,
+      "step": 63720
+    },
+    {
+      "epoch": 2.520516521979869,
+      "grad_norm": 1.1632054246897647,
+      "learning_rate": 7.580316675034255e-07,
+      "loss": 0.6658,
+      "step": 63730
+    },
+    {
+      "epoch": 2.5209120211987583,
+      "grad_norm": 1.652567896807858,
+      "learning_rate": 7.56814049309711e-07,
+      "loss": 0.6341,
+      "step": 63740
+    },
+    {
+      "epoch": 2.521307520417647,
+      "grad_norm": 1.6090079031867557,
+      "learning_rate": 7.555973297500841e-07,
+      "loss": 0.6301,
+      "step": 63750
+    },
+    {
+      "epoch": 2.5217030196365364,
+      "grad_norm": 1.5197996184173634,
+      "learning_rate": 7.543815090822288e-07,
+      "loss": 0.6535,
+      "step": 63760
+    },
+    {
+      "epoch": 2.522098518855425,
+      "grad_norm": 1.3457718741471285,
+      "learning_rate": 7.531665875636324e-07,
+      "loss": 0.673,
+      "step": 63770
+    },
+    {
+      "epoch": 2.5224940180743145,
+      "grad_norm": 1.3066599926141607,
+      "learning_rate": 7.519525654515941e-07,
+      "loss": 0.6569,
+      "step": 63780
+    },
+    {
+      "epoch": 2.522889517293203,
+      "grad_norm": 1.4335640762244388,
+      "learning_rate": 7.507394430032255e-07,
+      "loss": 0.6321,
+      "step": 63790
+    },
+    {
+      "epoch": 2.5232850165120926,
+      "grad_norm": 1.4828753579370653,
+      "learning_rate": 7.495272204754428e-07,
+      "loss": 0.6175,
+      "step": 63800
+    },
+    {
+      "epoch": 2.523680515730981,
+      "grad_norm": 1.7804579988153775,
+      "learning_rate": 7.483158981249761e-07,
+      "loss": 0.6413,
+      "step": 63810
+    },
+    {
+      "epoch": 2.5240760149498707,
+      "grad_norm": 1.5587827224832627,
+      "learning_rate": 7.471054762083608e-07,
+      "loss": 0.6523,
+      "step": 63820
+    },
+    {
+      "epoch": 2.5244715141687593,
+      "grad_norm": 1.242734199945498,
+      "learning_rate": 7.45895954981945e-07,
+      "loss": 0.6569,
+      "step": 63830
+    },
+    {
+      "epoch": 2.5248670133876487,
+      "grad_norm": 1.1548645219013924,
+      "learning_rate": 7.446873347018824e-07,
+      "loss": 0.6389,
+      "step": 63840
+    },
+    {
+      "epoch": 2.5252625126065373,
+      "grad_norm": 1.390645330296244,
+      "learning_rate": 7.434796156241403e-07,
+      "loss": 0.6637,
+      "step": 63850
+    },
+    {
+      "epoch": 2.525658011825427,
+      "grad_norm": 1.4611925248930342,
+      "learning_rate": 7.422727980044914e-07,
+      "loss": 0.6433,
+      "step": 63860
+    },
+    {
+      "epoch": 2.5260535110443154,
+      "grad_norm": 1.2388985783426072,
+      "learning_rate": 7.410668820985206e-07,
+      "loss": 0.6327,
+      "step": 63870
+    },
+    {
+      "epoch": 2.526449010263205,
+      "grad_norm": 1.4914892348947144,
+      "learning_rate": 7.398618681616182e-07,
+      "loss": 0.6289,
+      "step": 63880
+    },
+    {
+      "epoch": 2.526844509482094,
+      "grad_norm": 1.4462997489650091,
+      "learning_rate": 7.386577564489877e-07,
+      "loss": 0.6535,
+      "step": 63890
+    },
+    {
+      "epoch": 2.527240008700983,
+      "grad_norm": 1.5843617688194136,
+      "learning_rate": 7.374545472156374e-07,
+      "loss": 0.6201,
+      "step": 63900
+    },
+    {
+      "epoch": 2.527635507919872,
+      "grad_norm": 1.3444249863623576,
+      "learning_rate": 7.36252240716388e-07,
+      "loss": 0.6341,
+      "step": 63910
+    },
+    {
+      "epoch": 2.528031007138761,
+      "grad_norm": 1.5466589150827639,
+      "learning_rate": 7.350508372058657e-07,
+      "loss": 0.6555,
+      "step": 63920
+    },
+    {
+      "epoch": 2.52842650635765,
+      "grad_norm": 1.4055301225211925,
+      "learning_rate": 7.338503369385097e-07,
+      "loss": 0.6854,
+      "step": 63930
+    },
+    {
+      "epoch": 2.528822005576539,
+      "grad_norm": 1.5328363216445886,
+      "learning_rate": 7.326507401685634e-07,
+      "loss": 0.6965,
+      "step": 63940
+    },
+    {
+      "epoch": 2.529217504795428,
+      "grad_norm": 1.6822995299394083,
+      "learning_rate": 7.314520471500813e-07,
+      "loss": 0.6335,
+      "step": 63950
+    },
+    {
+      "epoch": 2.529613004014317,
+      "grad_norm": 1.7948228501110086,
+      "learning_rate": 7.302542581369254e-07,
+      "loss": 0.6182,
+      "step": 63960
+    },
+    {
+      "epoch": 2.5300085032332063,
+      "grad_norm": 1.5126533728553917,
+      "learning_rate": 7.290573733827683e-07,
+      "loss": 0.6239,
+      "step": 63970
+    },
+    {
+      "epoch": 2.5304040024520953,
+      "grad_norm": 1.4597173872060147,
+      "learning_rate": 7.278613931410877e-07,
+      "loss": 0.6487,
+      "step": 63980
+    },
+    {
+      "epoch": 2.5307995016709843,
+      "grad_norm": 1.4037056788550275,
+      "learning_rate": 7.266663176651739e-07,
+      "loss": 0.6566,
+      "step": 63990
+    },
+    {
+      "epoch": 2.5311950008898734,
+      "grad_norm": 1.3656886098733774,
+      "learning_rate": 7.254721472081227e-07,
+      "loss": 0.6514,
+      "step": 64000
+    },
+    {
+      "epoch": 2.5315905001087624,
+      "grad_norm": 1.7648473525926431,
+      "learning_rate": 7.242788820228381e-07,
+      "loss": 0.6673,
+      "step": 64010
+    },
+    {
+      "epoch": 2.5319859993276514,
+      "grad_norm": 1.4951716781466082,
+      "learning_rate": 7.230865223620342e-07,
+      "loss": 0.6656,
+      "step": 64020
+    },
+    {
+      "epoch": 2.5323814985465405,
+      "grad_norm": 1.518394802150982,
+      "learning_rate": 7.218950684782311e-07,
+      "loss": 0.6527,
+      "step": 64030
+    },
+    {
+      "epoch": 2.5327769977654295,
+      "grad_norm": 1.6029453925841384,
+      "learning_rate": 7.207045206237601e-07,
+      "loss": 0.6221,
+      "step": 64040
+    },
+    {
+      "epoch": 2.5331724969843186,
+      "grad_norm": 1.3012678994012925,
+      "learning_rate": 7.195148790507572e-07,
+      "loss": 0.6857,
+      "step": 64050
+    },
+    {
+      "epoch": 2.5335679962032076,
+      "grad_norm": 1.4442843991955365,
+      "learning_rate": 7.18326144011171e-07,
+      "loss": 0.6308,
+      "step": 64060
+    },
+    {
+      "epoch": 2.5339634954220966,
+      "grad_norm": 1.6481146908524857,
+      "learning_rate": 7.171383157567507e-07,
+      "loss": 0.632,
+      "step": 64070
+    },
+    {
+      "epoch": 2.5343589946409857,
+      "grad_norm": 1.481039199847829,
+      "learning_rate": 7.159513945390612e-07,
+      "loss": 0.6455,
+      "step": 64080
+    },
+    {
+      "epoch": 2.5347544938598747,
+      "grad_norm": 1.5901662236972298,
+      "learning_rate": 7.147653806094706e-07,
+      "loss": 0.6808,
+      "step": 64090
+    },
+    {
+      "epoch": 2.5351499930787638,
+      "grad_norm": 1.6810272782322087,
+      "learning_rate": 7.135802742191578e-07,
+      "loss": 0.6347,
+      "step": 64100
+    },
+    {
+      "epoch": 2.535545492297653,
+      "grad_norm": 1.685341120738639,
+      "learning_rate": 7.123960756191056e-07,
+      "loss": 0.6633,
+      "step": 64110
+    },
+    {
+      "epoch": 2.535940991516542,
+      "grad_norm": 1.4389745441286776,
+      "learning_rate": 7.112127850601103e-07,
+      "loss": 0.617,
+      "step": 64120
+    },
+    {
+      "epoch": 2.536336490735431,
+      "grad_norm": 1.3193730233786827,
+      "learning_rate": 7.100304027927696e-07,
+      "loss": 0.6449,
+      "step": 64130
+    },
+    {
+      "epoch": 2.53673198995432,
+      "grad_norm": 1.3535632068787948,
+      "learning_rate": 7.088489290674938e-07,
+      "loss": 0.6603,
+      "step": 64140
+    },
+    {
+      "epoch": 2.537127489173209,
+      "grad_norm": 1.5667684132141804,
+      "learning_rate": 7.076683641344972e-07,
+      "loss": 0.6243,
+      "step": 64150
+    },
+    {
+      "epoch": 2.537522988392098,
+      "grad_norm": 1.6672802724444284,
+      "learning_rate": 7.064887082438049e-07,
+      "loss": 0.6272,
+      "step": 64160
+    },
+    {
+      "epoch": 2.537918487610987,
+      "grad_norm": 1.4388337615207,
+      "learning_rate": 7.053099616452464e-07,
+      "loss": 0.6629,
+      "step": 64170
+    },
+    {
+      "epoch": 2.538313986829876,
+      "grad_norm": 1.666663529523057,
+      "learning_rate": 7.041321245884608e-07,
+      "loss": 0.6876,
+      "step": 64180
+    },
+    {
+      "epoch": 2.538709486048765,
+      "grad_norm": 1.378539536989363,
+      "learning_rate": 7.029551973228943e-07,
+      "loss": 0.6507,
+      "step": 64190
+    },
+    {
+      "epoch": 2.539104985267654,
+      "grad_norm": 1.4746413949833421,
+      "learning_rate": 7.017791800977991e-07,
+      "loss": 0.6922,
+      "step": 64200
+    },
+    {
+      "epoch": 2.539500484486543,
+      "grad_norm": 1.2625828816637943,
+      "learning_rate": 7.006040731622343e-07,
+      "loss": 0.6717,
+      "step": 64210
+    },
+    {
+      "epoch": 2.5398959837054322,
+      "grad_norm": 1.1690945069767034,
+      "learning_rate": 6.994298767650698e-07,
+      "loss": 0.6588,
+      "step": 64220
+    },
+    {
+      "epoch": 2.5402914829243213,
+      "grad_norm": 1.4394871701547498,
+      "learning_rate": 6.982565911549783e-07,
+      "loss": 0.6692,
+      "step": 64230
+    },
+    {
+      "epoch": 2.5406869821432103,
+      "grad_norm": 1.7926294439519617,
+      "learning_rate": 6.970842165804437e-07,
+      "loss": 0.6453,
+      "step": 64240
+    },
+    {
+      "epoch": 2.5410824813620994,
+      "grad_norm": 1.4632823611424701,
+      "learning_rate": 6.959127532897536e-07,
+      "loss": 0.6645,
+      "step": 64250
+    },
+    {
+      "epoch": 2.5414779805809884,
+      "grad_norm": 1.3098492414216825,
+      "learning_rate": 6.947422015310029e-07,
+      "loss": 0.6261,
+      "step": 64260
+    },
+    {
+      "epoch": 2.5418734797998774,
+      "grad_norm": 1.6759362917098144,
+      "learning_rate": 6.935725615520961e-07,
+      "loss": 0.6553,
+      "step": 64270
+    },
+    {
+      "epoch": 2.5422689790187665,
+      "grad_norm": 1.4048591046827679,
+      "learning_rate": 6.924038336007416e-07,
+      "loss": 0.6114,
+      "step": 64280
+    },
+    {
+      "epoch": 2.5426644782376555,
+      "grad_norm": 1.5032320314848409,
+      "learning_rate": 6.912360179244576e-07,
+      "loss": 0.6302,
+      "step": 64290
+    },
+    {
+      "epoch": 2.5430599774565446,
+      "grad_norm": 1.978846624091732,
+      "learning_rate": 6.900691147705652e-07,
+      "loss": 0.6296,
+      "step": 64300
+    },
+    {
+      "epoch": 2.5434554766754336,
+      "grad_norm": 1.4151085709598235,
+      "learning_rate": 6.889031243861982e-07,
+      "loss": 0.609,
+      "step": 64310
+    },
+    {
+      "epoch": 2.5438509758943226,
+      "grad_norm": 1.3592749604610284,
+      "learning_rate": 6.877380470182881e-07,
+      "loss": 0.6357,
+      "step": 64320
+    },
+    {
+      "epoch": 2.5442464751132117,
+      "grad_norm": 1.624499301516054,
+      "learning_rate": 6.865738829135827e-07,
+      "loss": 0.6172,
+      "step": 64330
+    },
+    {
+      "epoch": 2.5446419743321007,
+      "grad_norm": 1.3110736274845878,
+      "learning_rate": 6.854106323186294e-07,
+      "loss": 0.6445,
+      "step": 64340
+    },
+    {
+      "epoch": 2.5450374735509897,
+      "grad_norm": 1.3787268488250088,
+      "learning_rate": 6.842482954797869e-07,
+      "loss": 0.6364,
+      "step": 64350
+    },
+    {
+      "epoch": 2.545432972769879,
+      "grad_norm": 1.4867735525275818,
+      "learning_rate": 6.830868726432161e-07,
+      "loss": 0.6398,
+      "step": 64360
+    },
+    {
+      "epoch": 2.545828471988768,
+      "grad_norm": 1.3799749482140977,
+      "learning_rate": 6.819263640548884e-07,
+      "loss": 0.6412,
+      "step": 64370
+    },
+    {
+      "epoch": 2.546223971207657,
+      "grad_norm": 1.464857903363239,
+      "learning_rate": 6.807667699605775e-07,
+      "loss": 0.6466,
+      "step": 64380
+    },
+    {
+      "epoch": 2.546619470426546,
+      "grad_norm": 1.5043346964684243,
+      "learning_rate": 6.796080906058683e-07,
+      "loss": 0.6763,
+      "step": 64390
+    },
+    {
+      "epoch": 2.547014969645435,
+      "grad_norm": 1.5315236711721354,
+      "learning_rate": 6.784503262361464e-07,
+      "loss": 0.6106,
+      "step": 64400
+    },
+    {
+      "epoch": 2.547410468864324,
+      "grad_norm": 1.49336411608949,
+      "learning_rate": 6.772934770966089e-07,
+      "loss": 0.6599,
+      "step": 64410
+    },
+    {
+      "epoch": 2.547805968083213,
+      "grad_norm": 1.617251759033168,
+      "learning_rate": 6.761375434322559e-07,
+      "loss": 0.6686,
+      "step": 64420
+    },
+    {
+      "epoch": 2.548201467302102,
+      "grad_norm": 1.4681605835162652,
+      "learning_rate": 6.749825254878933e-07,
+      "loss": 0.6697,
+      "step": 64430
+    },
+    {
+      "epoch": 2.548596966520991,
+      "grad_norm": 1.5569685125586523,
+      "learning_rate": 6.738284235081344e-07,
+      "loss": 0.6736,
+      "step": 64440
+    },
+    {
+      "epoch": 2.54899246573988,
+      "grad_norm": 1.5922001412111684,
+      "learning_rate": 6.72675237737399e-07,
+      "loss": 0.6853,
+      "step": 64450
+    },
+    {
+      "epoch": 2.549387964958769,
+      "grad_norm": 1.5651530357971513,
+      "learning_rate": 6.715229684199109e-07,
+      "loss": 0.6976,
+      "step": 64460
+    },
+    {
+      "epoch": 2.549783464177658,
+      "grad_norm": 1.483646448211547,
+      "learning_rate": 6.703716157997031e-07,
+      "loss": 0.6324,
+      "step": 64470
+    },
+    {
+      "epoch": 2.5501789633965473,
+      "grad_norm": 1.6781225982285821,
+      "learning_rate": 6.692211801206111e-07,
+      "loss": 0.638,
+      "step": 64480
+    },
+    {
+      "epoch": 2.5505744626154363,
+      "grad_norm": 1.4220896570869705,
+      "learning_rate": 6.680716616262762e-07,
+      "loss": 0.6718,
+      "step": 64490
+    },
+    {
+      "epoch": 2.5509699618343253,
+      "grad_norm": 1.330645928287722,
+      "learning_rate": 6.66923060560149e-07,
+      "loss": 0.618,
+      "step": 64500
+    },
+    {
+      "epoch": 2.5513654610532144,
+      "grad_norm": 1.5587740110969288,
+      "learning_rate": 6.657753771654812e-07,
+      "loss": 0.6258,
+      "step": 64510
+    },
+    {
+      "epoch": 2.5517609602721034,
+      "grad_norm": 1.4081390674616483,
+      "learning_rate": 6.646286116853351e-07,
+      "loss": 0.6385,
+      "step": 64520
+    },
+    {
+      "epoch": 2.5521564594909925,
+      "grad_norm": 1.7677420048528714,
+      "learning_rate": 6.634827643625735e-07,
+      "loss": 0.6206,
+      "step": 64530
+    },
+    {
+      "epoch": 2.5525519587098815,
+      "grad_norm": 1.285515909697965,
+      "learning_rate": 6.623378354398691e-07,
+      "loss": 0.6588,
+      "step": 64540
+    },
+    {
+      "epoch": 2.5529474579287705,
+      "grad_norm": 1.4981960539225139,
+      "learning_rate": 6.611938251596978e-07,
+      "loss": 0.6319,
+      "step": 64550
+    },
+    {
+      "epoch": 2.5533429571476596,
+      "grad_norm": 1.280442623280671,
+      "learning_rate": 6.600507337643414e-07,
+      "loss": 0.6068,
+      "step": 64560
+    },
+    {
+      "epoch": 2.5537384563665486,
+      "grad_norm": 1.3930178025558675,
+      "learning_rate": 6.589085614958852e-07,
+      "loss": 0.644,
+      "step": 64570
+    },
+    {
+      "epoch": 2.5541339555854377,
+      "grad_norm": 1.4492186047061981,
+      "learning_rate": 6.57767308596225e-07,
+      "loss": 0.6498,
+      "step": 64580
+    },
+    {
+      "epoch": 2.5545294548043267,
+      "grad_norm": 1.592177634645699,
+      "learning_rate": 6.566269753070547e-07,
+      "loss": 0.6156,
+      "step": 64590
+    },
+    {
+      "epoch": 2.5549249540232157,
+      "grad_norm": 1.413915533106512,
+      "learning_rate": 6.554875618698813e-07,
+      "loss": 0.6319,
+      "step": 64600
+    },
+    {
+      "epoch": 2.5553204532421048,
+      "grad_norm": 1.691402239662337,
+      "learning_rate": 6.543490685260107e-07,
+      "loss": 0.6282,
+      "step": 64610
+    },
+    {
+      "epoch": 2.555715952460994,
+      "grad_norm": 1.4182407924117755,
+      "learning_rate": 6.532114955165569e-07,
+      "loss": 0.6222,
+      "step": 64620
+    },
+    {
+      "epoch": 2.556111451679883,
+      "grad_norm": 1.4428935534445777,
+      "learning_rate": 6.520748430824381e-07,
+      "loss": 0.6563,
+      "step": 64630
+    },
+    {
+      "epoch": 2.556506950898772,
+      "grad_norm": 1.375222417798048,
+      "learning_rate": 6.509391114643787e-07,
+      "loss": 0.6313,
+      "step": 64640
+    },
+    {
+      "epoch": 2.556902450117661,
+      "grad_norm": 1.7151935067741564,
+      "learning_rate": 6.49804300902907e-07,
+      "loss": 0.6186,
+      "step": 64650
+    },
+    {
+      "epoch": 2.55729794933655,
+      "grad_norm": 1.5579021075814052,
+      "learning_rate": 6.486704116383547e-07,
+      "loss": 0.631,
+      "step": 64660
+    },
+    {
+      "epoch": 2.557693448555439,
+      "grad_norm": 1.305896274034207,
+      "learning_rate": 6.475374439108623e-07,
+      "loss": 0.6564,
+      "step": 64670
+    },
+    {
+      "epoch": 2.558088947774328,
+      "grad_norm": 1.6039936468718605,
+      "learning_rate": 6.464053979603718e-07,
+      "loss": 0.6005,
+      "step": 64680
+    },
+    {
+      "epoch": 2.558484446993217,
+      "grad_norm": 1.5820520144580468,
+      "learning_rate": 6.45274274026631e-07,
+      "loss": 0.6261,
+      "step": 64690
+    },
+    {
+      "epoch": 2.558879946212106,
+      "grad_norm": 1.7683693120383075,
+      "learning_rate": 6.441440723491938e-07,
+      "loss": 0.6305,
+      "step": 64700
+    },
+    {
+      "epoch": 2.559275445430995,
+      "grad_norm": 1.259330393854891,
+      "learning_rate": 6.430147931674163e-07,
+      "loss": 0.6343,
+      "step": 64710
+    },
+    {
+      "epoch": 2.559670944649884,
+      "grad_norm": 1.404342326652451,
+      "learning_rate": 6.418864367204603e-07,
+      "loss": 0.6514,
+      "step": 64720
+    },
+    {
+      "epoch": 2.5600664438687732,
+      "grad_norm": 1.3370667182072586,
+      "learning_rate": 6.407590032472938e-07,
+      "loss": 0.6623,
+      "step": 64730
+    },
+    {
+      "epoch": 2.5604619430876623,
+      "grad_norm": 1.3470432245547215,
+      "learning_rate": 6.396324929866866e-07,
+      "loss": 0.6543,
+      "step": 64740
+    },
+    {
+      "epoch": 2.5608574423065513,
+      "grad_norm": 1.6588143553005,
+      "learning_rate": 6.385069061772154e-07,
+      "loss": 0.6637,
+      "step": 64750
+    },
+    {
+      "epoch": 2.5612529415254404,
+      "grad_norm": 1.391687738537518,
+      "learning_rate": 6.373822430572579e-07,
+      "loss": 0.626,
+      "step": 64760
+    },
+    {
+      "epoch": 2.5616484407443294,
+      "grad_norm": 1.4089109900553332,
+      "learning_rate": 6.362585038650021e-07,
+      "loss": 0.599,
+      "step": 64770
+    },
+    {
+      "epoch": 2.5620439399632184,
+      "grad_norm": 1.9344132896465636,
+      "learning_rate": 6.351356888384331e-07,
+      "loss": 0.6414,
+      "step": 64780
+    },
+    {
+      "epoch": 2.5624394391821075,
+      "grad_norm": 1.7518604994578053,
+      "learning_rate": 6.340137982153465e-07,
+      "loss": 0.6371,
+      "step": 64790
+    },
+    {
+      "epoch": 2.5628349384009965,
+      "grad_norm": 1.7207446991893527,
+      "learning_rate": 6.328928322333394e-07,
+      "loss": 0.6143,
+      "step": 64800
+    },
+    {
+      "epoch": 2.5632304376198856,
+      "grad_norm": 1.8064046863523273,
+      "learning_rate": 6.317727911298116e-07,
+      "loss": 0.613,
+      "step": 64810
+    },
+    {
+      "epoch": 2.5636259368387746,
+      "grad_norm": 1.3921089111696843,
+      "learning_rate": 6.306536751419684e-07,
+      "loss": 0.6432,
+      "step": 64820
+    },
+    {
+      "epoch": 2.5640214360576636,
+      "grad_norm": 1.5636391433656414,
+      "learning_rate": 6.295354845068214e-07,
+      "loss": 0.6594,
+      "step": 64830
+    },
+    {
+      "epoch": 2.5644169352765527,
+      "grad_norm": 1.5904359900860687,
+      "learning_rate": 6.284182194611821e-07,
+      "loss": 0.6687,
+      "step": 64840
+    },
+    {
+      "epoch": 2.5648124344954417,
+      "grad_norm": 1.7698590566229946,
+      "learning_rate": 6.273018802416703e-07,
+      "loss": 0.6641,
+      "step": 64850
+    },
+    {
+      "epoch": 2.5652079337143308,
+      "grad_norm": 1.3230267030166378,
+      "learning_rate": 6.261864670847051e-07,
+      "loss": 0.6525,
+      "step": 64860
+    },
+    {
+      "epoch": 2.56560343293322,
+      "grad_norm": 1.256391103949212,
+      "learning_rate": 6.250719802265142e-07,
+      "loss": 0.6716,
+      "step": 64870
+    },
+    {
+      "epoch": 2.565998932152109,
+      "grad_norm": 1.3886864404686912,
+      "learning_rate": 6.239584199031251e-07,
+      "loss": 0.6363,
+      "step": 64880
+    },
+    {
+      "epoch": 2.5663944313709983,
+      "grad_norm": 1.2004886167065867,
+      "learning_rate": 6.228457863503723e-07,
+      "loss": 0.6573,
+      "step": 64890
+    },
+    {
+      "epoch": 2.566789930589887,
+      "grad_norm": 1.8332682103011722,
+      "learning_rate": 6.217340798038917e-07,
+      "loss": 0.672,
+      "step": 64900
+    },
+    {
+      "epoch": 2.5671854298087764,
+      "grad_norm": 1.688646039930057,
+      "learning_rate": 6.20623300499123e-07,
+      "loss": 0.6288,
+      "step": 64910
+    },
+    {
+      "epoch": 2.567580929027665,
+      "grad_norm": 1.4683619682667337,
+      "learning_rate": 6.195134486713117e-07,
+      "loss": 0.6515,
+      "step": 64920
+    },
+    {
+      "epoch": 2.5679764282465545,
+      "grad_norm": 1.6056308905916619,
+      "learning_rate": 6.184045245555048e-07,
+      "loss": 0.6177,
+      "step": 64930
+    },
+    {
+      "epoch": 2.568371927465443,
+      "grad_norm": 1.3332388397797146,
+      "learning_rate": 6.172965283865528e-07,
+      "loss": 0.6297,
+      "step": 64940
+    },
+    {
+      "epoch": 2.5687674266843326,
+      "grad_norm": 1.51702282970225,
+      "learning_rate": 6.161894603991103e-07,
+      "loss": 0.6329,
+      "step": 64950
+    },
+    {
+      "epoch": 2.569162925903221,
+      "grad_norm": 1.3399787244702674,
+      "learning_rate": 6.150833208276363e-07,
+      "loss": 0.6369,
+      "step": 64960
+    },
+    {
+      "epoch": 2.5695584251221106,
+      "grad_norm": 1.3680672543378571,
+      "learning_rate": 6.139781099063913e-07,
+      "loss": 0.6587,
+      "step": 64970
+    },
+    {
+      "epoch": 2.5699539243409992,
+      "grad_norm": 1.4743134536453832,
+      "learning_rate": 6.128738278694412e-07,
+      "loss": 0.6071,
+      "step": 64980
+    },
+    {
+      "epoch": 2.5703494235598887,
+      "grad_norm": 1.3647499895725006,
+      "learning_rate": 6.11770474950652e-07,
+      "loss": 0.6278,
+      "step": 64990
+    },
+    {
+      "epoch": 2.5707449227787773,
+      "grad_norm": 1.7172077060608413,
+      "learning_rate": 6.106680513836976e-07,
+      "loss": 0.6347,
+      "step": 65000
+    },
+    {
+      "epoch": 2.571140421997667,
+      "grad_norm": 1.2993390458797738,
+      "learning_rate": 6.095665574020504e-07,
+      "loss": 0.6646,
+      "step": 65010
+    },
+    {
+      "epoch": 2.5715359212165554,
+      "grad_norm": 1.4848489083274365,
+      "learning_rate": 6.084659932389891e-07,
+      "loss": 0.6637,
+      "step": 65020
+    },
+    {
+      "epoch": 2.571931420435445,
+      "grad_norm": 1.5877402863902395,
+      "learning_rate": 6.07366359127593e-07,
+      "loss": 0.6503,
+      "step": 65030
+    },
+    {
+      "epoch": 2.5723269196543335,
+      "grad_norm": 1.5517251825422966,
+      "learning_rate": 6.062676553007485e-07,
+      "loss": 0.6612,
+      "step": 65040
+    },
+    {
+      "epoch": 2.572722418873223,
+      "grad_norm": 1.7784695102816384,
+      "learning_rate": 6.051698819911384e-07,
+      "loss": 0.6344,
+      "step": 65050
+    },
+    {
+      "epoch": 2.5731179180921115,
+      "grad_norm": 1.3031254714632408,
+      "learning_rate": 6.04073039431255e-07,
+      "loss": 0.6467,
+      "step": 65060
+    },
+    {
+      "epoch": 2.573513417311001,
+      "grad_norm": 1.7933600367778726,
+      "learning_rate": 6.029771278533891e-07,
+      "loss": 0.6199,
+      "step": 65070
+    },
+    {
+      "epoch": 2.5739089165298896,
+      "grad_norm": 1.4033180688369145,
+      "learning_rate": 6.01882147489638e-07,
+      "loss": 0.6497,
+      "step": 65080
+    },
+    {
+      "epoch": 2.574304415748779,
+      "grad_norm": 1.2791579512017908,
+      "learning_rate": 6.007880985718978e-07,
+      "loss": 0.6744,
+      "step": 65090
+    },
+    {
+      "epoch": 2.5746999149676677,
+      "grad_norm": 1.5819704182622671,
+      "learning_rate": 5.996949813318709e-07,
+      "loss": 0.6535,
+      "step": 65100
+    },
+    {
+      "epoch": 2.575095414186557,
+      "grad_norm": 1.6215734422411294,
+      "learning_rate": 5.986027960010593e-07,
+      "loss": 0.6249,
+      "step": 65110
+    },
+    {
+      "epoch": 2.575490913405446,
+      "grad_norm": 1.678272007338141,
+      "learning_rate": 5.975115428107709e-07,
+      "loss": 0.6557,
+      "step": 65120
+    },
+    {
+      "epoch": 2.5758864126243353,
+      "grad_norm": 1.2732338189743906,
+      "learning_rate": 5.964212219921134e-07,
+      "loss": 0.6324,
+      "step": 65130
+    },
+    {
+      "epoch": 2.576281911843224,
+      "grad_norm": 1.3369278773350985,
+      "learning_rate": 5.953318337759973e-07,
+      "loss": 0.661,
+      "step": 65140
+    },
+    {
+      "epoch": 2.5766774110621133,
+      "grad_norm": 1.4366000393816591,
+      "learning_rate": 5.942433783931378e-07,
+      "loss": 0.6404,
+      "step": 65150
+    },
+    {
+      "epoch": 2.577072910281002,
+      "grad_norm": 1.5049741622756876,
+      "learning_rate": 5.931558560740502e-07,
+      "loss": 0.6072,
+      "step": 65160
+    },
+    {
+      "epoch": 2.5774684094998914,
+      "grad_norm": 1.4558191823923379,
+      "learning_rate": 5.920692670490535e-07,
+      "loss": 0.6599,
+      "step": 65170
+    },
+    {
+      "epoch": 2.57786390871878,
+      "grad_norm": 1.3659451057860597,
+      "learning_rate": 5.909836115482693e-07,
+      "loss": 0.6119,
+      "step": 65180
+    },
+    {
+      "epoch": 2.5782594079376695,
+      "grad_norm": 1.3108490962184507,
+      "learning_rate": 5.898988898016201e-07,
+      "loss": 0.6234,
+      "step": 65190
+    },
+    {
+      "epoch": 2.578654907156558,
+      "grad_norm": 1.395860705573737,
+      "learning_rate": 5.888151020388299e-07,
+      "loss": 0.6626,
+      "step": 65200
+    },
+    {
+      "epoch": 2.5790504063754476,
+      "grad_norm": 1.8333726697416093,
+      "learning_rate": 5.877322484894288e-07,
+      "loss": 0.6515,
+      "step": 65210
+    },
+    {
+      "epoch": 2.579445905594336,
+      "grad_norm": 1.2140871888109008,
+      "learning_rate": 5.866503293827447e-07,
+      "loss": 0.6493,
+      "step": 65220
+    },
+    {
+      "epoch": 2.5798414048132257,
+      "grad_norm": 1.8742947173068751,
+      "learning_rate": 5.855693449479116e-07,
+      "loss": 0.6254,
+      "step": 65230
+    },
+    {
+      "epoch": 2.5802369040321147,
+      "grad_norm": 1.3742629590621933,
+      "learning_rate": 5.844892954138615e-07,
+      "loss": 0.648,
+      "step": 65240
+    },
+    {
+      "epoch": 2.5806324032510037,
+      "grad_norm": 1.5513769655278322,
+      "learning_rate": 5.834101810093324e-07,
+      "loss": 0.6327,
+      "step": 65250
+    },
+    {
+      "epoch": 2.5810279024698928,
+      "grad_norm": 1.6421918722944624,
+      "learning_rate": 5.823320019628598e-07,
+      "loss": 0.6564,
+      "step": 65260
+    },
+    {
+      "epoch": 2.581423401688782,
+      "grad_norm": 1.1995188456299204,
+      "learning_rate": 5.812547585027861e-07,
+      "loss": 0.6314,
+      "step": 65270
+    },
+    {
+      "epoch": 2.581818900907671,
+      "grad_norm": 1.7341415510130007,
+      "learning_rate": 5.801784508572505e-07,
+      "loss": 0.6268,
+      "step": 65280
+    },
+    {
+      "epoch": 2.58221440012656,
+      "grad_norm": 1.4444947634172702,
+      "learning_rate": 5.791030792541996e-07,
+      "loss": 0.6632,
+      "step": 65290
+    },
+    {
+      "epoch": 2.582609899345449,
+      "grad_norm": 1.4454786684461065,
+      "learning_rate": 5.780286439213756e-07,
+      "loss": 0.6268,
+      "step": 65300
+    },
+    {
+      "epoch": 2.583005398564338,
+      "grad_norm": 1.4044651480280266,
+      "learning_rate": 5.76955145086327e-07,
+      "loss": 0.6675,
+      "step": 65310
+    },
+    {
+      "epoch": 2.583400897783227,
+      "grad_norm": 1.4568383295623744,
+      "learning_rate": 5.758825829764014e-07,
+      "loss": 0.6679,
+      "step": 65320
+    },
+    {
+      "epoch": 2.583796397002116,
+      "grad_norm": 1.4287001495107357,
+      "learning_rate": 5.748109578187511e-07,
+      "loss": 0.671,
+      "step": 65330
+    },
+    {
+      "epoch": 2.584191896221005,
+      "grad_norm": 1.7176298639320215,
+      "learning_rate": 5.73740269840326e-07,
+      "loss": 0.6463,
+      "step": 65340
+    },
+    {
+      "epoch": 2.584587395439894,
+      "grad_norm": 1.44455304180914,
+      "learning_rate": 5.726705192678811e-07,
+      "loss": 0.6132,
+      "step": 65350
+    },
+    {
+      "epoch": 2.584982894658783,
+      "grad_norm": 1.328066761917533,
+      "learning_rate": 5.7160170632797e-07,
+      "loss": 0.6655,
+      "step": 65360
+    },
+    {
+      "epoch": 2.585378393877672,
+      "grad_norm": 1.5843005670391128,
+      "learning_rate": 5.705338312469483e-07,
+      "loss": 0.6335,
+      "step": 65370
+    },
+    {
+      "epoch": 2.5857738930965612,
+      "grad_norm": 1.6978839145878624,
+      "learning_rate": 5.694668942509763e-07,
+      "loss": 0.6692,
+      "step": 65380
+    },
+    {
+      "epoch": 2.5861693923154503,
+      "grad_norm": 1.872285829206385,
+      "learning_rate": 5.684008955660098e-07,
+      "loss": 0.6408,
+      "step": 65390
+    },
+    {
+      "epoch": 2.5865648915343393,
+      "grad_norm": 1.8226915418218141,
+      "learning_rate": 5.673358354178122e-07,
+      "loss": 0.607,
+      "step": 65400
+    },
+    {
+      "epoch": 2.5869603907532284,
+      "grad_norm": 1.2491721761377383,
+      "learning_rate": 5.66271714031943e-07,
+      "loss": 0.6553,
+      "step": 65410
+    },
+    {
+      "epoch": 2.5873558899721174,
+      "grad_norm": 1.381641801471228,
+      "learning_rate": 5.652085316337652e-07,
+      "loss": 0.6368,
+      "step": 65420
+    },
+    {
+      "epoch": 2.5877513891910064,
+      "grad_norm": 1.6237051709871286,
+      "learning_rate": 5.641462884484417e-07,
+      "loss": 0.676,
+      "step": 65430
+    },
+    {
+      "epoch": 2.5881468884098955,
+      "grad_norm": 1.3530778706896025,
+      "learning_rate": 5.630849847009396e-07,
+      "loss": 0.6748,
+      "step": 65440
+    },
+    {
+      "epoch": 2.5885423876287845,
+      "grad_norm": 1.82032126633611,
+      "learning_rate": 5.62024620616023e-07,
+      "loss": 0.6462,
+      "step": 65450
+    },
+    {
+      "epoch": 2.5889378868476736,
+      "grad_norm": 1.4043252529225034,
+      "learning_rate": 5.6096519641826e-07,
+      "loss": 0.6253,
+      "step": 65460
+    },
+    {
+      "epoch": 2.5893333860665626,
+      "grad_norm": 1.2400145032499792,
+      "learning_rate": 5.599067123320174e-07,
+      "loss": 0.6624,
+      "step": 65470
+    },
+    {
+      "epoch": 2.5897288852854516,
+      "grad_norm": 1.864662516035955,
+      "learning_rate": 5.588491685814651e-07,
+      "loss": 0.5777,
+      "step": 65480
+    },
+    {
+      "epoch": 2.5901243845043407,
+      "grad_norm": 1.3970398659563015,
+      "learning_rate": 5.577925653905713e-07,
+      "loss": 0.6373,
+      "step": 65490
+    },
+    {
+      "epoch": 2.5905198837232297,
+      "grad_norm": 1.4821470775298895,
+      "learning_rate": 5.567369029831088e-07,
+      "loss": 0.6529,
+      "step": 65500
+    },
+    {
+      "epoch": 2.5909153829421188,
+      "grad_norm": 1.579923038008987,
+      "learning_rate": 5.556821815826463e-07,
+      "loss": 0.6489,
+      "step": 65510
+    },
+    {
+      "epoch": 2.591310882161008,
+      "grad_norm": 1.7764681387432162,
+      "learning_rate": 5.54628401412558e-07,
+      "loss": 0.635,
+      "step": 65520
+    },
+    {
+      "epoch": 2.591706381379897,
+      "grad_norm": 1.480321787976223,
+      "learning_rate": 5.535755626960154e-07,
+      "loss": 0.6838,
+      "step": 65530
+    },
+    {
+      "epoch": 2.592101880598786,
+      "grad_norm": 1.478309609680217,
+      "learning_rate": 5.525236656559912e-07,
+      "loss": 0.6412,
+      "step": 65540
+    },
+    {
+      "epoch": 2.592497379817675,
+      "grad_norm": 1.5376367312659802,
+      "learning_rate": 5.51472710515259e-07,
+      "loss": 0.6475,
+      "step": 65550
+    },
+    {
+      "epoch": 2.592892879036564,
+      "grad_norm": 1.5763290316113068,
+      "learning_rate": 5.504226974963945e-07,
+      "loss": 0.6109,
+      "step": 65560
+    },
+    {
+      "epoch": 2.593288378255453,
+      "grad_norm": 1.746398331000172,
+      "learning_rate": 5.493736268217703e-07,
+      "loss": 0.6444,
+      "step": 65570
+    },
+    {
+      "epoch": 2.593683877474342,
+      "grad_norm": 1.341431239770582,
+      "learning_rate": 5.483254987135644e-07,
+      "loss": 0.6368,
+      "step": 65580
+    },
+    {
+      "epoch": 2.594079376693231,
+      "grad_norm": 1.403624033436707,
+      "learning_rate": 5.472783133937509e-07,
+      "loss": 0.6444,
+      "step": 65590
+    },
+    {
+      "epoch": 2.59447487591212,
+      "grad_norm": 1.1751479390279747,
+      "learning_rate": 5.462320710841046e-07,
+      "loss": 0.6682,
+      "step": 65600
+    },
+    {
+      "epoch": 2.594870375131009,
+      "grad_norm": 1.4945311682834728,
+      "learning_rate": 5.451867720062043e-07,
+      "loss": 0.6538,
+      "step": 65610
+    },
+    {
+      "epoch": 2.595265874349898,
+      "grad_norm": 1.6062316057132215,
+      "learning_rate": 5.441424163814235e-07,
+      "loss": 0.6459,
+      "step": 65620
+    },
+    {
+      "epoch": 2.5956613735687872,
+      "grad_norm": 1.4393111753530963,
+      "learning_rate": 5.430990044309415e-07,
+      "loss": 0.6503,
+      "step": 65630
+    },
+    {
+      "epoch": 2.5960568727876763,
+      "grad_norm": 1.8482574754564973,
+      "learning_rate": 5.42056536375733e-07,
+      "loss": 0.6418,
+      "step": 65640
+    },
+    {
+      "epoch": 2.5964523720065653,
+      "grad_norm": 1.2775102832349554,
+      "learning_rate": 5.410150124365765e-07,
+      "loss": 0.6368,
+      "step": 65650
+    },
+    {
+      "epoch": 2.5968478712254544,
+      "grad_norm": 1.354447918318132,
+      "learning_rate": 5.399744328340484e-07,
+      "loss": 0.6397,
+      "step": 65660
+    },
+    {
+      "epoch": 2.5972433704443434,
+      "grad_norm": 1.7722332729393773,
+      "learning_rate": 5.389347977885257e-07,
+      "loss": 0.6413,
+      "step": 65670
+    },
+    {
+      "epoch": 2.5976388696632324,
+      "grad_norm": 1.6760909974915852,
+      "learning_rate": 5.378961075201833e-07,
+      "loss": 0.6503,
+      "step": 65680
+    },
+    {
+      "epoch": 2.5980343688821215,
+      "grad_norm": 1.5416730261143272,
+      "learning_rate": 5.368583622490009e-07,
+      "loss": 0.6261,
+      "step": 65690
+    },
+    {
+      "epoch": 2.5984298681010105,
+      "grad_norm": 1.4664663984519084,
+      "learning_rate": 5.358215621947532e-07,
+      "loss": 0.6777,
+      "step": 65700
+    },
+    {
+      "epoch": 2.5988253673198995,
+      "grad_norm": 1.8338741320098175,
+      "learning_rate": 5.347857075770185e-07,
+      "loss": 0.6085,
+      "step": 65710
+    },
+    {
+      "epoch": 2.5992208665387886,
+      "grad_norm": 1.2743189577164808,
+      "learning_rate": 5.337507986151713e-07,
+      "loss": 0.6309,
+      "step": 65720
+    },
+    {
+      "epoch": 2.5996163657576776,
+      "grad_norm": 1.4289964181188688,
+      "learning_rate": 5.327168355283891e-07,
+      "loss": 0.6552,
+      "step": 65730
+    },
+    {
+      "epoch": 2.6000118649765667,
+      "grad_norm": 1.2660059470743217,
+      "learning_rate": 5.316838185356454e-07,
+      "loss": 0.6132,
+      "step": 65740
+    },
+    {
+      "epoch": 2.6004073641954557,
+      "grad_norm": 1.3859608823350247,
+      "learning_rate": 5.306517478557183e-07,
+      "loss": 0.6443,
+      "step": 65750
+    },
+    {
+      "epoch": 2.6008028634143447,
+      "grad_norm": 1.4016103657751104,
+      "learning_rate": 5.296206237071799e-07,
+      "loss": 0.6381,
+      "step": 65760
+    },
+    {
+      "epoch": 2.601198362633234,
+      "grad_norm": 1.482216362906679,
+      "learning_rate": 5.285904463084074e-07,
+      "loss": 0.6265,
+      "step": 65770
+    },
+    {
+      "epoch": 2.601593861852123,
+      "grad_norm": 1.400283819206724,
+      "learning_rate": 5.275612158775734e-07,
+      "loss": 0.6555,
+      "step": 65780
+    },
+    {
+      "epoch": 2.601989361071012,
+      "grad_norm": 1.5144468067868355,
+      "learning_rate": 5.265329326326512e-07,
+      "loss": 0.6247,
+      "step": 65790
+    },
+    {
+      "epoch": 2.602384860289901,
+      "grad_norm": 1.6692677941281382,
+      "learning_rate": 5.255055967914125e-07,
+      "loss": 0.6729,
+      "step": 65800
+    },
+    {
+      "epoch": 2.60278035950879,
+      "grad_norm": 1.5714983036438803,
+      "learning_rate": 5.244792085714312e-07,
+      "loss": 0.6247,
+      "step": 65810
+    },
+    {
+      "epoch": 2.603175858727679,
+      "grad_norm": 1.4483053168443896,
+      "learning_rate": 5.23453768190077e-07,
+      "loss": 0.6426,
+      "step": 65820
+    },
+    {
+      "epoch": 2.603571357946568,
+      "grad_norm": 1.398570202727538,
+      "learning_rate": 5.224292758645233e-07,
+      "loss": 0.6789,
+      "step": 65830
+    },
+    {
+      "epoch": 2.603966857165457,
+      "grad_norm": 1.3141073894710729,
+      "learning_rate": 5.214057318117378e-07,
+      "loss": 0.6701,
+      "step": 65840
+    },
+    {
+      "epoch": 2.604362356384346,
+      "grad_norm": 1.2438240004294592,
+      "learning_rate": 5.203831362484896e-07,
+      "loss": 0.6722,
+      "step": 65850
+    },
+    {
+      "epoch": 2.604757855603235,
+      "grad_norm": 1.4024661360095372,
+      "learning_rate": 5.193614893913484e-07,
+      "loss": 0.6816,
+      "step": 65860
+    },
+    {
+      "epoch": 2.605153354822124,
+      "grad_norm": 1.3013663084535005,
+      "learning_rate": 5.183407914566796e-07,
+      "loss": 0.6537,
+      "step": 65870
+    },
+    {
+      "epoch": 2.605548854041013,
+      "grad_norm": 1.461307100114189,
+      "learning_rate": 5.173210426606512e-07,
+      "loss": 0.677,
+      "step": 65880
+    },
+    {
+      "epoch": 2.6059443532599023,
+      "grad_norm": 1.5037038967480247,
+      "learning_rate": 5.163022432192272e-07,
+      "loss": 0.6461,
+      "step": 65890
+    },
+    {
+      "epoch": 2.6063398524787913,
+      "grad_norm": 1.6788525185134127,
+      "learning_rate": 5.152843933481744e-07,
+      "loss": 0.657,
+      "step": 65900
+    },
+    {
+      "epoch": 2.6067353516976803,
+      "grad_norm": 1.531500435506248,
+      "learning_rate": 5.14267493263052e-07,
+      "loss": 0.6538,
+      "step": 65910
+    },
+    {
+      "epoch": 2.6071308509165694,
+      "grad_norm": 1.4066417904709896,
+      "learning_rate": 5.132515431792251e-07,
+      "loss": 0.613,
+      "step": 65920
+    },
+    {
+      "epoch": 2.6075263501354584,
+      "grad_norm": 1.2227901604776632,
+      "learning_rate": 5.122365433118526e-07,
+      "loss": 0.6637,
+      "step": 65930
+    },
+    {
+      "epoch": 2.6079218493543475,
+      "grad_norm": 1.4510534327965838,
+      "learning_rate": 5.112224938758964e-07,
+      "loss": 0.6328,
+      "step": 65940
+    },
+    {
+      "epoch": 2.6083173485732365,
+      "grad_norm": 1.4457845330829098,
+      "learning_rate": 5.10209395086112e-07,
+      "loss": 0.6725,
+      "step": 65950
+    },
+    {
+      "epoch": 2.6087128477921255,
+      "grad_norm": 1.69543710852855,
+      "learning_rate": 5.091972471570589e-07,
+      "loss": 0.6189,
+      "step": 65960
+    },
+    {
+      "epoch": 2.6091083470110146,
+      "grad_norm": 1.3225714447403205,
+      "learning_rate": 5.081860503030906e-07,
+      "loss": 0.6507,
+      "step": 65970
+    },
+    {
+      "epoch": 2.6095038462299036,
+      "grad_norm": 1.5105313988423044,
+      "learning_rate": 5.07175804738363e-07,
+      "loss": 0.6666,
+      "step": 65980
+    },
+    {
+      "epoch": 2.6098993454487927,
+      "grad_norm": 1.62226883544106,
+      "learning_rate": 5.061665106768271e-07,
+      "loss": 0.6518,
+      "step": 65990
+    },
+    {
+      "epoch": 2.6102948446676817,
+      "grad_norm": 1.9588686832610245,
+      "learning_rate": 5.051581683322365e-07,
+      "loss": 0.6314,
+      "step": 66000
+    },
+    {
+      "epoch": 2.6106903438865707,
+      "grad_norm": 1.7156386895046971,
+      "learning_rate": 5.041507779181388e-07,
+      "loss": 0.6274,
+      "step": 66010
+    },
+    {
+      "epoch": 2.6110858431054598,
+      "grad_norm": 1.6515180010712787,
+      "learning_rate": 5.03144339647883e-07,
+      "loss": 0.6576,
+      "step": 66020
+    },
+    {
+      "epoch": 2.611481342324349,
+      "grad_norm": 1.1889228329078885,
+      "learning_rate": 5.021388537346139e-07,
+      "loss": 0.6502,
+      "step": 66030
+    },
+    {
+      "epoch": 2.611876841543238,
+      "grad_norm": 1.4509807141328215,
+      "learning_rate": 5.011343203912783e-07,
+      "loss": 0.63,
+      "step": 66040
+    },
+    {
+      "epoch": 2.612272340762127,
+      "grad_norm": 1.2892090941749295,
+      "learning_rate": 5.001307398306171e-07,
+      "loss": 0.6491,
+      "step": 66050
+    },
+    {
+      "epoch": 2.612667839981016,
+      "grad_norm": 1.592465753293887,
+      "learning_rate": 4.991281122651736e-07,
+      "loss": 0.6252,
+      "step": 66060
+    },
+    {
+      "epoch": 2.613063339199905,
+      "grad_norm": 1.3656516042900058,
+      "learning_rate": 4.981264379072864e-07,
+      "loss": 0.6587,
+      "step": 66070
+    },
+    {
+      "epoch": 2.613458838418794,
+      "grad_norm": 1.6250125310512444,
+      "learning_rate": 4.971257169690919e-07,
+      "loss": 0.6481,
+      "step": 66080
+    },
+    {
+      "epoch": 2.613854337637683,
+      "grad_norm": 1.9221780773238208,
+      "learning_rate": 4.96125949662527e-07,
+      "loss": 0.6392,
+      "step": 66090
+    },
+    {
+      "epoch": 2.614249836856572,
+      "grad_norm": 1.228444389820104,
+      "learning_rate": 4.951271361993237e-07,
+      "loss": 0.6456,
+      "step": 66100
+    },
+    {
+      "epoch": 2.614645336075461,
+      "grad_norm": 1.2933064230881441,
+      "learning_rate": 4.941292767910161e-07,
+      "loss": 0.6625,
+      "step": 66110
+    },
+    {
+      "epoch": 2.61504083529435,
+      "grad_norm": 1.9606588973178931,
+      "learning_rate": 4.931323716489306e-07,
+      "loss": 0.6522,
+      "step": 66120
+    },
+    {
+      "epoch": 2.615436334513239,
+      "grad_norm": 1.586729380817973,
+      "learning_rate": 4.921364209841978e-07,
+      "loss": 0.6488,
+      "step": 66130
+    },
+    {
+      "epoch": 2.6158318337321282,
+      "grad_norm": 1.4016925681263692,
+      "learning_rate": 4.911414250077412e-07,
+      "loss": 0.6429,
+      "step": 66140
+    },
+    {
+      "epoch": 2.6162273329510173,
+      "grad_norm": 1.448621941728291,
+      "learning_rate": 4.901473839302839e-07,
+      "loss": 0.6207,
+      "step": 66150
+    },
+    {
+      "epoch": 2.6166228321699063,
+      "grad_norm": 1.495869245144115,
+      "learning_rate": 4.891542979623465e-07,
+      "loss": 0.617,
+      "step": 66160
+    },
+    {
+      "epoch": 2.6170183313887954,
+      "grad_norm": 1.186091347328476,
+      "learning_rate": 4.881621673142489e-07,
+      "loss": 0.6554,
+      "step": 66170
+    },
+    {
+      "epoch": 2.6174138306076844,
+      "grad_norm": 1.4066447255639665,
+      "learning_rate": 4.87170992196106e-07,
+      "loss": 0.654,
+      "step": 66180
+    },
+    {
+      "epoch": 2.6178093298265734,
+      "grad_norm": 1.3191814178400343,
+      "learning_rate": 4.861807728178331e-07,
+      "loss": 0.6691,
+      "step": 66190
+    },
+    {
+      "epoch": 2.6182048290454625,
+      "grad_norm": 1.4579616457467213,
+      "learning_rate": 4.851915093891396e-07,
+      "loss": 0.6561,
+      "step": 66200
+    },
+    {
+      "epoch": 2.6186003282643515,
+      "grad_norm": 1.6474097434634207,
+      "learning_rate": 4.842032021195376e-07,
+      "loss": 0.6462,
+      "step": 66210
+    },
+    {
+      "epoch": 2.6189958274832406,
+      "grad_norm": 1.4782575772413122,
+      "learning_rate": 4.832158512183299e-07,
+      "loss": 0.6019,
+      "step": 66220
+    },
+    {
+      "epoch": 2.6193913267021296,
+      "grad_norm": 1.7916294109319313,
+      "learning_rate": 4.82229456894624e-07,
+      "loss": 0.6443,
+      "step": 66230
+    },
+    {
+      "epoch": 2.619786825921019,
+      "grad_norm": 2.0669091433373183,
+      "learning_rate": 4.812440193573197e-07,
+      "loss": 0.65,
+      "step": 66240
+    },
+    {
+      "epoch": 2.6201823251399077,
+      "grad_norm": 1.7944506849343103,
+      "learning_rate": 4.802595388151154e-07,
+      "loss": 0.6307,
+      "step": 66250
+    },
+    {
+      "epoch": 2.620577824358797,
+      "grad_norm": 1.521611640318423,
+      "learning_rate": 4.792760154765086e-07,
+      "loss": 0.6724,
+      "step": 66260
+    },
+    {
+      "epoch": 2.6209733235776858,
+      "grad_norm": 1.3096520307171977,
+      "learning_rate": 4.782934495497915e-07,
+      "loss": 0.6257,
+      "step": 66270
+    },
+    {
+      "epoch": 2.6213688227965752,
+      "grad_norm": 1.5461989241147014,
+      "learning_rate": 4.773118412430538e-07,
+      "loss": 0.6651,
+      "step": 66280
+    },
+    {
+      "epoch": 2.621764322015464,
+      "grad_norm": 1.3400524696883678,
+      "learning_rate": 4.76331190764186e-07,
+      "loss": 0.6291,
+      "step": 66290
+    },
+    {
+      "epoch": 2.6221598212343533,
+      "grad_norm": 1.4785937689506297,
+      "learning_rate": 4.753514983208718e-07,
+      "loss": 0.6352,
+      "step": 66300
+    },
+    {
+      "epoch": 2.622555320453242,
+      "grad_norm": 1.180513996359601,
+      "learning_rate": 4.74372764120592e-07,
+      "loss": 0.6276,
+      "step": 66310
+    },
+    {
+      "epoch": 2.6229508196721314,
+      "grad_norm": 1.3848742090224064,
+      "learning_rate": 4.733949883706274e-07,
+      "loss": 0.6542,
+      "step": 66320
+    },
+    {
+      "epoch": 2.62334631889102,
+      "grad_norm": 1.2880722612427167,
+      "learning_rate": 4.724181712780529e-07,
+      "loss": 0.6459,
+      "step": 66330
+    },
+    {
+      "epoch": 2.6237418181099095,
+      "grad_norm": 1.6418835102149247,
+      "learning_rate": 4.714423130497436e-07,
+      "loss": 0.6335,
+      "step": 66340
+    },
+    {
+      "epoch": 2.624137317328798,
+      "grad_norm": 1.1854161039246214,
+      "learning_rate": 4.7046741389236704e-07,
+      "loss": 0.6794,
+      "step": 66350
+    },
+    {
+      "epoch": 2.6245328165476876,
+      "grad_norm": 1.4350707385400354,
+      "learning_rate": 4.694934740123924e-07,
+      "loss": 0.6673,
+      "step": 66360
+    },
+    {
+      "epoch": 2.624928315766576,
+      "grad_norm": 1.2049319144411197,
+      "learning_rate": 4.685204936160814e-07,
+      "loss": 0.6581,
+      "step": 66370
+    },
+    {
+      "epoch": 2.6253238149854656,
+      "grad_norm": 1.4943565124130345,
+      "learning_rate": 4.67548472909497e-07,
+      "loss": 0.6418,
+      "step": 66380
+    },
+    {
+      "epoch": 2.6257193142043542,
+      "grad_norm": 1.6376707669612973,
+      "learning_rate": 4.665774120984951e-07,
+      "loss": 0.6431,
+      "step": 66390
+    },
+    {
+      "epoch": 2.6261148134232437,
+      "grad_norm": 1.804583110026279,
+      "learning_rate": 4.6560731138872993e-07,
+      "loss": 0.6203,
+      "step": 66400
+    },
+    {
+      "epoch": 2.6265103126421323,
+      "grad_norm": 1.6513561468535503,
+      "learning_rate": 4.6463817098565067e-07,
+      "loss": 0.6537,
+      "step": 66410
+    },
+    {
+      "epoch": 2.626905811861022,
+      "grad_norm": 1.288928900264162,
+      "learning_rate": 4.636699910945075e-07,
+      "loss": 0.6435,
+      "step": 66420
+    },
+    {
+      "epoch": 2.6273013110799104,
+      "grad_norm": 1.7691904867964667,
+      "learning_rate": 4.62702771920342e-07,
+      "loss": 0.6214,
+      "step": 66430
+    },
+    {
+      "epoch": 2.6276968102988,
+      "grad_norm": 1.487699400478626,
+      "learning_rate": 4.6173651366799634e-07,
+      "loss": 0.6408,
+      "step": 66440
+    },
+    {
+      "epoch": 2.6280923095176885,
+      "grad_norm": 1.3436567914096835,
+      "learning_rate": 4.607712165421058e-07,
+      "loss": 0.6614,
+      "step": 66450
+    },
+    {
+      "epoch": 2.628487808736578,
+      "grad_norm": 1.3544132189882172,
+      "learning_rate": 4.5980688074710566e-07,
+      "loss": 0.6343,
+      "step": 66460
+    },
+    {
+      "epoch": 2.6288833079554665,
+      "grad_norm": 1.5119038218419327,
+      "learning_rate": 4.5884350648722374e-07,
+      "loss": 0.6615,
+      "step": 66470
+    },
+    {
+      "epoch": 2.629278807174356,
+      "grad_norm": 1.3001043451843204,
+      "learning_rate": 4.578810939664885e-07,
+      "loss": 0.687,
+      "step": 66480
+    },
+    {
+      "epoch": 2.6296743063932446,
+      "grad_norm": 1.6595015704129095,
+      "learning_rate": 4.5691964338872075e-07,
+      "loss": 0.6675,
+      "step": 66490
+    },
+    {
+      "epoch": 2.630069805612134,
+      "grad_norm": 1.5078063069290257,
+      "learning_rate": 4.5595915495753873e-07,
+      "loss": 0.6433,
+      "step": 66500
+    },
+    {
+      "epoch": 2.6304653048310227,
+      "grad_norm": 1.435052566826052,
+      "learning_rate": 4.549996288763592e-07,
+      "loss": 0.6355,
+      "step": 66510
+    },
+    {
+      "epoch": 2.630860804049912,
+      "grad_norm": 1.3746795233069786,
+      "learning_rate": 4.5404106534839233e-07,
+      "loss": 0.6673,
+      "step": 66520
+    },
+    {
+      "epoch": 2.631256303268801,
+      "grad_norm": 1.2458315954306327,
+      "learning_rate": 4.5308346457664573e-07,
+      "loss": 0.6544,
+      "step": 66530
+    },
+    {
+      "epoch": 2.6316518024876903,
+      "grad_norm": 1.2212836465971777,
+      "learning_rate": 4.5212682676392174e-07,
+      "loss": 0.6423,
+      "step": 66540
+    },
+    {
+      "epoch": 2.632047301706579,
+      "grad_norm": 1.5625978161652871,
+      "learning_rate": 4.51171152112822e-07,
+      "loss": 0.627,
+      "step": 66550
+    },
+    {
+      "epoch": 2.6324428009254683,
+      "grad_norm": 1.6740385092598458,
+      "learning_rate": 4.502164408257398e-07,
+      "loss": 0.6406,
+      "step": 66560
+    },
+    {
+      "epoch": 2.6328383001443574,
+      "grad_norm": 1.390000813214016,
+      "learning_rate": 4.492626931048688e-07,
+      "loss": 0.6463,
+      "step": 66570
+    },
+    {
+      "epoch": 2.6332337993632464,
+      "grad_norm": 1.2996125579848212,
+      "learning_rate": 4.483099091521936e-07,
+      "loss": 0.6256,
+      "step": 66580
+    },
+    {
+      "epoch": 2.6336292985821355,
+      "grad_norm": 1.4238411976058616,
+      "learning_rate": 4.473580891695012e-07,
+      "loss": 0.6468,
+      "step": 66590
+    },
+    {
+      "epoch": 2.6340247978010245,
+      "grad_norm": 1.5538501448890076,
+      "learning_rate": 4.464072333583669e-07,
+      "loss": 0.6266,
+      "step": 66600
+    },
+    {
+      "epoch": 2.6344202970199135,
+      "grad_norm": 1.336802486191245,
+      "learning_rate": 4.454573419201691e-07,
+      "loss": 0.6551,
+      "step": 66610
+    },
+    {
+      "epoch": 2.6348157962388026,
+      "grad_norm": 1.4376934215654078,
+      "learning_rate": 4.445084150560758e-07,
+      "loss": 0.6352,
+      "step": 66620
+    },
+    {
+      "epoch": 2.6352112954576916,
+      "grad_norm": 1.6694939470526333,
+      "learning_rate": 4.4356045296705676e-07,
+      "loss": 0.6499,
+      "step": 66630
+    },
+    {
+      "epoch": 2.6356067946765807,
+      "grad_norm": 1.7201483962088393,
+      "learning_rate": 4.426134558538697e-07,
+      "loss": 0.652,
+      "step": 66640
+    },
+    {
+      "epoch": 2.6360022938954697,
+      "grad_norm": 1.3728509963743392,
+      "learning_rate": 4.4166742391707593e-07,
+      "loss": 0.6375,
+      "step": 66650
+    },
+    {
+      "epoch": 2.6363977931143587,
+      "grad_norm": 1.472184419224856,
+      "learning_rate": 4.4072235735702684e-07,
+      "loss": 0.6308,
+      "step": 66660
+    },
+    {
+      "epoch": 2.6367932923332478,
+      "grad_norm": 1.6987875377420738,
+      "learning_rate": 4.3977825637387284e-07,
+      "loss": 0.6244,
+      "step": 66670
+    },
+    {
+      "epoch": 2.637188791552137,
+      "grad_norm": 1.7906711340462902,
+      "learning_rate": 4.388351211675562e-07,
+      "loss": 0.6316,
+      "step": 66680
+    },
+    {
+      "epoch": 2.637584290771026,
+      "grad_norm": 1.5727263411124601,
+      "learning_rate": 4.3789295193781944e-07,
+      "loss": 0.6458,
+      "step": 66690
+    },
+    {
+      "epoch": 2.637979789989915,
+      "grad_norm": 1.753860939783427,
+      "learning_rate": 4.3695174888419566e-07,
+      "loss": 0.6201,
+      "step": 66700
+    },
+    {
+      "epoch": 2.638375289208804,
+      "grad_norm": 1.5859584062220233,
+      "learning_rate": 4.3601151220601713e-07,
+      "loss": 0.5974,
+      "step": 66710
+    },
+    {
+      "epoch": 2.638770788427693,
+      "grad_norm": 1.5678573027066647,
+      "learning_rate": 4.350722421024095e-07,
+      "loss": 0.6461,
+      "step": 66720
+    },
+    {
+      "epoch": 2.639166287646582,
+      "grad_norm": 1.4199341610106007,
+      "learning_rate": 4.341339387722926e-07,
+      "loss": 0.6716,
+      "step": 66730
+    },
+    {
+      "epoch": 2.639561786865471,
+      "grad_norm": 1.2020944948214642,
+      "learning_rate": 4.3319660241438577e-07,
+      "loss": 0.6666,
+      "step": 66740
+    },
+    {
+      "epoch": 2.63995728608436,
+      "grad_norm": 1.3431146074968838,
+      "learning_rate": 4.3226023322719746e-07,
+      "loss": 0.658,
+      "step": 66750
+    },
+    {
+      "epoch": 2.640352785303249,
+      "grad_norm": 1.602487495302267,
+      "learning_rate": 4.313248314090379e-07,
+      "loss": 0.6427,
+      "step": 66760
+    },
+    {
+      "epoch": 2.640748284522138,
+      "grad_norm": 1.4788348998753265,
+      "learning_rate": 4.30390397158007e-07,
+      "loss": 0.678,
+      "step": 66770
+    },
+    {
+      "epoch": 2.641143783741027,
+      "grad_norm": 1.6903540426726307,
+      "learning_rate": 4.2945693067200313e-07,
+      "loss": 0.6275,
+      "step": 66780
+    },
+    {
+      "epoch": 2.6415392829599162,
+      "grad_norm": 1.4297571612996394,
+      "learning_rate": 4.28524432148717e-07,
+      "loss": 0.6514,
+      "step": 66790
+    },
+    {
+      "epoch": 2.6419347821788053,
+      "grad_norm": 1.5165658895594936,
+      "learning_rate": 4.2759290178563727e-07,
+      "loss": 0.6416,
+      "step": 66800
+    },
+    {
+      "epoch": 2.6423302813976943,
+      "grad_norm": 1.3195290811336806,
+      "learning_rate": 4.2666233978004513e-07,
+      "loss": 0.6493,
+      "step": 66810
+    },
+    {
+      "epoch": 2.6427257806165834,
+      "grad_norm": 1.348959330811117,
+      "learning_rate": 4.257327463290184e-07,
+      "loss": 0.6614,
+      "step": 66820
+    },
+    {
+      "epoch": 2.6431212798354724,
+      "grad_norm": 1.5874236226395562,
+      "learning_rate": 4.248041216294285e-07,
+      "loss": 0.6408,
+      "step": 66830
+    },
+    {
+      "epoch": 2.6435167790543614,
+      "grad_norm": 1.8629482742181673,
+      "learning_rate": 4.238764658779432e-07,
+      "loss": 0.6554,
+      "step": 66840
+    },
+    {
+      "epoch": 2.6439122782732505,
+      "grad_norm": 1.6249435092101672,
+      "learning_rate": 4.2294977927102244e-07,
+      "loss": 0.6373,
+      "step": 66850
+    },
+    {
+      "epoch": 2.6443077774921395,
+      "grad_norm": 1.4046606342939152,
+      "learning_rate": 4.220240620049243e-07,
+      "loss": 0.6299,
+      "step": 66860
+    },
+    {
+      "epoch": 2.6447032767110286,
+      "grad_norm": 1.5306101148762647,
+      "learning_rate": 4.210993142756986e-07,
+      "loss": 0.6246,
+      "step": 66870
+    },
+    {
+      "epoch": 2.6450987759299176,
+      "grad_norm": 1.529327978440233,
+      "learning_rate": 4.201755362791932e-07,
+      "loss": 0.6279,
+      "step": 66880
+    },
+    {
+      "epoch": 2.6454942751488066,
+      "grad_norm": 1.2823331238658422,
+      "learning_rate": 4.1925272821104477e-07,
+      "loss": 0.651,
+      "step": 66890
+    },
+    {
+      "epoch": 2.6458897743676957,
+      "grad_norm": 1.2822840583923572,
+      "learning_rate": 4.183308902666916e-07,
+      "loss": 0.6798,
+      "step": 66900
+    },
+    {
+      "epoch": 2.6462852735865847,
+      "grad_norm": 1.4604251989445376,
+      "learning_rate": 4.1741002264136075e-07,
+      "loss": 0.6264,
+      "step": 66910
+    },
+    {
+      "epoch": 2.6466807728054738,
+      "grad_norm": 1.495190644041529,
+      "learning_rate": 4.1649012553007795e-07,
+      "loss": 0.6066,
+      "step": 66920
+    },
+    {
+      "epoch": 2.647076272024363,
+      "grad_norm": 1.4840463689796157,
+      "learning_rate": 4.155711991276601e-07,
+      "loss": 0.6796,
+      "step": 66930
+    },
+    {
+      "epoch": 2.647471771243252,
+      "grad_norm": 1.3556142115990775,
+      "learning_rate": 4.1465324362872205e-07,
+      "loss": 0.6319,
+      "step": 66940
+    },
+    {
+      "epoch": 2.647867270462141,
+      "grad_norm": 1.3947919236139419,
+      "learning_rate": 4.1373625922767005e-07,
+      "loss": 0.6654,
+      "step": 66950
+    },
+    {
+      "epoch": 2.64826276968103,
+      "grad_norm": 1.3691337766863305,
+      "learning_rate": 4.1282024611870474e-07,
+      "loss": 0.606,
+      "step": 66960
+    },
+    {
+      "epoch": 2.648658268899919,
+      "grad_norm": 1.7176406567866715,
+      "learning_rate": 4.119052044958238e-07,
+      "loss": 0.6339,
+      "step": 66970
+    },
+    {
+      "epoch": 2.649053768118808,
+      "grad_norm": 1.2847804424439877,
+      "learning_rate": 4.1099113455281556e-07,
+      "loss": 0.6513,
+      "step": 66980
+    },
+    {
+      "epoch": 2.649449267337697,
+      "grad_norm": 1.4261966979975993,
+      "learning_rate": 4.100780364832657e-07,
+      "loss": 0.6392,
+      "step": 66990
+    },
+    {
+      "epoch": 2.649844766556586,
+      "grad_norm": 1.3296865532160875,
+      "learning_rate": 4.0916591048055286e-07,
+      "loss": 0.6221,
+      "step": 67000
+    },
+    {
+      "epoch": 2.650240265775475,
+      "grad_norm": 1.2429985713452463,
+      "learning_rate": 4.0825475673784867e-07,
+      "loss": 0.6598,
+      "step": 67010
+    },
+    {
+      "epoch": 2.650635764994364,
+      "grad_norm": 1.3711051294072798,
+      "learning_rate": 4.073445754481198e-07,
+      "loss": 0.6152,
+      "step": 67020
+    },
+    {
+      "epoch": 2.651031264213253,
+      "grad_norm": 1.3829629827143872,
+      "learning_rate": 4.064353668041282e-07,
+      "loss": 0.6329,
+      "step": 67030
+    },
+    {
+      "epoch": 2.6514267634321422,
+      "grad_norm": 1.425930233077325,
+      "learning_rate": 4.0552713099842767e-07,
+      "loss": 0.6248,
+      "step": 67040
+    },
+    {
+      "epoch": 2.6518222626510313,
+      "grad_norm": 1.603481017678548,
+      "learning_rate": 4.0461986822336765e-07,
+      "loss": 0.6601,
+      "step": 67050
+    },
+    {
+      "epoch": 2.6522177618699203,
+      "grad_norm": 1.4839827953440277,
+      "learning_rate": 4.0371357867109e-07,
+      "loss": 0.6424,
+      "step": 67060
+    },
+    {
+      "epoch": 2.6526132610888093,
+      "grad_norm": 1.7249467590446503,
+      "learning_rate": 4.0280826253353334e-07,
+      "loss": 0.6432,
+      "step": 67070
+    },
+    {
+      "epoch": 2.6530087603076984,
+      "grad_norm": 1.7496658102068572,
+      "learning_rate": 4.0190392000242495e-07,
+      "loss": 0.6269,
+      "step": 67080
+    },
+    {
+      "epoch": 2.6534042595265874,
+      "grad_norm": 1.484891972499491,
+      "learning_rate": 4.0100055126929215e-07,
+      "loss": 0.6415,
+      "step": 67090
+    },
+    {
+      "epoch": 2.6537997587454765,
+      "grad_norm": 1.3361017697498834,
+      "learning_rate": 4.0009815652545135e-07,
+      "loss": 0.6332,
+      "step": 67100
+    },
+    {
+      "epoch": 2.6541952579643655,
+      "grad_norm": 1.5597945169185352,
+      "learning_rate": 3.991967359620147e-07,
+      "loss": 0.625,
+      "step": 67110
+    },
+    {
+      "epoch": 2.6545907571832545,
+      "grad_norm": 1.494366562315302,
+      "learning_rate": 3.982962897698878e-07,
+      "loss": 0.6288,
+      "step": 67120
+    },
+    {
+      "epoch": 2.6549862564021436,
+      "grad_norm": 1.4337863908407422,
+      "learning_rate": 3.973968181397697e-07,
+      "loss": 0.6566,
+      "step": 67130
+    },
+    {
+      "epoch": 2.6553817556210326,
+      "grad_norm": 1.447224989329216,
+      "learning_rate": 3.964983212621515e-07,
+      "loss": 0.637,
+      "step": 67140
+    },
+    {
+      "epoch": 2.6557772548399217,
+      "grad_norm": 1.4654719225633515,
+      "learning_rate": 3.9560079932732197e-07,
+      "loss": 0.6435,
+      "step": 67150
+    },
+    {
+      "epoch": 2.6561727540588107,
+      "grad_norm": 1.253563886149293,
+      "learning_rate": 3.947042525253586e-07,
+      "loss": 0.6382,
+      "step": 67160
+    },
+    {
+      "epoch": 2.6565682532776997,
+      "grad_norm": 1.6720554023752612,
+      "learning_rate": 3.9380868104613665e-07,
+      "loss": 0.6539,
+      "step": 67170
+    },
+    {
+      "epoch": 2.656963752496589,
+      "grad_norm": 1.4577868046866587,
+      "learning_rate": 3.929140850793223e-07,
+      "loss": 0.6201,
+      "step": 67180
+    },
+    {
+      "epoch": 2.657359251715478,
+      "grad_norm": 1.3094226264865712,
+      "learning_rate": 3.920204648143738e-07,
+      "loss": 0.6847,
+      "step": 67190
+    },
+    {
+      "epoch": 2.657754750934367,
+      "grad_norm": 1.4000435018670074,
+      "learning_rate": 3.911278204405478e-07,
+      "loss": 0.6414,
+      "step": 67200
+    },
+    {
+      "epoch": 2.658150250153256,
+      "grad_norm": 1.5063931391024656,
+      "learning_rate": 3.902361521468878e-07,
+      "loss": 0.6388,
+      "step": 67210
+    },
+    {
+      "epoch": 2.658545749372145,
+      "grad_norm": 1.609485005896645,
+      "learning_rate": 3.893454601222363e-07,
+      "loss": 0.6314,
+      "step": 67220
+    },
+    {
+      "epoch": 2.658941248591034,
+      "grad_norm": 1.5424299020077414,
+      "learning_rate": 3.8845574455522506e-07,
+      "loss": 0.6425,
+      "step": 67230
+    },
+    {
+      "epoch": 2.659336747809923,
+      "grad_norm": 1.5112816966688762,
+      "learning_rate": 3.875670056342823e-07,
+      "loss": 0.6339,
+      "step": 67240
+    },
+    {
+      "epoch": 2.659732247028812,
+      "grad_norm": 1.8633998106247787,
+      "learning_rate": 3.866792435476263e-07,
+      "loss": 0.657,
+      "step": 67250
+    },
+    {
+      "epoch": 2.660127746247701,
+      "grad_norm": 1.6998981111492322,
+      "learning_rate": 3.857924584832706e-07,
+      "loss": 0.6516,
+      "step": 67260
+    },
+    {
+      "epoch": 2.66052324546659,
+      "grad_norm": 1.761161973631195,
+      "learning_rate": 3.849066506290194e-07,
+      "loss": 0.6435,
+      "step": 67270
+    },
+    {
+      "epoch": 2.660918744685479,
+      "grad_norm": 1.1564538653904168,
+      "learning_rate": 3.8402182017247434e-07,
+      "loss": 0.6386,
+      "step": 67280
+    },
+    {
+      "epoch": 2.661314243904368,
+      "grad_norm": 1.455135653124351,
+      "learning_rate": 3.8313796730102493e-07,
+      "loss": 0.636,
+      "step": 67290
+    },
+    {
+      "epoch": 2.6617097431232573,
+      "grad_norm": 1.4325183598229272,
+      "learning_rate": 3.822550922018575e-07,
+      "loss": 0.6897,
+      "step": 67300
+    },
+    {
+      "epoch": 2.6621052423421463,
+      "grad_norm": 1.194626336687413,
+      "learning_rate": 3.8137319506194927e-07,
+      "loss": 0.6419,
+      "step": 67310
+    },
+    {
+      "epoch": 2.6625007415610353,
+      "grad_norm": 1.3449387684709344,
+      "learning_rate": 3.8049227606807126e-07,
+      "loss": 0.6562,
+      "step": 67320
+    },
+    {
+      "epoch": 2.6628962407799244,
+      "grad_norm": 1.4410529700490062,
+      "learning_rate": 3.7961233540678656e-07,
+      "loss": 0.6348,
+      "step": 67330
+    },
+    {
+      "epoch": 2.6632917399988134,
+      "grad_norm": 1.4851409735795837,
+      "learning_rate": 3.787333732644527e-07,
+      "loss": 0.6228,
+      "step": 67340
+    },
+    {
+      "epoch": 2.6636872392177025,
+      "grad_norm": 1.2700039102753866,
+      "learning_rate": 3.778553898272169e-07,
+      "loss": 0.6139,
+      "step": 67350
+    },
+    {
+      "epoch": 2.6640827384365915,
+      "grad_norm": 1.7911519665895288,
+      "learning_rate": 3.7697838528102324e-07,
+      "loss": 0.6239,
+      "step": 67360
+    },
+    {
+      "epoch": 2.6644782376554805,
+      "grad_norm": 1.3071701079955973,
+      "learning_rate": 3.761023598116048e-07,
+      "loss": 0.615,
+      "step": 67370
+    },
+    {
+      "epoch": 2.6648737368743696,
+      "grad_norm": 1.5354925568102975,
+      "learning_rate": 3.752273136044893e-07,
+      "loss": 0.6623,
+      "step": 67380
+    },
+    {
+      "epoch": 2.6652692360932586,
+      "grad_norm": 1.5452305780191486,
+      "learning_rate": 3.743532468449951e-07,
+      "loss": 0.6218,
+      "step": 67390
+    },
+    {
+      "epoch": 2.6656647353121476,
+      "grad_norm": 1.2995892283492798,
+      "learning_rate": 3.734801597182369e-07,
+      "loss": 0.6602,
+      "step": 67400
+    },
+    {
+      "epoch": 2.6660602345310367,
+      "grad_norm": 1.2425583351380654,
+      "learning_rate": 3.7260805240911747e-07,
+      "loss": 0.6482,
+      "step": 67410
+    },
+    {
+      "epoch": 2.6664557337499257,
+      "grad_norm": 1.3466121328477731,
+      "learning_rate": 3.7173692510233617e-07,
+      "loss": 0.6565,
+      "step": 67420
+    },
+    {
+      "epoch": 2.6668512329688148,
+      "grad_norm": 1.5716410366626758,
+      "learning_rate": 3.7086677798238214e-07,
+      "loss": 0.6406,
+      "step": 67430
+    },
+    {
+      "epoch": 2.667246732187704,
+      "grad_norm": 1.295476496645719,
+      "learning_rate": 3.6999761123353574e-07,
+      "loss": 0.6533,
+      "step": 67440
+    },
+    {
+      "epoch": 2.667642231406593,
+      "grad_norm": 1.4453105704642009,
+      "learning_rate": 3.691294250398747e-07,
+      "loss": 0.6463,
+      "step": 67450
+    },
+    {
+      "epoch": 2.668037730625482,
+      "grad_norm": 1.4420361191847626,
+      "learning_rate": 3.6826221958526307e-07,
+      "loss": 0.6328,
+      "step": 67460
+    },
+    {
+      "epoch": 2.668433229844371,
+      "grad_norm": 1.4687586905766155,
+      "learning_rate": 3.6739599505336286e-07,
+      "loss": 0.662,
+      "step": 67470
+    },
+    {
+      "epoch": 2.66882872906326,
+      "grad_norm": 1.4689470491007226,
+      "learning_rate": 3.665307516276234e-07,
+      "loss": 0.6495,
+      "step": 67480
+    },
+    {
+      "epoch": 2.669224228282149,
+      "grad_norm": 1.1769819109822068,
+      "learning_rate": 3.656664894912909e-07,
+      "loss": 0.6014,
+      "step": 67490
+    },
+    {
+      "epoch": 2.669619727501038,
+      "grad_norm": 1.2838348423550436,
+      "learning_rate": 3.6480320882739785e-07,
+      "loss": 0.6706,
+      "step": 67500
+    },
+    {
+      "epoch": 2.670015226719927,
+      "grad_norm": 1.4384230088080945,
+      "learning_rate": 3.6394090981877463e-07,
+      "loss": 0.6385,
+      "step": 67510
+    },
+    {
+      "epoch": 2.670410725938816,
+      "grad_norm": 1.408892298790722,
+      "learning_rate": 3.630795926480407e-07,
+      "loss": 0.6576,
+      "step": 67520
+    },
+    {
+      "epoch": 2.670806225157705,
+      "grad_norm": 1.364572772958427,
+      "learning_rate": 3.6221925749760854e-07,
+      "loss": 0.6057,
+      "step": 67530
+    },
+    {
+      "epoch": 2.671201724376594,
+      "grad_norm": 1.3792356111715454,
+      "learning_rate": 3.6135990454968184e-07,
+      "loss": 0.6377,
+      "step": 67540
+    },
+    {
+      "epoch": 2.6715972235954832,
+      "grad_norm": 1.4487893114290935,
+      "learning_rate": 3.605015339862583e-07,
+      "loss": 0.6133,
+      "step": 67550
+    },
+    {
+      "epoch": 2.6719927228143723,
+      "grad_norm": 1.5843185378033344,
+      "learning_rate": 3.596441459891242e-07,
+      "loss": 0.6327,
+      "step": 67560
+    },
+    {
+      "epoch": 2.6723882220332613,
+      "grad_norm": 1.3243027806347243,
+      "learning_rate": 3.58787740739861e-07,
+      "loss": 0.6783,
+      "step": 67570
+    },
+    {
+      "epoch": 2.6727837212521504,
+      "grad_norm": 1.440987779065691,
+      "learning_rate": 3.579323184198397e-07,
+      "loss": 0.6576,
+      "step": 67580
+    },
+    {
+      "epoch": 2.67317922047104,
+      "grad_norm": 1.6064033549193635,
+      "learning_rate": 3.570778792102253e-07,
+      "loss": 0.6474,
+      "step": 67590
+    },
+    {
+      "epoch": 2.6735747196899284,
+      "grad_norm": 1.362771487103353,
+      "learning_rate": 3.562244232919726e-07,
+      "loss": 0.6504,
+      "step": 67600
+    },
+    {
+      "epoch": 2.673970218908818,
+      "grad_norm": 1.6466558859849665,
+      "learning_rate": 3.553719508458292e-07,
+      "loss": 0.6744,
+      "step": 67610
+    },
+    {
+      "epoch": 2.6743657181277065,
+      "grad_norm": 1.692159902937415,
+      "learning_rate": 3.5452046205233293e-07,
+      "loss": 0.6317,
+      "step": 67620
+    },
+    {
+      "epoch": 2.674761217346596,
+      "grad_norm": 1.704803372118754,
+      "learning_rate": 3.5366995709181675e-07,
+      "loss": 0.6421,
+      "step": 67630
+    },
+    {
+      "epoch": 2.6751567165654846,
+      "grad_norm": 1.5623441738852302,
+      "learning_rate": 3.5282043614440043e-07,
+      "loss": 0.6059,
+      "step": 67640
+    },
+    {
+      "epoch": 2.675552215784374,
+      "grad_norm": 1.5529221015322243,
+      "learning_rate": 3.5197189939000065e-07,
+      "loss": 0.662,
+      "step": 67650
+    },
+    {
+      "epoch": 2.6759477150032627,
+      "grad_norm": 1.499488156716238,
+      "learning_rate": 3.5112434700832144e-07,
+      "loss": 0.6466,
+      "step": 67660
+    },
+    {
+      "epoch": 2.676343214222152,
+      "grad_norm": 1.3826978451876017,
+      "learning_rate": 3.5027777917885977e-07,
+      "loss": 0.6642,
+      "step": 67670
+    },
+    {
+      "epoch": 2.6767387134410408,
+      "grad_norm": 1.5903123990018386,
+      "learning_rate": 3.4943219608090493e-07,
+      "loss": 0.6398,
+      "step": 67680
+    },
+    {
+      "epoch": 2.6771342126599302,
+      "grad_norm": 1.5798941281349288,
+      "learning_rate": 3.48587597893536e-07,
+      "loss": 0.6516,
+      "step": 67690
+    },
+    {
+      "epoch": 2.677529711878819,
+      "grad_norm": 1.3080170689834487,
+      "learning_rate": 3.477439847956254e-07,
+      "loss": 0.6427,
+      "step": 67700
+    },
+    {
+      "epoch": 2.6779252110977083,
+      "grad_norm": 1.3071788865346678,
+      "learning_rate": 3.4690135696583473e-07,
+      "loss": 0.635,
+      "step": 67710
+    },
+    {
+      "epoch": 2.678320710316597,
+      "grad_norm": 1.5345252688578457,
+      "learning_rate": 3.4605971458262e-07,
+      "loss": 0.5767,
+      "step": 67720
+    },
+    {
+      "epoch": 2.6787162095354864,
+      "grad_norm": 1.6636315024068176,
+      "learning_rate": 3.452190578242243e-07,
+      "loss": 0.651,
+      "step": 67730
+    },
+    {
+      "epoch": 2.679111708754375,
+      "grad_norm": 1.6950699020623816,
+      "learning_rate": 3.443793868686879e-07,
+      "loss": 0.643,
+      "step": 67740
+    },
+    {
+      "epoch": 2.6795072079732645,
+      "grad_norm": 1.4733830197549675,
+      "learning_rate": 3.4354070189383413e-07,
+      "loss": 0.643,
+      "step": 67750
+    },
+    {
+      "epoch": 2.679902707192153,
+      "grad_norm": 1.2748151594976393,
+      "learning_rate": 3.427030030772854e-07,
+      "loss": 0.6711,
+      "step": 67760
+    },
+    {
+      "epoch": 2.6802982064110425,
+      "grad_norm": 1.3254388623939386,
+      "learning_rate": 3.418662905964498e-07,
+      "loss": 0.6648,
+      "step": 67770
+    },
+    {
+      "epoch": 2.680693705629931,
+      "grad_norm": 1.4290339665773355,
+      "learning_rate": 3.410305646285311e-07,
+      "loss": 0.6499,
+      "step": 67780
+    },
+    {
+      "epoch": 2.6810892048488206,
+      "grad_norm": 1.4335538686772098,
+      "learning_rate": 3.401958253505194e-07,
+      "loss": 0.635,
+      "step": 67790
+    },
+    {
+      "epoch": 2.6814847040677092,
+      "grad_norm": 1.5844709371594365,
+      "learning_rate": 3.3936207293919997e-07,
+      "loss": 0.6507,
+      "step": 67800
+    },
+    {
+      "epoch": 2.6818802032865987,
+      "grad_norm": 1.350994567483789,
+      "learning_rate": 3.38529307571146e-07,
+      "loss": 0.6449,
+      "step": 67810
+    },
+    {
+      "epoch": 2.6822757025054873,
+      "grad_norm": 1.6806705175000394,
+      "learning_rate": 3.376975294227242e-07,
+      "loss": 0.6298,
+      "step": 67820
+    },
+    {
+      "epoch": 2.682671201724377,
+      "grad_norm": 1.6975300418314174,
+      "learning_rate": 3.3686673867009025e-07,
+      "loss": 0.6431,
+      "step": 67830
+    },
+    {
+      "epoch": 2.6830667009432654,
+      "grad_norm": 1.2908359146813606,
+      "learning_rate": 3.360369354891907e-07,
+      "loss": 0.6517,
+      "step": 67840
+    },
+    {
+      "epoch": 2.683462200162155,
+      "grad_norm": 1.4959510332695647,
+      "learning_rate": 3.352081200557644e-07,
+      "loss": 0.6325,
+      "step": 67850
+    },
+    {
+      "epoch": 2.6838576993810435,
+      "grad_norm": 1.5398910327911224,
+      "learning_rate": 3.34380292545341e-07,
+      "loss": 0.6073,
+      "step": 67860
+    },
+    {
+      "epoch": 2.684253198599933,
+      "grad_norm": 1.434697804562703,
+      "learning_rate": 3.3355345313323796e-07,
+      "loss": 0.6551,
+      "step": 67870
+    },
+    {
+      "epoch": 2.6846486978188215,
+      "grad_norm": 1.6316207395651667,
+      "learning_rate": 3.3272760199456853e-07,
+      "loss": 0.6412,
+      "step": 67880
+    },
+    {
+      "epoch": 2.685044197037711,
+      "grad_norm": 1.6515451705670228,
+      "learning_rate": 3.3190273930423177e-07,
+      "loss": 0.6689,
+      "step": 67890
+    },
+    {
+      "epoch": 2.6854396962565996,
+      "grad_norm": 1.7636864563050267,
+      "learning_rate": 3.3107886523691955e-07,
+      "loss": 0.6487,
+      "step": 67900
+    },
+    {
+      "epoch": 2.685835195475489,
+      "grad_norm": 1.2407556192004556,
+      "learning_rate": 3.3025597996711566e-07,
+      "loss": 0.6645,
+      "step": 67910
+    },
+    {
+      "epoch": 2.686230694694378,
+      "grad_norm": 1.799509130953986,
+      "learning_rate": 3.294340836690918e-07,
+      "loss": 0.5929,
+      "step": 67920
+    },
+    {
+      "epoch": 2.686626193913267,
+      "grad_norm": 1.4364499340508392,
+      "learning_rate": 3.286131765169126e-07,
+      "loss": 0.6433,
+      "step": 67930
+    },
+    {
+      "epoch": 2.687021693132156,
+      "grad_norm": 1.733801769054392,
+      "learning_rate": 3.277932586844307e-07,
+      "loss": 0.6469,
+      "step": 67940
+    },
+    {
+      "epoch": 2.6874171923510453,
+      "grad_norm": 1.8505853992871186,
+      "learning_rate": 3.2697433034529214e-07,
+      "loss": 0.6078,
+      "step": 67950
+    },
+    {
+      "epoch": 2.6878126915699343,
+      "grad_norm": 1.3680040478374165,
+      "learning_rate": 3.2615639167293045e-07,
+      "loss": 0.6361,
+      "step": 67960
+    },
+    {
+      "epoch": 2.6882081907888233,
+      "grad_norm": 1.5000926896746636,
+      "learning_rate": 3.2533944284057264e-07,
+      "loss": 0.6602,
+      "step": 67970
+    },
+    {
+      "epoch": 2.6886036900077124,
+      "grad_norm": 1.3052371375795877,
+      "learning_rate": 3.2452348402123356e-07,
+      "loss": 0.6727,
+      "step": 67980
+    },
+    {
+      "epoch": 2.6889991892266014,
+      "grad_norm": 1.396241337167498,
+      "learning_rate": 3.2370851538771953e-07,
+      "loss": 0.6489,
+      "step": 67990
+    },
+    {
+      "epoch": 2.6893946884454905,
+      "grad_norm": 1.1532291475843564,
+      "learning_rate": 3.228945371126263e-07,
+      "loss": 0.6394,
+      "step": 68000
+    },
+    {
+      "epoch": 2.6897901876643795,
+      "grad_norm": 1.24235711859018,
+      "learning_rate": 3.220815493683416e-07,
+      "loss": 0.6314,
+      "step": 68010
+    },
+    {
+      "epoch": 2.6901856868832685,
+      "grad_norm": 1.220705732795657,
+      "learning_rate": 3.2126955232704094e-07,
+      "loss": 0.6246,
+      "step": 68020
+    },
+    {
+      "epoch": 2.6905811861021576,
+      "grad_norm": 1.3511688229780527,
+      "learning_rate": 3.204585461606929e-07,
+      "loss": 0.6368,
+      "step": 68030
+    },
+    {
+      "epoch": 2.6909766853210466,
+      "grad_norm": 1.4741450640018745,
+      "learning_rate": 3.196485310410535e-07,
+      "loss": 0.6589,
+      "step": 68040
+    },
+    {
+      "epoch": 2.6913721845399357,
+      "grad_norm": 1.7334416177762504,
+      "learning_rate": 3.1883950713967105e-07,
+      "loss": 0.6422,
+      "step": 68050
+    },
+    {
+      "epoch": 2.6917676837588247,
+      "grad_norm": 1.6395381189868599,
+      "learning_rate": 3.1803147462788175e-07,
+      "loss": 0.6258,
+      "step": 68060
+    },
+    {
+      "epoch": 2.6921631829777137,
+      "grad_norm": 1.639894593249071,
+      "learning_rate": 3.1722443367681487e-07,
+      "loss": 0.6135,
+      "step": 68070
+    },
+    {
+      "epoch": 2.6925586821966028,
+      "grad_norm": 1.2390951299359396,
+      "learning_rate": 3.164183844573865e-07,
+      "loss": 0.6693,
+      "step": 68080
+    },
+    {
+      "epoch": 2.692954181415492,
+      "grad_norm": 1.7592401704859844,
+      "learning_rate": 3.1561332714030334e-07,
+      "loss": 0.6232,
+      "step": 68090
+    },
+    {
+      "epoch": 2.693349680634381,
+      "grad_norm": 1.657449540500462,
+      "learning_rate": 3.1480926189606463e-07,
+      "loss": 0.6461,
+      "step": 68100
+    },
+    {
+      "epoch": 2.69374517985327,
+      "grad_norm": 1.3336205402744792,
+      "learning_rate": 3.1400618889495636e-07,
+      "loss": 0.6838,
+      "step": 68110
+    },
+    {
+      "epoch": 2.694140679072159,
+      "grad_norm": 1.6292808262751366,
+      "learning_rate": 3.132041083070564e-07,
+      "loss": 0.6565,
+      "step": 68120
+    },
+    {
+      "epoch": 2.694536178291048,
+      "grad_norm": 1.5217678688211929,
+      "learning_rate": 3.124030203022305e-07,
+      "loss": 0.6353,
+      "step": 68130
+    },
+    {
+      "epoch": 2.694931677509937,
+      "grad_norm": 1.5030488987102606,
+      "learning_rate": 3.1160292505013676e-07,
+      "loss": 0.6764,
+      "step": 68140
+    },
+    {
+      "epoch": 2.695327176728826,
+      "grad_norm": 1.5561741890697343,
+      "learning_rate": 3.1080382272021983e-07,
+      "loss": 0.6535,
+      "step": 68150
+    },
+    {
+      "epoch": 2.695722675947715,
+      "grad_norm": 1.368334135572594,
+      "learning_rate": 3.1000571348171803e-07,
+      "loss": 0.6443,
+      "step": 68160
+    },
+    {
+      "epoch": 2.696118175166604,
+      "grad_norm": 1.5852085599217816,
+      "learning_rate": 3.0920859750365574e-07,
+      "loss": 0.6165,
+      "step": 68170
+    },
+    {
+      "epoch": 2.696513674385493,
+      "grad_norm": 1.3695727666639599,
+      "learning_rate": 3.084124749548495e-07,
+      "loss": 0.6504,
+      "step": 68180
+    },
+    {
+      "epoch": 2.696909173604382,
+      "grad_norm": 1.5767428456848072,
+      "learning_rate": 3.0761734600390334e-07,
+      "loss": 0.6136,
+      "step": 68190
+    },
+    {
+      "epoch": 2.6973046728232712,
+      "grad_norm": 1.5278696792448265,
+      "learning_rate": 3.068232108192132e-07,
+      "loss": 0.6491,
+      "step": 68200
+    },
+    {
+      "epoch": 2.6977001720421603,
+      "grad_norm": 1.7384324531816469,
+      "learning_rate": 3.0603006956896165e-07,
+      "loss": 0.643,
+      "step": 68210
+    },
+    {
+      "epoch": 2.6980956712610493,
+      "grad_norm": 1.4578527958217349,
+      "learning_rate": 3.052379224211244e-07,
+      "loss": 0.6221,
+      "step": 68220
+    },
+    {
+      "epoch": 2.6984911704799384,
+      "grad_norm": 1.3911086240015524,
+      "learning_rate": 3.044467695434633e-07,
+      "loss": 0.6375,
+      "step": 68230
+    },
+    {
+      "epoch": 2.6988866696988274,
+      "grad_norm": 1.6910829018839328,
+      "learning_rate": 3.036566111035316e-07,
+      "loss": 0.6199,
+      "step": 68240
+    },
+    {
+      "epoch": 2.6992821689177164,
+      "grad_norm": 1.4168179772947889,
+      "learning_rate": 3.028674472686699e-07,
+      "loss": 0.6546,
+      "step": 68250
+    },
+    {
+      "epoch": 2.6996776681366055,
+      "grad_norm": 1.3986303649049687,
+      "learning_rate": 3.020792782060117e-07,
+      "loss": 0.6753,
+      "step": 68260
+    },
+    {
+      "epoch": 2.7000731673554945,
+      "grad_norm": 1.6422893016554367,
+      "learning_rate": 3.012921040824762e-07,
+      "loss": 0.659,
+      "step": 68270
+    },
+    {
+      "epoch": 2.7004686665743836,
+      "grad_norm": 1.5002977705015865,
+      "learning_rate": 3.005059250647746e-07,
+      "loss": 0.6491,
+      "step": 68280
+    },
+    {
+      "epoch": 2.7008641657932726,
+      "grad_norm": 1.240302239507857,
+      "learning_rate": 2.997207413194042e-07,
+      "loss": 0.6411,
+      "step": 68290
+    },
+    {
+      "epoch": 2.7012596650121616,
+      "grad_norm": 1.4569336444242,
+      "learning_rate": 2.989365530126559e-07,
+      "loss": 0.6622,
+      "step": 68300
+    },
+    {
+      "epoch": 2.7016551642310507,
+      "grad_norm": 1.3511359636396427,
+      "learning_rate": 2.9815336031060626e-07,
+      "loss": 0.6453,
+      "step": 68310
+    },
+    {
+      "epoch": 2.7020506634499397,
+      "grad_norm": 1.3450343464159829,
+      "learning_rate": 2.97371163379121e-07,
+      "loss": 0.6217,
+      "step": 68320
+    },
+    {
+      "epoch": 2.7024461626688288,
+      "grad_norm": 1.3192119078784772,
+      "learning_rate": 2.965899623838575e-07,
+      "loss": 0.6398,
+      "step": 68330
+    },
+    {
+      "epoch": 2.702841661887718,
+      "grad_norm": 1.4843361017815262,
+      "learning_rate": 2.958097574902602e-07,
+      "loss": 0.6165,
+      "step": 68340
+    },
+    {
+      "epoch": 2.703237161106607,
+      "grad_norm": 1.5065879981967603,
+      "learning_rate": 2.9503054886356353e-07,
+      "loss": 0.6268,
+      "step": 68350
+    },
+    {
+      "epoch": 2.703632660325496,
+      "grad_norm": 1.6528747274797109,
+      "learning_rate": 2.9425233666878993e-07,
+      "loss": 0.6368,
+      "step": 68360
+    },
+    {
+      "epoch": 2.704028159544385,
+      "grad_norm": 1.3504742246608263,
+      "learning_rate": 2.9347512107075207e-07,
+      "loss": 0.6662,
+      "step": 68370
+    },
+    {
+      "epoch": 2.704423658763274,
+      "grad_norm": 1.5273343418523304,
+      "learning_rate": 2.926989022340493e-07,
+      "loss": 0.6162,
+      "step": 68380
+    },
+    {
+      "epoch": 2.704819157982163,
+      "grad_norm": 1.5790203009416488,
+      "learning_rate": 2.919236803230741e-07,
+      "loss": 0.6369,
+      "step": 68390
+    },
+    {
+      "epoch": 2.705214657201052,
+      "grad_norm": 1.533063079252469,
+      "learning_rate": 2.911494555020022e-07,
+      "loss": 0.6572,
+      "step": 68400
+    },
+    {
+      "epoch": 2.705610156419941,
+      "grad_norm": 1.5514001641638855,
+      "learning_rate": 2.903762279348038e-07,
+      "loss": 0.64,
+      "step": 68410
+    },
+    {
+      "epoch": 2.70600565563883,
+      "grad_norm": 1.9201184076745088,
+      "learning_rate": 2.8960399778523384e-07,
+      "loss": 0.6539,
+      "step": 68420
+    },
+    {
+      "epoch": 2.706401154857719,
+      "grad_norm": 1.538000017692867,
+      "learning_rate": 2.888327652168377e-07,
+      "loss": 0.6214,
+      "step": 68430
+    },
+    {
+      "epoch": 2.706796654076608,
+      "grad_norm": 1.5139147604366952,
+      "learning_rate": 2.880625303929491e-07,
+      "loss": 0.6437,
+      "step": 68440
+    },
+    {
+      "epoch": 2.7071921532954972,
+      "grad_norm": 1.360455992413914,
+      "learning_rate": 2.8729329347669146e-07,
+      "loss": 0.6461,
+      "step": 68450
+    },
+    {
+      "epoch": 2.7075876525143863,
+      "grad_norm": 1.5686283832820274,
+      "learning_rate": 2.8652505463097445e-07,
+      "loss": 0.6596,
+      "step": 68460
+    },
+    {
+      "epoch": 2.7079831517332753,
+      "grad_norm": 1.6018929023218305,
+      "learning_rate": 2.857578140185002e-07,
+      "loss": 0.6243,
+      "step": 68470
+    },
+    {
+      "epoch": 2.7083786509521643,
+      "grad_norm": 1.5118950744260757,
+      "learning_rate": 2.849915718017543e-07,
+      "loss": 0.6324,
+      "step": 68480
+    },
+    {
+      "epoch": 2.7087741501710534,
+      "grad_norm": 1.770497101415643,
+      "learning_rate": 2.8422632814301523e-07,
+      "loss": 0.6066,
+      "step": 68490
+    },
+    {
+      "epoch": 2.7091696493899424,
+      "grad_norm": 1.4172496119037643,
+      "learning_rate": 2.834620832043483e-07,
+      "loss": 0.6406,
+      "step": 68500
+    },
+    {
+      "epoch": 2.7095651486088315,
+      "grad_norm": 1.2949555961801198,
+      "learning_rate": 2.8269883714760806e-07,
+      "loss": 0.6453,
+      "step": 68510
+    },
+    {
+      "epoch": 2.7099606478277205,
+      "grad_norm": 1.8227660460144495,
+      "learning_rate": 2.8193659013443563e-07,
+      "loss": 0.6418,
+      "step": 68520
+    },
+    {
+      "epoch": 2.7103561470466095,
+      "grad_norm": 1.3587220887292486,
+      "learning_rate": 2.811753423262631e-07,
+      "loss": 0.6483,
+      "step": 68530
+    },
+    {
+      "epoch": 2.7107516462654986,
+      "grad_norm": 1.4992946389392103,
+      "learning_rate": 2.8041509388430976e-07,
+      "loss": 0.6376,
+      "step": 68540
+    },
+    {
+      "epoch": 2.7111471454843876,
+      "grad_norm": 1.3204886574934729,
+      "learning_rate": 2.7965584496958185e-07,
+      "loss": 0.6423,
+      "step": 68550
+    },
+    {
+      "epoch": 2.7115426447032767,
+      "grad_norm": 1.517801064217121,
+      "learning_rate": 2.788975957428769e-07,
+      "loss": 0.6546,
+      "step": 68560
+    },
+    {
+      "epoch": 2.7119381439221657,
+      "grad_norm": 1.3694690323043008,
+      "learning_rate": 2.781403463647775e-07,
+      "loss": 0.6573,
+      "step": 68570
+    },
+    {
+      "epoch": 2.7123336431410547,
+      "grad_norm": 1.423737782498182,
+      "learning_rate": 2.773840969956576e-07,
+      "loss": 0.6738,
+      "step": 68580
+    },
+    {
+      "epoch": 2.712729142359944,
+      "grad_norm": 1.540953605545579,
+      "learning_rate": 2.7662884779567687e-07,
+      "loss": 0.6368,
+      "step": 68590
+    },
+    {
+      "epoch": 2.713124641578833,
+      "grad_norm": 1.5640336462122308,
+      "learning_rate": 2.7587459892478517e-07,
+      "loss": 0.6536,
+      "step": 68600
+    },
+    {
+      "epoch": 2.713520140797722,
+      "grad_norm": 1.609568168975661,
+      "learning_rate": 2.7512135054271806e-07,
+      "loss": 0.6283,
+      "step": 68610
+    },
+    {
+      "epoch": 2.713915640016611,
+      "grad_norm": 1.3666465221971882,
+      "learning_rate": 2.7436910280900176e-07,
+      "loss": 0.6338,
+      "step": 68620
+    },
+    {
+      "epoch": 2.7143111392355,
+      "grad_norm": 1.300546333214171,
+      "learning_rate": 2.736178558829483e-07,
+      "loss": 0.6426,
+      "step": 68630
+    },
+    {
+      "epoch": 2.714706638454389,
+      "grad_norm": 1.6373888804716752,
+      "learning_rate": 2.7286760992366046e-07,
+      "loss": 0.6519,
+      "step": 68640
+    },
+    {
+      "epoch": 2.715102137673278,
+      "grad_norm": 1.6376397086747032,
+      "learning_rate": 2.721183650900261e-07,
+      "loss": 0.6432,
+      "step": 68650
+    },
+    {
+      "epoch": 2.715497636892167,
+      "grad_norm": 1.4602682879598028,
+      "learning_rate": 2.7137012154072385e-07,
+      "loss": 0.6475,
+      "step": 68660
+    },
+    {
+      "epoch": 2.715893136111056,
+      "grad_norm": 1.336128118705436,
+      "learning_rate": 2.706228794342175e-07,
+      "loss": 0.6221,
+      "step": 68670
+    },
+    {
+      "epoch": 2.716288635329945,
+      "grad_norm": 1.4060509655271807,
+      "learning_rate": 2.6987663892876105e-07,
+      "loss": 0.6508,
+      "step": 68680
+    },
+    {
+      "epoch": 2.716684134548834,
+      "grad_norm": 1.6104819501962804,
+      "learning_rate": 2.691314001823947e-07,
+      "loss": 0.6367,
+      "step": 68690
+    },
+    {
+      "epoch": 2.717079633767723,
+      "grad_norm": 1.7001876219415388,
+      "learning_rate": 2.683871633529483e-07,
+      "loss": 0.6375,
+      "step": 68700
+    },
+    {
+      "epoch": 2.7174751329866123,
+      "grad_norm": 1.5032203096451775,
+      "learning_rate": 2.67643928598037e-07,
+      "loss": 0.6373,
+      "step": 68710
+    },
+    {
+      "epoch": 2.7178706322055013,
+      "grad_norm": 1.2857610675448505,
+      "learning_rate": 2.6690169607506697e-07,
+      "loss": 0.6443,
+      "step": 68720
+    },
+    {
+      "epoch": 2.7182661314243903,
+      "grad_norm": 1.296749796922664,
+      "learning_rate": 2.6616046594122866e-07,
+      "loss": 0.6052,
+      "step": 68730
+    },
+    {
+      "epoch": 2.7186616306432794,
+      "grad_norm": 1.2190414086705312,
+      "learning_rate": 2.654202383535032e-07,
+      "loss": 0.6728,
+      "step": 68740
+    },
+    {
+      "epoch": 2.7190571298621684,
+      "grad_norm": 1.6385610649159084,
+      "learning_rate": 2.646810134686567e-07,
+      "loss": 0.6159,
+      "step": 68750
+    },
+    {
+      "epoch": 2.7194526290810574,
+      "grad_norm": 1.400802721941971,
+      "learning_rate": 2.6394279144324576e-07,
+      "loss": 0.6418,
+      "step": 68760
+    },
+    {
+      "epoch": 2.7198481282999465,
+      "grad_norm": 1.5629708272782843,
+      "learning_rate": 2.63205572433613e-07,
+      "loss": 0.6358,
+      "step": 68770
+    },
+    {
+      "epoch": 2.7202436275188355,
+      "grad_norm": 1.387853163638029,
+      "learning_rate": 2.624693565958869e-07,
+      "loss": 0.6522,
+      "step": 68780
+    },
+    {
+      "epoch": 2.7206391267377246,
+      "grad_norm": 1.5030026062662623,
+      "learning_rate": 2.617341440859883e-07,
+      "loss": 0.6392,
+      "step": 68790
+    },
+    {
+      "epoch": 2.7210346259566136,
+      "grad_norm": 1.597726876397117,
+      "learning_rate": 2.6099993505961984e-07,
+      "loss": 0.6216,
+      "step": 68800
+    },
+    {
+      "epoch": 2.7214301251755026,
+      "grad_norm": 1.4943509482838175,
+      "learning_rate": 2.6026672967227664e-07,
+      "loss": 0.6235,
+      "step": 68810
+    },
+    {
+      "epoch": 2.7218256243943917,
+      "grad_norm": 1.5425165452770624,
+      "learning_rate": 2.595345280792372e-07,
+      "loss": 0.6481,
+      "step": 68820
+    },
+    {
+      "epoch": 2.7222211236132807,
+      "grad_norm": 1.3585751304652756,
+      "learning_rate": 2.5880333043557136e-07,
+      "loss": 0.638,
+      "step": 68830
+    },
+    {
+      "epoch": 2.7226166228321698,
+      "grad_norm": 1.3237370151135492,
+      "learning_rate": 2.5807313689613256e-07,
+      "loss": 0.6181,
+      "step": 68840
+    },
+    {
+      "epoch": 2.723012122051059,
+      "grad_norm": 1.5878473618651103,
+      "learning_rate": 2.573439476155637e-07,
+      "loss": 0.6451,
+      "step": 68850
+    },
+    {
+      "epoch": 2.723407621269948,
+      "grad_norm": 1.5731462815375221,
+      "learning_rate": 2.566157627482946e-07,
+      "loss": 0.6169,
+      "step": 68860
+    },
+    {
+      "epoch": 2.723803120488837,
+      "grad_norm": 1.3739114566964035,
+      "learning_rate": 2.558885824485424e-07,
+      "loss": 0.6559,
+      "step": 68870
+    },
+    {
+      "epoch": 2.724198619707726,
+      "grad_norm": 1.5122209680259404,
+      "learning_rate": 2.551624068703112e-07,
+      "loss": 0.6519,
+      "step": 68880
+    },
+    {
+      "epoch": 2.724594118926615,
+      "grad_norm": 1.3420880454933402,
+      "learning_rate": 2.54437236167393e-07,
+      "loss": 0.6336,
+      "step": 68890
+    },
+    {
+      "epoch": 2.724989618145504,
+      "grad_norm": 1.3009560719115958,
+      "learning_rate": 2.53713070493366e-07,
+      "loss": 0.6268,
+      "step": 68900
+    },
+    {
+      "epoch": 2.725385117364393,
+      "grad_norm": 1.6500623502081264,
+      "learning_rate": 2.52989910001597e-07,
+      "loss": 0.639,
+      "step": 68910
+    },
+    {
+      "epoch": 2.7257806165832825,
+      "grad_norm": 1.2388565463344678,
+      "learning_rate": 2.5226775484523737e-07,
+      "loss": 0.6363,
+      "step": 68920
+    },
+    {
+      "epoch": 2.726176115802171,
+      "grad_norm": 1.5017041054242064,
+      "learning_rate": 2.5154660517722917e-07,
+      "loss": 0.6725,
+      "step": 68930
+    },
+    {
+      "epoch": 2.7265716150210606,
+      "grad_norm": 1.3055033887653618,
+      "learning_rate": 2.5082646115029806e-07,
+      "loss": 0.6933,
+      "step": 68940
+    },
+    {
+      "epoch": 2.726967114239949,
+      "grad_norm": 1.4122468539706097,
+      "learning_rate": 2.5010732291695926e-07,
+      "loss": 0.6422,
+      "step": 68950
+    },
+    {
+      "epoch": 2.7273626134588387,
+      "grad_norm": 1.571471474460686,
+      "learning_rate": 2.4938919062951373e-07,
+      "loss": 0.6467,
+      "step": 68960
+    },
+    {
+      "epoch": 2.7277581126777273,
+      "grad_norm": 1.2842866422276005,
+      "learning_rate": 2.4867206444004864e-07,
+      "loss": 0.6517,
+      "step": 68970
+    },
+    {
+      "epoch": 2.7281536118966168,
+      "grad_norm": 1.4313994440518403,
+      "learning_rate": 2.4795594450043925e-07,
+      "loss": 0.6475,
+      "step": 68980
+    },
+    {
+      "epoch": 2.7285491111155054,
+      "grad_norm": 1.29947851444614,
+      "learning_rate": 2.472408309623486e-07,
+      "loss": 0.6547,
+      "step": 68990
+    },
+    {
+      "epoch": 2.728944610334395,
+      "grad_norm": 1.5189394205067743,
+      "learning_rate": 2.4652672397722397e-07,
+      "loss": 0.6514,
+      "step": 69000
+    },
+    {
+      "epoch": 2.7293401095532834,
+      "grad_norm": 1.8697739254086567,
+      "learning_rate": 2.458136236963027e-07,
+      "loss": 0.6283,
+      "step": 69010
+    },
+    {
+      "epoch": 2.729735608772173,
+      "grad_norm": 1.2917153407817616,
+      "learning_rate": 2.4510153027060615e-07,
+      "loss": 0.6458,
+      "step": 69020
+    },
+    {
+      "epoch": 2.7301311079910615,
+      "grad_norm": 1.4988388176962628,
+      "learning_rate": 2.443904438509431e-07,
+      "loss": 0.6587,
+      "step": 69030
+    },
+    {
+      "epoch": 2.730526607209951,
+      "grad_norm": 1.517101504264004,
+      "learning_rate": 2.43680364587911e-07,
+      "loss": 0.6693,
+      "step": 69040
+    },
+    {
+      "epoch": 2.7309221064288396,
+      "grad_norm": 1.5672061335077485,
+      "learning_rate": 2.4297129263189e-07,
+      "loss": 0.6381,
+      "step": 69050
+    },
+    {
+      "epoch": 2.731317605647729,
+      "grad_norm": 1.4367360336621544,
+      "learning_rate": 2.4226322813305226e-07,
+      "loss": 0.6712,
+      "step": 69060
+    },
+    {
+      "epoch": 2.7317131048666177,
+      "grad_norm": 1.5619493447589667,
+      "learning_rate": 2.4155617124135164e-07,
+      "loss": 0.601,
+      "step": 69070
+    },
+    {
+      "epoch": 2.732108604085507,
+      "grad_norm": 1.4542552867826393,
+      "learning_rate": 2.4085012210653235e-07,
+      "loss": 0.7147,
+      "step": 69080
+    },
+    {
+      "epoch": 2.7325041033043957,
+      "grad_norm": 1.4631889254838182,
+      "learning_rate": 2.4014508087812137e-07,
+      "loss": 0.6391,
+      "step": 69090
+    },
+    {
+      "epoch": 2.7328996025232852,
+      "grad_norm": 1.3041780732902712,
+      "learning_rate": 2.3944104770543653e-07,
+      "loss": 0.684,
+      "step": 69100
+    },
+    {
+      "epoch": 2.733295101742174,
+      "grad_norm": 1.4138354713939887,
+      "learning_rate": 2.38738022737578e-07,
+      "loss": 0.6619,
+      "step": 69110
+    },
+    {
+      "epoch": 2.7336906009610633,
+      "grad_norm": 1.4244810808186827,
+      "learning_rate": 2.3803600612343602e-07,
+      "loss": 0.6612,
+      "step": 69120
+    },
+    {
+      "epoch": 2.734086100179952,
+      "grad_norm": 1.5115910890001782,
+      "learning_rate": 2.3733499801168457e-07,
+      "loss": 0.6468,
+      "step": 69130
+    },
+    {
+      "epoch": 2.7344815993988414,
+      "grad_norm": 1.5494320243488706,
+      "learning_rate": 2.3663499855078653e-07,
+      "loss": 0.6827,
+      "step": 69140
+    },
+    {
+      "epoch": 2.73487709861773,
+      "grad_norm": 1.9854899886385176,
+      "learning_rate": 2.3593600788898773e-07,
+      "loss": 0.605,
+      "step": 69150
+    },
+    {
+      "epoch": 2.7352725978366195,
+      "grad_norm": 1.6898890540047589,
+      "learning_rate": 2.3523802617432477e-07,
+      "loss": 0.6226,
+      "step": 69160
+    },
+    {
+      "epoch": 2.735668097055508,
+      "grad_norm": 1.321907731715437,
+      "learning_rate": 2.345410535546161e-07,
+      "loss": 0.6621,
+      "step": 69170
+    },
+    {
+      "epoch": 2.7360635962743975,
+      "grad_norm": 1.5706416558424043,
+      "learning_rate": 2.3384509017747026e-07,
+      "loss": 0.6241,
+      "step": 69180
+    },
+    {
+      "epoch": 2.736459095493286,
+      "grad_norm": 1.3650093850130947,
+      "learning_rate": 2.3315013619027993e-07,
+      "loss": 0.637,
+      "step": 69190
+    },
+    {
+      "epoch": 2.7368545947121756,
+      "grad_norm": 1.4406319757428092,
+      "learning_rate": 2.3245619174022294e-07,
+      "loss": 0.6385,
+      "step": 69200
+    },
+    {
+      "epoch": 2.737250093931064,
+      "grad_norm": 1.7934054487672793,
+      "learning_rate": 2.3176325697426726e-07,
+      "loss": 0.6331,
+      "step": 69210
+    },
+    {
+      "epoch": 2.7376455931499537,
+      "grad_norm": 1.5060467002685176,
+      "learning_rate": 2.3107133203916331e-07,
+      "loss": 0.628,
+      "step": 69220
+    },
+    {
+      "epoch": 2.7380410923688423,
+      "grad_norm": 1.621182495945924,
+      "learning_rate": 2.303804170814483e-07,
+      "loss": 0.6788,
+      "step": 69230
+    },
+    {
+      "epoch": 2.738436591587732,
+      "grad_norm": 1.5765280174694711,
+      "learning_rate": 2.2969051224744743e-07,
+      "loss": 0.6423,
+      "step": 69240
+    },
+    {
+      "epoch": 2.738832090806621,
+      "grad_norm": 1.1734909918378862,
+      "learning_rate": 2.2900161768327044e-07,
+      "loss": 0.6623,
+      "step": 69250
+    },
+    {
+      "epoch": 2.73922759002551,
+      "grad_norm": 1.5728635020318578,
+      "learning_rate": 2.2831373353481234e-07,
+      "loss": 0.6134,
+      "step": 69260
+    },
+    {
+      "epoch": 2.739623089244399,
+      "grad_norm": 1.3922302774790831,
+      "learning_rate": 2.2762685994775657e-07,
+      "loss": 0.6709,
+      "step": 69270
+    },
+    {
+      "epoch": 2.740018588463288,
+      "grad_norm": 1.3963543510513603,
+      "learning_rate": 2.2694099706757066e-07,
+      "loss": 0.6558,
+      "step": 69280
+    },
+    {
+      "epoch": 2.740414087682177,
+      "grad_norm": 1.6029162929281209,
+      "learning_rate": 2.2625614503950843e-07,
+      "loss": 0.6523,
+      "step": 69290
+    },
+    {
+      "epoch": 2.740809586901066,
+      "grad_norm": 1.337793949884056,
+      "learning_rate": 2.2557230400860998e-07,
+      "loss": 0.6461,
+      "step": 69300
+    },
+    {
+      "epoch": 2.741205086119955,
+      "grad_norm": 1.4868880009013101,
+      "learning_rate": 2.2488947411970163e-07,
+      "loss": 0.6439,
+      "step": 69310
+    },
+    {
+      "epoch": 2.741600585338844,
+      "grad_norm": 1.25075291955305,
+      "learning_rate": 2.242076555173939e-07,
+      "loss": 0.6573,
+      "step": 69320
+    },
+    {
+      "epoch": 2.741996084557733,
+      "grad_norm": 1.3198802985255884,
+      "learning_rate": 2.2352684834608617e-07,
+      "loss": 0.6251,
+      "step": 69330
+    },
+    {
+      "epoch": 2.742391583776622,
+      "grad_norm": 1.4158703326538418,
+      "learning_rate": 2.2284705274995933e-07,
+      "loss": 0.6169,
+      "step": 69340
+    },
+    {
+      "epoch": 2.742787082995511,
+      "grad_norm": 1.4411382836941886,
+      "learning_rate": 2.2216826887298426e-07,
+      "loss": 0.6675,
+      "step": 69350
+    },
+    {
+      "epoch": 2.7431825822144003,
+      "grad_norm": 1.3163533262387754,
+      "learning_rate": 2.2149049685891434e-07,
+      "loss": 0.6401,
+      "step": 69360
+    },
+    {
+      "epoch": 2.7435780814332893,
+      "grad_norm": 1.316766547224836,
+      "learning_rate": 2.2081373685129194e-07,
+      "loss": 0.6273,
+      "step": 69370
+    },
+    {
+      "epoch": 2.7439735806521783,
+      "grad_norm": 1.2823164663220994,
+      "learning_rate": 2.2013798899344073e-07,
+      "loss": 0.6741,
+      "step": 69380
+    },
+    {
+      "epoch": 2.7443690798710674,
+      "grad_norm": 1.3709664460718767,
+      "learning_rate": 2.194632534284752e-07,
+      "loss": 0.6275,
+      "step": 69390
+    },
+    {
+      "epoch": 2.7447645790899564,
+      "grad_norm": 1.2116639866310164,
+      "learning_rate": 2.1878953029929094e-07,
+      "loss": 0.6598,
+      "step": 69400
+    },
+    {
+      "epoch": 2.7451600783088455,
+      "grad_norm": 1.5408849494847299,
+      "learning_rate": 2.181168197485717e-07,
+      "loss": 0.6452,
+      "step": 69410
+    },
+    {
+      "epoch": 2.7455555775277345,
+      "grad_norm": 1.5225328335156032,
+      "learning_rate": 2.1744512191878565e-07,
+      "loss": 0.651,
+      "step": 69420
+    },
+    {
+      "epoch": 2.7459510767466235,
+      "grad_norm": 1.3874667113624823,
+      "learning_rate": 2.1677443695218735e-07,
+      "loss": 0.6411,
+      "step": 69430
+    },
+    {
+      "epoch": 2.7463465759655126,
+      "grad_norm": 1.691442001775994,
+      "learning_rate": 2.1610476499081656e-07,
+      "loss": 0.6464,
+      "step": 69440
+    },
+    {
+      "epoch": 2.7467420751844016,
+      "grad_norm": 1.6784529503396555,
+      "learning_rate": 2.1543610617649812e-07,
+      "loss": 0.6622,
+      "step": 69450
+    },
+    {
+      "epoch": 2.7471375744032906,
+      "grad_norm": 1.503828137536102,
+      "learning_rate": 2.1476846065084157e-07,
+      "loss": 0.6581,
+      "step": 69460
+    },
+    {
+      "epoch": 2.7475330736221797,
+      "grad_norm": 1.4900429380242568,
+      "learning_rate": 2.1410182855524376e-07,
+      "loss": 0.6534,
+      "step": 69470
+    },
+    {
+      "epoch": 2.7479285728410687,
+      "grad_norm": 1.7257924432736504,
+      "learning_rate": 2.134362100308862e-07,
+      "loss": 0.5996,
+      "step": 69480
+    },
+    {
+      "epoch": 2.7483240720599578,
+      "grad_norm": 1.38035700512195,
+      "learning_rate": 2.1277160521873452e-07,
+      "loss": 0.6223,
+      "step": 69490
+    },
+    {
+      "epoch": 2.748719571278847,
+      "grad_norm": 1.5150352066029071,
+      "learning_rate": 2.1210801425954165e-07,
+      "loss": 0.647,
+      "step": 69500
+    },
+    {
+      "epoch": 2.749115070497736,
+      "grad_norm": 1.2806491208746578,
+      "learning_rate": 2.1144543729384348e-07,
+      "loss": 0.6299,
+      "step": 69510
+    },
+    {
+      "epoch": 2.749510569716625,
+      "grad_norm": 1.931191023765883,
+      "learning_rate": 2.1078387446196392e-07,
+      "loss": 0.65,
+      "step": 69520
+    },
+    {
+      "epoch": 2.749906068935514,
+      "grad_norm": 1.4297792979739687,
+      "learning_rate": 2.101233259040092e-07,
+      "loss": 0.6547,
+      "step": 69530
+    },
+    {
+      "epoch": 2.750301568154403,
+      "grad_norm": 1.8976901558890449,
+      "learning_rate": 2.094637917598741e-07,
+      "loss": 0.6255,
+      "step": 69540
+    },
+    {
+      "epoch": 2.750697067373292,
+      "grad_norm": 1.454416603524465,
+      "learning_rate": 2.0880527216923462e-07,
+      "loss": 0.6141,
+      "step": 69550
+    },
+    {
+      "epoch": 2.751092566592181,
+      "grad_norm": 1.6237589818678877,
+      "learning_rate": 2.081477672715554e-07,
+      "loss": 0.6735,
+      "step": 69560
+    },
+    {
+      "epoch": 2.75148806581107,
+      "grad_norm": 1.5912237571555568,
+      "learning_rate": 2.0749127720608443e-07,
+      "loss": 0.6454,
+      "step": 69570
+    },
+    {
+      "epoch": 2.751883565029959,
+      "grad_norm": 1.5235691770166913,
+      "learning_rate": 2.068358021118544e-07,
+      "loss": 0.6384,
+      "step": 69580
+    },
+    {
+      "epoch": 2.752279064248848,
+      "grad_norm": 1.4196356588930572,
+      "learning_rate": 2.0618134212768371e-07,
+      "loss": 0.6439,
+      "step": 69590
+    },
+    {
+      "epoch": 2.752674563467737,
+      "grad_norm": 1.396346920875002,
+      "learning_rate": 2.055278973921765e-07,
+      "loss": 0.5926,
+      "step": 69600
+    },
+    {
+      "epoch": 2.7530700626866262,
+      "grad_norm": 1.3020287675495448,
+      "learning_rate": 2.048754680437204e-07,
+      "loss": 0.6271,
+      "step": 69610
+    },
+    {
+      "epoch": 2.7534655619055153,
+      "grad_norm": 1.644638699820278,
+      "learning_rate": 2.0422405422048985e-07,
+      "loss": 0.6654,
+      "step": 69620
+    },
+    {
+      "epoch": 2.7538610611244043,
+      "grad_norm": 1.453576215780983,
+      "learning_rate": 2.0357365606044177e-07,
+      "loss": 0.6833,
+      "step": 69630
+    },
+    {
+      "epoch": 2.7542565603432934,
+      "grad_norm": 1.3471039447623419,
+      "learning_rate": 2.0292427370132095e-07,
+      "loss": 0.6528,
+      "step": 69640
+    },
+    {
+      "epoch": 2.7546520595621824,
+      "grad_norm": 1.3397498760496365,
+      "learning_rate": 2.022759072806535e-07,
+      "loss": 0.6443,
+      "step": 69650
+    },
+    {
+      "epoch": 2.7550475587810714,
+      "grad_norm": 1.389448340434731,
+      "learning_rate": 2.01628556935754e-07,
+      "loss": 0.6575,
+      "step": 69660
+    },
+    {
+      "epoch": 2.7554430579999605,
+      "grad_norm": 1.3721106493078639,
+      "learning_rate": 2.0098222280372003e-07,
+      "loss": 0.6655,
+      "step": 69670
+    },
+    {
+      "epoch": 2.7558385572188495,
+      "grad_norm": 1.530698837259324,
+      "learning_rate": 2.0033690502143266e-07,
+      "loss": 0.643,
+      "step": 69680
+    },
+    {
+      "epoch": 2.7562340564377386,
+      "grad_norm": 1.3278282492733717,
+      "learning_rate": 1.9969260372556033e-07,
+      "loss": 0.6307,
+      "step": 69690
+    },
+    {
+      "epoch": 2.7566295556566276,
+      "grad_norm": 1.501316534193504,
+      "learning_rate": 1.9904931905255553e-07,
+      "loss": 0.6191,
+      "step": 69700
+    },
+    {
+      "epoch": 2.7570250548755166,
+      "grad_norm": 1.367879676115424,
+      "learning_rate": 1.9840705113865322e-07,
+      "loss": 0.627,
+      "step": 69710
+    },
+    {
+      "epoch": 2.7574205540944057,
+      "grad_norm": 1.3133081204376638,
+      "learning_rate": 1.9776580011987566e-07,
+      "loss": 0.6197,
+      "step": 69720
+    },
+    {
+      "epoch": 2.7578160533132947,
+      "grad_norm": 1.4628042349087949,
+      "learning_rate": 1.9712556613202928e-07,
+      "loss": 0.6433,
+      "step": 69730
+    },
+    {
+      "epoch": 2.7582115525321838,
+      "grad_norm": 1.5779612026261522,
+      "learning_rate": 1.9648634931070386e-07,
+      "loss": 0.6288,
+      "step": 69740
+    },
+    {
+      "epoch": 2.758607051751073,
+      "grad_norm": 1.556417617607726,
+      "learning_rate": 1.9584814979127563e-07,
+      "loss": 0.6898,
+      "step": 69750
+    },
+    {
+      "epoch": 2.759002550969962,
+      "grad_norm": 1.5612831206343956,
+      "learning_rate": 1.9521096770890257e-07,
+      "loss": 0.6688,
+      "step": 69760
+    },
+    {
+      "epoch": 2.759398050188851,
+      "grad_norm": 1.3764924788728663,
+      "learning_rate": 1.9457480319853116e-07,
+      "loss": 0.6233,
+      "step": 69770
+    },
+    {
+      "epoch": 2.75979354940774,
+      "grad_norm": 1.351673060665852,
+      "learning_rate": 1.939396563948881e-07,
+      "loss": 0.6579,
+      "step": 69780
+    },
+    {
+      "epoch": 2.760189048626629,
+      "grad_norm": 1.4287970695025112,
+      "learning_rate": 1.9330552743248798e-07,
+      "loss": 0.6762,
+      "step": 69790
+    },
+    {
+      "epoch": 2.760584547845518,
+      "grad_norm": 1.5326256345661453,
+      "learning_rate": 1.9267241644562728e-07,
+      "loss": 0.6562,
+      "step": 69800
+    },
+    {
+      "epoch": 2.760980047064407,
+      "grad_norm": 1.2716691150426762,
+      "learning_rate": 1.920403235683893e-07,
+      "loss": 0.6495,
+      "step": 69810
+    },
+    {
+      "epoch": 2.761375546283296,
+      "grad_norm": 1.629671043633906,
+      "learning_rate": 1.914092489346403e-07,
+      "loss": 0.6476,
+      "step": 69820
+    },
+    {
+      "epoch": 2.761771045502185,
+      "grad_norm": 1.5945873954823444,
+      "learning_rate": 1.9077919267803058e-07,
+      "loss": 0.6204,
+      "step": 69830
+    },
+    {
+      "epoch": 2.762166544721074,
+      "grad_norm": 1.627265891732816,
+      "learning_rate": 1.901501549319945e-07,
+      "loss": 0.6542,
+      "step": 69840
+    },
+    {
+      "epoch": 2.762562043939963,
+      "grad_norm": 1.4076824150040739,
+      "learning_rate": 1.8952213582975332e-07,
+      "loss": 0.6555,
+      "step": 69850
+    },
+    {
+      "epoch": 2.7629575431588522,
+      "grad_norm": 1.6614197374612025,
+      "learning_rate": 1.8889513550430892e-07,
+      "loss": 0.6668,
+      "step": 69860
+    },
+    {
+      "epoch": 2.7633530423777413,
+      "grad_norm": 1.348660418248994,
+      "learning_rate": 1.882691540884507e-07,
+      "loss": 0.6452,
+      "step": 69870
+    },
+    {
+      "epoch": 2.7637485415966303,
+      "grad_norm": 1.2861102602800623,
+      "learning_rate": 1.8764419171474923e-07,
+      "loss": 0.6251,
+      "step": 69880
+    },
+    {
+      "epoch": 2.7641440408155193,
+      "grad_norm": 1.2752208612507339,
+      "learning_rate": 1.8702024851556255e-07,
+      "loss": 0.6337,
+      "step": 69890
+    },
+    {
+      "epoch": 2.7645395400344084,
+      "grad_norm": 1.3812320918730367,
+      "learning_rate": 1.8639732462303051e-07,
+      "loss": 0.6414,
+      "step": 69900
+    },
+    {
+      "epoch": 2.7649350392532974,
+      "grad_norm": 1.5308342141209106,
+      "learning_rate": 1.85775420169077e-07,
+      "loss": 0.6614,
+      "step": 69910
+    },
+    {
+      "epoch": 2.7653305384721865,
+      "grad_norm": 1.2806709986852391,
+      "learning_rate": 1.8515453528541172e-07,
+      "loss": 0.6506,
+      "step": 69920
+    },
+    {
+      "epoch": 2.7657260376910755,
+      "grad_norm": 1.3895428090263962,
+      "learning_rate": 1.8453467010352667e-07,
+      "loss": 0.6299,
+      "step": 69930
+    },
+    {
+      "epoch": 2.7661215369099645,
+      "grad_norm": 1.42698733467093,
+      "learning_rate": 1.839158247546996e-07,
+      "loss": 0.6463,
+      "step": 69940
+    },
+    {
+      "epoch": 2.7665170361288536,
+      "grad_norm": 1.3367842849416025,
+      "learning_rate": 1.8329799936999072e-07,
+      "loss": 0.6443,
+      "step": 69950
+    },
+    {
+      "epoch": 2.7669125353477426,
+      "grad_norm": 1.7551605377211708,
+      "learning_rate": 1.8268119408024476e-07,
+      "loss": 0.6077,
+      "step": 69960
+    },
+    {
+      "epoch": 2.7673080345666317,
+      "grad_norm": 1.5393491753605335,
+      "learning_rate": 1.8206540901609004e-07,
+      "loss": 0.6313,
+      "step": 69970
+    },
+    {
+      "epoch": 2.7677035337855207,
+      "grad_norm": 1.2870384692928958,
+      "learning_rate": 1.8145064430794058e-07,
+      "loss": 0.6516,
+      "step": 69980
+    },
+    {
+      "epoch": 2.7680990330044097,
+      "grad_norm": 1.5348966340685912,
+      "learning_rate": 1.8083690008599163e-07,
+      "loss": 0.6725,
+      "step": 69990
+    },
+    {
+      "epoch": 2.7684945322232988,
+      "grad_norm": 1.2976751426033901,
+      "learning_rate": 1.802241764802254e-07,
+      "loss": 0.6599,
+      "step": 70000
+    },
+    {
+      "epoch": 2.768890031442188,
+      "grad_norm": 1.2988854684535658,
+      "learning_rate": 1.7961247362040468e-07,
+      "loss": 0.6512,
+      "step": 70010
+    },
+    {
+      "epoch": 2.769285530661077,
+      "grad_norm": 1.32835481484248,
+      "learning_rate": 1.7900179163607866e-07,
+      "loss": 0.6453,
+      "step": 70020
+    },
+    {
+      "epoch": 2.769681029879966,
+      "grad_norm": 1.6783409781445278,
+      "learning_rate": 1.7839213065657835e-07,
+      "loss": 0.6353,
+      "step": 70030
+    },
+    {
+      "epoch": 2.770076529098855,
+      "grad_norm": 1.3247836198190301,
+      "learning_rate": 1.7778349081102042e-07,
+      "loss": 0.6365,
+      "step": 70040
+    },
+    {
+      "epoch": 2.770472028317744,
+      "grad_norm": 1.4445303567851318,
+      "learning_rate": 1.77175872228304e-07,
+      "loss": 0.6327,
+      "step": 70050
+    },
+    {
+      "epoch": 2.770867527536633,
+      "grad_norm": 1.2509364936379517,
+      "learning_rate": 1.7656927503711284e-07,
+      "loss": 0.6235,
+      "step": 70060
+    },
+    {
+      "epoch": 2.771263026755522,
+      "grad_norm": 1.4464360294031313,
+      "learning_rate": 1.759636993659125e-07,
+      "loss": 0.6625,
+      "step": 70070
+    },
+    {
+      "epoch": 2.771658525974411,
+      "grad_norm": 1.3427423811400483,
+      "learning_rate": 1.7535914534295485e-07,
+      "loss": 0.6301,
+      "step": 70080
+    },
+    {
+      "epoch": 2.7720540251933,
+      "grad_norm": 1.639592752745467,
+      "learning_rate": 1.7475561309627298e-07,
+      "loss": 0.6448,
+      "step": 70090
+    },
+    {
+      "epoch": 2.772449524412189,
+      "grad_norm": 1.359407536333506,
+      "learning_rate": 1.741531027536858e-07,
+      "loss": 0.6528,
+      "step": 70100
+    },
+    {
+      "epoch": 2.772845023631078,
+      "grad_norm": 1.316898559916085,
+      "learning_rate": 1.7355161444279346e-07,
+      "loss": 0.6831,
+      "step": 70110
+    },
+    {
+      "epoch": 2.7732405228499672,
+      "grad_norm": 1.5151070387663423,
+      "learning_rate": 1.7295114829098237e-07,
+      "loss": 0.6108,
+      "step": 70120
+    },
+    {
+      "epoch": 2.7736360220688563,
+      "grad_norm": 1.6430217099381406,
+      "learning_rate": 1.723517044254197e-07,
+      "loss": 0.6521,
+      "step": 70130
+    },
+    {
+      "epoch": 2.7740315212877453,
+      "grad_norm": 1.2411080704034883,
+      "learning_rate": 1.7175328297305782e-07,
+      "loss": 0.6345,
+      "step": 70140
+    },
+    {
+      "epoch": 2.7744270205066344,
+      "grad_norm": 1.5426078940055101,
+      "learning_rate": 1.7115588406063255e-07,
+      "loss": 0.6668,
+      "step": 70150
+    },
+    {
+      "epoch": 2.7748225197255234,
+      "grad_norm": 1.627580513848178,
+      "learning_rate": 1.7055950781466157e-07,
+      "loss": 0.6261,
+      "step": 70160
+    },
+    {
+      "epoch": 2.7752180189444124,
+      "grad_norm": 1.7985545682125468,
+      "learning_rate": 1.6996415436144887e-07,
+      "loss": 0.6172,
+      "step": 70170
+    },
+    {
+      "epoch": 2.7756135181633015,
+      "grad_norm": 1.6044468180595712,
+      "learning_rate": 1.6936982382707855e-07,
+      "loss": 0.6525,
+      "step": 70180
+    },
+    {
+      "epoch": 2.7760090173821905,
+      "grad_norm": 1.5822653965454174,
+      "learning_rate": 1.687765163374211e-07,
+      "loss": 0.6367,
+      "step": 70190
+    },
+    {
+      "epoch": 2.7764045166010796,
+      "grad_norm": 1.285159732438064,
+      "learning_rate": 1.6818423201812705e-07,
+      "loss": 0.6565,
+      "step": 70200
+    },
+    {
+      "epoch": 2.7768000158199686,
+      "grad_norm": 1.3308012964646518,
+      "learning_rate": 1.675929709946339e-07,
+      "loss": 0.6206,
+      "step": 70210
+    },
+    {
+      "epoch": 2.7771955150388576,
+      "grad_norm": 1.5697358656515874,
+      "learning_rate": 1.6700273339215922e-07,
+      "loss": 0.6302,
+      "step": 70220
+    },
+    {
+      "epoch": 2.7775910142577467,
+      "grad_norm": 1.5741341975123648,
+      "learning_rate": 1.664135193357058e-07,
+      "loss": 0.6657,
+      "step": 70230
+    },
+    {
+      "epoch": 2.7779865134766357,
+      "grad_norm": 1.6300738170318227,
+      "learning_rate": 1.6582532895005888e-07,
+      "loss": 0.6337,
+      "step": 70240
+    },
+    {
+      "epoch": 2.7783820126955248,
+      "grad_norm": 1.386117346121053,
+      "learning_rate": 1.6523816235978817e-07,
+      "loss": 0.6603,
+      "step": 70250
+    },
+    {
+      "epoch": 2.778777511914414,
+      "grad_norm": 1.3896923700628134,
+      "learning_rate": 1.646520196892437e-07,
+      "loss": 0.6435,
+      "step": 70260
+    },
+    {
+      "epoch": 2.7791730111333033,
+      "grad_norm": 1.49885510815463,
+      "learning_rate": 1.6406690106256174e-07,
+      "loss": 0.6458,
+      "step": 70270
+    },
+    {
+      "epoch": 2.779568510352192,
+      "grad_norm": 1.4641701806077778,
+      "learning_rate": 1.6348280660365978e-07,
+      "loss": 0.6476,
+      "step": 70280
+    },
+    {
+      "epoch": 2.7799640095710814,
+      "grad_norm": 1.565453130641798,
+      "learning_rate": 1.6289973643623947e-07,
+      "loss": 0.618,
+      "step": 70290
+    },
+    {
+      "epoch": 2.78035950878997,
+      "grad_norm": 1.4376018413566471,
+      "learning_rate": 1.623176906837848e-07,
+      "loss": 0.6301,
+      "step": 70300
+    },
+    {
+      "epoch": 2.7807550080088594,
+      "grad_norm": 1.25605809644225,
+      "learning_rate": 1.6173666946956378e-07,
+      "loss": 0.6385,
+      "step": 70310
+    },
+    {
+      "epoch": 2.781150507227748,
+      "grad_norm": 1.3395555696135488,
+      "learning_rate": 1.611566729166253e-07,
+      "loss": 0.6392,
+      "step": 70320
+    },
+    {
+      "epoch": 2.7815460064466375,
+      "grad_norm": 1.4924151482989823,
+      "learning_rate": 1.605777011478038e-07,
+      "loss": 0.6438,
+      "step": 70330
+    },
+    {
+      "epoch": 2.781941505665526,
+      "grad_norm": 1.4345607053485638,
+      "learning_rate": 1.5999975428571513e-07,
+      "loss": 0.6113,
+      "step": 70340
+    },
+    {
+      "epoch": 2.7823370048844156,
+      "grad_norm": 1.676604636784896,
+      "learning_rate": 1.5942283245275913e-07,
+      "loss": 0.642,
+      "step": 70350
+    },
+    {
+      "epoch": 2.782732504103304,
+      "grad_norm": 1.5528653306400786,
+      "learning_rate": 1.5884693577111698e-07,
+      "loss": 0.6533,
+      "step": 70360
+    },
+    {
+      "epoch": 2.7831280033221937,
+      "grad_norm": 1.4631525480539687,
+      "learning_rate": 1.5827206436275443e-07,
+      "loss": 0.6514,
+      "step": 70370
+    },
+    {
+      "epoch": 2.7835235025410823,
+      "grad_norm": 1.5734008953544165,
+      "learning_rate": 1.5769821834941968e-07,
+      "loss": 0.6043,
+      "step": 70380
+    },
+    {
+      "epoch": 2.7839190017599718,
+      "grad_norm": 1.294508863775289,
+      "learning_rate": 1.571253978526427e-07,
+      "loss": 0.6511,
+      "step": 70390
+    },
+    {
+      "epoch": 2.7843145009788604,
+      "grad_norm": 1.57276638787535,
+      "learning_rate": 1.5655360299373757e-07,
+      "loss": 0.6454,
+      "step": 70400
+    },
+    {
+      "epoch": 2.78471000019775,
+      "grad_norm": 1.3818690470423782,
+      "learning_rate": 1.5598283389379965e-07,
+      "loss": 0.6531,
+      "step": 70410
+    },
+    {
+      "epoch": 2.7851054994166384,
+      "grad_norm": 1.1968357075455516,
+      "learning_rate": 1.5541309067371002e-07,
+      "loss": 0.6573,
+      "step": 70420
+    },
+    {
+      "epoch": 2.785500998635528,
+      "grad_norm": 1.4734324030449588,
+      "learning_rate": 1.548443734541294e-07,
+      "loss": 0.6251,
+      "step": 70430
+    },
+    {
+      "epoch": 2.7858964978544165,
+      "grad_norm": 1.6733311382296236,
+      "learning_rate": 1.5427668235550196e-07,
+      "loss": 0.6193,
+      "step": 70440
+    },
+    {
+      "epoch": 2.786291997073306,
+      "grad_norm": 1.3773379777397299,
+      "learning_rate": 1.5371001749805492e-07,
+      "loss": 0.6307,
+      "step": 70450
+    },
+    {
+      "epoch": 2.7866874962921946,
+      "grad_norm": 1.4497533763907762,
+      "learning_rate": 1.5314437900179945e-07,
+      "loss": 0.6689,
+      "step": 70460
+    },
+    {
+      "epoch": 2.787082995511084,
+      "grad_norm": 1.74263901083047,
+      "learning_rate": 1.5257976698652644e-07,
+      "loss": 0.6346,
+      "step": 70470
+    },
+    {
+      "epoch": 2.7874784947299727,
+      "grad_norm": 1.4079424098713957,
+      "learning_rate": 1.520161815718124e-07,
+      "loss": 0.6447,
+      "step": 70480
+    },
+    {
+      "epoch": 2.787873993948862,
+      "grad_norm": 1.4718817217638045,
+      "learning_rate": 1.5145362287701416e-07,
+      "loss": 0.6744,
+      "step": 70490
+    },
+    {
+      "epoch": 2.7882694931677507,
+      "grad_norm": 1.579376219566021,
+      "learning_rate": 1.50892091021273e-07,
+      "loss": 0.6518,
+      "step": 70500
+    },
+    {
+      "epoch": 2.7886649923866402,
+      "grad_norm": 1.5148739392164345,
+      "learning_rate": 1.5033158612350996e-07,
+      "loss": 0.6819,
+      "step": 70510
+    },
+    {
+      "epoch": 2.789060491605529,
+      "grad_norm": 1.4138321887912344,
+      "learning_rate": 1.4977210830243282e-07,
+      "loss": 0.6234,
+      "step": 70520
+    },
+    {
+      "epoch": 2.7894559908244183,
+      "grad_norm": 1.3767362246811161,
+      "learning_rate": 1.4921365767652741e-07,
+      "loss": 0.6458,
+      "step": 70530
+    },
+    {
+      "epoch": 2.789851490043307,
+      "grad_norm": 1.2651323005352821,
+      "learning_rate": 1.4865623436406517e-07,
+      "loss": 0.6643,
+      "step": 70540
+    },
+    {
+      "epoch": 2.7902469892621964,
+      "grad_norm": 1.54661973112921,
+      "learning_rate": 1.4809983848309783e-07,
+      "loss": 0.6151,
+      "step": 70550
+    },
+    {
+      "epoch": 2.790642488481085,
+      "grad_norm": 1.5729891372496556,
+      "learning_rate": 1.4754447015146167e-07,
+      "loss": 0.6439,
+      "step": 70560
+    },
+    {
+      "epoch": 2.7910379876999745,
+      "grad_norm": 1.4062440147445077,
+      "learning_rate": 1.4699012948677317e-07,
+      "loss": 0.6208,
+      "step": 70570
+    },
+    {
+      "epoch": 2.791433486918863,
+      "grad_norm": 1.2542659186910414,
+      "learning_rate": 1.4643681660643228e-07,
+      "loss": 0.6617,
+      "step": 70580
+    },
+    {
+      "epoch": 2.7918289861377525,
+      "grad_norm": 1.63192997647672,
+      "learning_rate": 1.458845316276214e-07,
+      "loss": 0.6362,
+      "step": 70590
+    },
+    {
+      "epoch": 2.7922244853566416,
+      "grad_norm": 1.678368655790032,
+      "learning_rate": 1.4533327466730585e-07,
+      "loss": 0.6628,
+      "step": 70600
+    },
+    {
+      "epoch": 2.7926199845755306,
+      "grad_norm": 1.6681442093536465,
+      "learning_rate": 1.4478304584223168e-07,
+      "loss": 0.6522,
+      "step": 70610
+    },
+    {
+      "epoch": 2.7930154837944197,
+      "grad_norm": 1.5728678176256543,
+      "learning_rate": 1.442338452689268e-07,
+      "loss": 0.6477,
+      "step": 70620
+    },
+    {
+      "epoch": 2.7934109830133087,
+      "grad_norm": 1.4940185489575846,
+      "learning_rate": 1.4368567306370486e-07,
+      "loss": 0.6628,
+      "step": 70630
+    },
+    {
+      "epoch": 2.7938064822321977,
+      "grad_norm": 1.3653235148324767,
+      "learning_rate": 1.431385293426568e-07,
+      "loss": 0.6243,
+      "step": 70640
+    },
+    {
+      "epoch": 2.794201981451087,
+      "grad_norm": 1.2546474939810106,
+      "learning_rate": 1.4259241422166058e-07,
+      "loss": 0.6425,
+      "step": 70650
+    },
+    {
+      "epoch": 2.794597480669976,
+      "grad_norm": 1.4349327109309442,
+      "learning_rate": 1.4204732781637255e-07,
+      "loss": 0.6484,
+      "step": 70660
+    },
+    {
+      "epoch": 2.794992979888865,
+      "grad_norm": 1.3787818241105554,
+      "learning_rate": 1.415032702422331e-07,
+      "loss": 0.652,
+      "step": 70670
+    },
+    {
+      "epoch": 2.795388479107754,
+      "grad_norm": 1.633678026200489,
+      "learning_rate": 1.4096024161446453e-07,
+      "loss": 0.6367,
+      "step": 70680
+    },
+    {
+      "epoch": 2.795783978326643,
+      "grad_norm": 1.548539279796999,
+      "learning_rate": 1.4041824204807098e-07,
+      "loss": 0.6514,
+      "step": 70690
+    },
+    {
+      "epoch": 2.796179477545532,
+      "grad_norm": 1.9867896044037945,
+      "learning_rate": 1.3987727165783782e-07,
+      "loss": 0.6117,
+      "step": 70700
+    },
+    {
+      "epoch": 2.796574976764421,
+      "grad_norm": 1.4428548669428978,
+      "learning_rate": 1.3933733055833453e-07,
+      "loss": 0.6561,
+      "step": 70710
+    },
+    {
+      "epoch": 2.79697047598331,
+      "grad_norm": 1.555012078990304,
+      "learning_rate": 1.3879841886391077e-07,
+      "loss": 0.6187,
+      "step": 70720
+    },
+    {
+      "epoch": 2.797365975202199,
+      "grad_norm": 1.4561341297622712,
+      "learning_rate": 1.3826053668869854e-07,
+      "loss": 0.6682,
+      "step": 70730
+    },
+    {
+      "epoch": 2.797761474421088,
+      "grad_norm": 1.3523860626914401,
+      "learning_rate": 1.377236841466123e-07,
+      "loss": 0.6529,
+      "step": 70740
+    },
+    {
+      "epoch": 2.798156973639977,
+      "grad_norm": 1.6296871689652246,
+      "learning_rate": 1.3718786135134887e-07,
+      "loss": 0.589,
+      "step": 70750
+    },
+    {
+      "epoch": 2.798552472858866,
+      "grad_norm": 1.3072960671335296,
+      "learning_rate": 1.3665306841638526e-07,
+      "loss": 0.6695,
+      "step": 70760
+    },
+    {
+      "epoch": 2.7989479720777553,
+      "grad_norm": 1.252329441611291,
+      "learning_rate": 1.3611930545498253e-07,
+      "loss": 0.6504,
+      "step": 70770
+    },
+    {
+      "epoch": 2.7993434712966443,
+      "grad_norm": 1.4252708063986979,
+      "learning_rate": 1.3558657258018193e-07,
+      "loss": 0.6652,
+      "step": 70780
+    },
+    {
+      "epoch": 2.7997389705155333,
+      "grad_norm": 1.395769794154177,
+      "learning_rate": 1.350548699048071e-07,
+      "loss": 0.6434,
+      "step": 70790
+    },
+    {
+      "epoch": 2.8001344697344224,
+      "grad_norm": 1.588739536643672,
+      "learning_rate": 1.345241975414635e-07,
+      "loss": 0.6528,
+      "step": 70800
+    },
+    {
+      "epoch": 2.8005299689533114,
+      "grad_norm": 1.2582128132655923,
+      "learning_rate": 1.3399455560253903e-07,
+      "loss": 0.663,
+      "step": 70810
+    },
+    {
+      "epoch": 2.8009254681722004,
+      "grad_norm": 1.2966416685606534,
+      "learning_rate": 1.3346594420020176e-07,
+      "loss": 0.6595,
+      "step": 70820
+    },
+    {
+      "epoch": 2.8013209673910895,
+      "grad_norm": 1.2699773005541533,
+      "learning_rate": 1.3293836344640321e-07,
+      "loss": 0.6417,
+      "step": 70830
+    },
+    {
+      "epoch": 2.8017164666099785,
+      "grad_norm": 1.283172380779107,
+      "learning_rate": 1.3241181345287624e-07,
+      "loss": 0.6369,
+      "step": 70840
+    },
+    {
+      "epoch": 2.8021119658288676,
+      "grad_norm": 1.72367921616284,
+      "learning_rate": 1.3188629433113443e-07,
+      "loss": 0.6225,
+      "step": 70850
+    },
+    {
+      "epoch": 2.8025074650477566,
+      "grad_norm": 1.2816956811493818,
+      "learning_rate": 1.313618061924743e-07,
+      "loss": 0.6617,
+      "step": 70860
+    },
+    {
+      "epoch": 2.8029029642666456,
+      "grad_norm": 1.338547532837257,
+      "learning_rate": 1.3083834914797255e-07,
+      "loss": 0.6667,
+      "step": 70870
+    },
+    {
+      "epoch": 2.8032984634855347,
+      "grad_norm": 1.7753813131278051,
+      "learning_rate": 1.3031592330848995e-07,
+      "loss": 0.6398,
+      "step": 70880
+    },
+    {
+      "epoch": 2.8036939627044237,
+      "grad_norm": 1.2871159392263742,
+      "learning_rate": 1.2979452878466582e-07,
+      "loss": 0.6153,
+      "step": 70890
+    },
+    {
+      "epoch": 2.8040894619233128,
+      "grad_norm": 1.3586814422551918,
+      "learning_rate": 1.292741656869234e-07,
+      "loss": 0.6187,
+      "step": 70900
+    },
+    {
+      "epoch": 2.804484961142202,
+      "grad_norm": 1.2812685003902786,
+      "learning_rate": 1.287548341254663e-07,
+      "loss": 0.6718,
+      "step": 70910
+    },
+    {
+      "epoch": 2.804880460361091,
+      "grad_norm": 1.4752136792868573,
+      "learning_rate": 1.2823653421028092e-07,
+      "loss": 0.6014,
+      "step": 70920
+    },
+    {
+      "epoch": 2.80527595957998,
+      "grad_norm": 1.7100484199761878,
+      "learning_rate": 1.2771926605113283e-07,
+      "loss": 0.6408,
+      "step": 70930
+    },
+    {
+      "epoch": 2.805671458798869,
+      "grad_norm": 1.503382808450596,
+      "learning_rate": 1.2720302975757214e-07,
+      "loss": 0.6408,
+      "step": 70940
+    },
+    {
+      "epoch": 2.806066958017758,
+      "grad_norm": 1.6564643609656255,
+      "learning_rate": 1.26687825438927e-07,
+      "loss": 0.6475,
+      "step": 70950
+    },
+    {
+      "epoch": 2.806462457236647,
+      "grad_norm": 1.4523722470610614,
+      "learning_rate": 1.2617365320431063e-07,
+      "loss": 0.61,
+      "step": 70960
+    },
+    {
+      "epoch": 2.806857956455536,
+      "grad_norm": 1.8597013290015023,
+      "learning_rate": 1.2566051316261485e-07,
+      "loss": 0.6691,
+      "step": 70970
+    },
+    {
+      "epoch": 2.807253455674425,
+      "grad_norm": 1.560784573235987,
+      "learning_rate": 1.2514840542251495e-07,
+      "loss": 0.6178,
+      "step": 70980
+    },
+    {
+      "epoch": 2.807648954893314,
+      "grad_norm": 1.5515378282283863,
+      "learning_rate": 1.246373300924647e-07,
+      "loss": 0.6258,
+      "step": 70990
+    },
+    {
+      "epoch": 2.808044454112203,
+      "grad_norm": 1.499857883745644,
+      "learning_rate": 1.2412728728070367e-07,
+      "loss": 0.637,
+      "step": 71000
+    },
+    {
+      "epoch": 2.808439953331092,
+      "grad_norm": 1.729513914812002,
+      "learning_rate": 1.2361827709524765e-07,
+      "loss": 0.6154,
+      "step": 71010
+    },
+    {
+      "epoch": 2.8088354525499812,
+      "grad_norm": 1.4987849422891406,
+      "learning_rate": 1.231102996438982e-07,
+      "loss": 0.5912,
+      "step": 71020
+    },
+    {
+      "epoch": 2.8092309517688703,
+      "grad_norm": 1.3519845243946256,
+      "learning_rate": 1.2260335503423537e-07,
+      "loss": 0.6385,
+      "step": 71030
+    },
+    {
+      "epoch": 2.8096264509877593,
+      "grad_norm": 1.3482338424726288,
+      "learning_rate": 1.220974433736216e-07,
+      "loss": 0.6107,
+      "step": 71040
+    },
+    {
+      "epoch": 2.8100219502066484,
+      "grad_norm": 1.3356539631697795,
+      "learning_rate": 1.2159256476919957e-07,
+      "loss": 0.6527,
+      "step": 71050
+    },
+    {
+      "epoch": 2.8104174494255374,
+      "grad_norm": 1.379259505253153,
+      "learning_rate": 1.2108871932789534e-07,
+      "loss": 0.6691,
+      "step": 71060
+    },
+    {
+      "epoch": 2.8108129486444264,
+      "grad_norm": 1.7146532877745266,
+      "learning_rate": 1.205859071564136e-07,
+      "loss": 0.6463,
+      "step": 71070
+    },
+    {
+      "epoch": 2.8112084478633155,
+      "grad_norm": 1.533848926021102,
+      "learning_rate": 1.2008412836124139e-07,
+      "loss": 0.6026,
+      "step": 71080
+    },
+    {
+      "epoch": 2.8116039470822045,
+      "grad_norm": 1.3335394683551018,
+      "learning_rate": 1.1958338304864704e-07,
+      "loss": 0.651,
+      "step": 71090
+    },
+    {
+      "epoch": 2.8119994463010936,
+      "grad_norm": 1.797687976638747,
+      "learning_rate": 1.1908367132468012e-07,
+      "loss": 0.65,
+      "step": 71100
+    },
+    {
+      "epoch": 2.8123949455199826,
+      "grad_norm": 1.3262616638829634,
+      "learning_rate": 1.1858499329517104e-07,
+      "loss": 0.6716,
+      "step": 71110
+    },
+    {
+      "epoch": 2.8127904447388716,
+      "grad_norm": 1.7733830450989985,
+      "learning_rate": 1.1808734906573083e-07,
+      "loss": 0.6092,
+      "step": 71120
+    },
+    {
+      "epoch": 2.8131859439577607,
+      "grad_norm": 1.4960562923521619,
+      "learning_rate": 1.1759073874175242e-07,
+      "loss": 0.6072,
+      "step": 71130
+    },
+    {
+      "epoch": 2.8135814431766497,
+      "grad_norm": 1.400976180118364,
+      "learning_rate": 1.170951624284089e-07,
+      "loss": 0.679,
+      "step": 71140
+    },
+    {
+      "epoch": 2.8139769423955387,
+      "grad_norm": 1.455967690331034,
+      "learning_rate": 1.1660062023065521e-07,
+      "loss": 0.6223,
+      "step": 71150
+    },
+    {
+      "epoch": 2.814372441614428,
+      "grad_norm": 1.4461566745830368,
+      "learning_rate": 1.1610711225322702e-07,
+      "loss": 0.6312,
+      "step": 71160
+    },
+    {
+      "epoch": 2.814767940833317,
+      "grad_norm": 1.3091359959437165,
+      "learning_rate": 1.1561463860064126e-07,
+      "loss": 0.6698,
+      "step": 71170
+    },
+    {
+      "epoch": 2.815163440052206,
+      "grad_norm": 1.7632193077363547,
+      "learning_rate": 1.151231993771934e-07,
+      "loss": 0.6276,
+      "step": 71180
+    },
+    {
+      "epoch": 2.815558939271095,
+      "grad_norm": 1.3683489118262935,
+      "learning_rate": 1.1463279468696464e-07,
+      "loss": 0.634,
+      "step": 71190
+    },
+    {
+      "epoch": 2.815954438489984,
+      "grad_norm": 1.4772338543995671,
+      "learning_rate": 1.1414342463381189e-07,
+      "loss": 0.665,
+      "step": 71200
+    },
+    {
+      "epoch": 2.816349937708873,
+      "grad_norm": 1.7091646355933343,
+      "learning_rate": 1.1365508932137726e-07,
+      "loss": 0.645,
+      "step": 71210
+    },
+    {
+      "epoch": 2.816745436927762,
+      "grad_norm": 1.3713118845500332,
+      "learning_rate": 1.1316778885308022e-07,
+      "loss": 0.6484,
+      "step": 71220
+    },
+    {
+      "epoch": 2.817140936146651,
+      "grad_norm": 1.6795543988991155,
+      "learning_rate": 1.1268152333212378e-07,
+      "loss": 0.656,
+      "step": 71230
+    },
+    {
+      "epoch": 2.81753643536554,
+      "grad_norm": 1.6196904160666268,
+      "learning_rate": 1.1219629286148948e-07,
+      "loss": 0.6343,
+      "step": 71240
+    },
+    {
+      "epoch": 2.817931934584429,
+      "grad_norm": 1.5980321479095312,
+      "learning_rate": 1.117120975439423e-07,
+      "loss": 0.6637,
+      "step": 71250
+    },
+    {
+      "epoch": 2.818327433803318,
+      "grad_norm": 1.2787969320191077,
+      "learning_rate": 1.1122893748202578e-07,
+      "loss": 0.6488,
+      "step": 71260
+    },
+    {
+      "epoch": 2.8187229330222072,
+      "grad_norm": 1.5814500169646448,
+      "learning_rate": 1.1074681277806476e-07,
+      "loss": 0.6441,
+      "step": 71270
+    },
+    {
+      "epoch": 2.8191184322410963,
+      "grad_norm": 1.6815995049544257,
+      "learning_rate": 1.1026572353416532e-07,
+      "loss": 0.6207,
+      "step": 71280
+    },
+    {
+      "epoch": 2.8195139314599853,
+      "grad_norm": 1.236125210479998,
+      "learning_rate": 1.0978566985221428e-07,
+      "loss": 0.6214,
+      "step": 71290
+    },
+    {
+      "epoch": 2.8199094306788743,
+      "grad_norm": 1.3884029202987864,
+      "learning_rate": 1.0930665183387701e-07,
+      "loss": 0.6369,
+      "step": 71300
+    },
+    {
+      "epoch": 2.8203049298977634,
+      "grad_norm": 1.6981650316123413,
+      "learning_rate": 1.08828669580604e-07,
+      "loss": 0.6346,
+      "step": 71310
+    },
+    {
+      "epoch": 2.8207004291166524,
+      "grad_norm": 1.3093396160459512,
+      "learning_rate": 1.0835172319362153e-07,
+      "loss": 0.6644,
+      "step": 71320
+    },
+    {
+      "epoch": 2.8210959283355415,
+      "grad_norm": 1.4272402044636827,
+      "learning_rate": 1.0787581277393932e-07,
+      "loss": 0.5882,
+      "step": 71330
+    },
+    {
+      "epoch": 2.8214914275544305,
+      "grad_norm": 1.5532684370975687,
+      "learning_rate": 1.0740093842234734e-07,
+      "loss": 0.6195,
+      "step": 71340
+    },
+    {
+      "epoch": 2.8218869267733195,
+      "grad_norm": 1.3198582362346123,
+      "learning_rate": 1.0692710023941566e-07,
+      "loss": 0.6564,
+      "step": 71350
+    },
+    {
+      "epoch": 2.8222824259922086,
+      "grad_norm": 1.7941719683925077,
+      "learning_rate": 1.0645429832549514e-07,
+      "loss": 0.6093,
+      "step": 71360
+    },
+    {
+      "epoch": 2.8226779252110976,
+      "grad_norm": 1.5344986401770568,
+      "learning_rate": 1.0598253278071679e-07,
+      "loss": 0.6497,
+      "step": 71370
+    },
+    {
+      "epoch": 2.8230734244299867,
+      "grad_norm": 1.3474874968039983,
+      "learning_rate": 1.0551180370499348e-07,
+      "loss": 0.6759,
+      "step": 71380
+    },
+    {
+      "epoch": 2.8234689236488757,
+      "grad_norm": 1.3443842174996126,
+      "learning_rate": 1.0504211119801599e-07,
+      "loss": 0.6632,
+      "step": 71390
+    },
+    {
+      "epoch": 2.8238644228677647,
+      "grad_norm": 1.4571229259003577,
+      "learning_rate": 1.0457345535925866e-07,
+      "loss": 0.6473,
+      "step": 71400
+    },
+    {
+      "epoch": 2.8242599220866538,
+      "grad_norm": 1.4397939063178338,
+      "learning_rate": 1.0410583628797377e-07,
+      "loss": 0.6083,
+      "step": 71410
+    },
+    {
+      "epoch": 2.824655421305543,
+      "grad_norm": 1.4478803115383612,
+      "learning_rate": 1.0363925408319597e-07,
+      "loss": 0.6516,
+      "step": 71420
+    },
+    {
+      "epoch": 2.825050920524432,
+      "grad_norm": 1.6263864733819333,
+      "learning_rate": 1.0317370884373789e-07,
+      "loss": 0.6157,
+      "step": 71430
+    },
+    {
+      "epoch": 2.825446419743321,
+      "grad_norm": 1.648136273206422,
+      "learning_rate": 1.0270920066819567e-07,
+      "loss": 0.6413,
+      "step": 71440
+    },
+    {
+      "epoch": 2.82584191896221,
+      "grad_norm": 1.5424322560928423,
+      "learning_rate": 1.0224572965494284e-07,
+      "loss": 0.6658,
+      "step": 71450
+    },
+    {
+      "epoch": 2.826237418181099,
+      "grad_norm": 1.4329446773604395,
+      "learning_rate": 1.0178329590213532e-07,
+      "loss": 0.6333,
+      "step": 71460
+    },
+    {
+      "epoch": 2.826632917399988,
+      "grad_norm": 1.2667116444395536,
+      "learning_rate": 1.0132189950770865e-07,
+      "loss": 0.682,
+      "step": 71470
+    },
+    {
+      "epoch": 2.827028416618877,
+      "grad_norm": 1.3723226841049856,
+      "learning_rate": 1.0086154056937858e-07,
+      "loss": 0.6571,
+      "step": 71480
+    },
+    {
+      "epoch": 2.827423915837766,
+      "grad_norm": 1.4224864145469023,
+      "learning_rate": 1.0040221918464155e-07,
+      "loss": 0.6362,
+      "step": 71490
+    },
+    {
+      "epoch": 2.827819415056655,
+      "grad_norm": 1.5072740557916464,
+      "learning_rate": 9.994393545077308e-08,
+      "loss": 0.6698,
+      "step": 71500
+    },
+    {
+      "epoch": 2.828214914275544,
+      "grad_norm": 1.5421550523979828,
+      "learning_rate": 9.94866894648311e-08,
+      "loss": 0.6513,
+      "step": 71510
+    },
+    {
+      "epoch": 2.828610413494433,
+      "grad_norm": 1.6190451322504884,
+      "learning_rate": 9.90304813236509e-08,
+      "loss": 0.6508,
+      "step": 71520
+    },
+    {
+      "epoch": 2.8290059127133222,
+      "grad_norm": 1.4927239530699181,
+      "learning_rate": 9.857531112385133e-08,
+      "loss": 0.6173,
+      "step": 71530
+    },
+    {
+      "epoch": 2.8294014119322113,
+      "grad_norm": 1.5948860647555736,
+      "learning_rate": 9.81211789618286e-08,
+      "loss": 0.6582,
+      "step": 71540
+    },
+    {
+      "epoch": 2.8297969111511003,
+      "grad_norm": 1.42711083667429,
+      "learning_rate": 9.766808493375968e-08,
+      "loss": 0.6242,
+      "step": 71550
+    },
+    {
+      "epoch": 2.8301924103699894,
+      "grad_norm": 1.393300021615929,
+      "learning_rate": 9.72160291356028e-08,
+      "loss": 0.6473,
+      "step": 71560
+    },
+    {
+      "epoch": 2.8305879095888784,
+      "grad_norm": 1.3943864491392837,
+      "learning_rate": 9.676501166309582e-08,
+      "loss": 0.693,
+      "step": 71570
+    },
+    {
+      "epoch": 2.8309834088077674,
+      "grad_norm": 1.4348801207867234,
+      "learning_rate": 9.631503261175567e-08,
+      "loss": 0.6408,
+      "step": 71580
+    },
+    {
+      "epoch": 2.8313789080266565,
+      "grad_norm": 1.53546313887525,
+      "learning_rate": 9.58660920768817e-08,
+      "loss": 0.647,
+      "step": 71590
+    },
+    {
+      "epoch": 2.831774407245546,
+      "grad_norm": 1.5414501086832788,
+      "learning_rate": 9.541819015355003e-08,
+      "loss": 0.639,
+      "step": 71600
+    },
+    {
+      "epoch": 2.8321699064644346,
+      "grad_norm": 1.4870570239235834,
+      "learning_rate": 9.497132693661981e-08,
+      "loss": 0.6565,
+      "step": 71610
+    },
+    {
+      "epoch": 2.832565405683324,
+      "grad_norm": 1.4479950531786703,
+      "learning_rate": 9.452550252072867e-08,
+      "loss": 0.6463,
+      "step": 71620
+    },
+    {
+      "epoch": 2.8329609049022126,
+      "grad_norm": 1.5534376854482819,
+      "learning_rate": 9.408071700029442e-08,
+      "loss": 0.6548,
+      "step": 71630
+    },
+    {
+      "epoch": 2.833356404121102,
+      "grad_norm": 1.5704144184999533,
+      "learning_rate": 9.363697046951504e-08,
+      "loss": 0.6578,
+      "step": 71640
+    },
+    {
+      "epoch": 2.8337519033399907,
+      "grad_norm": 1.3940005353907636,
+      "learning_rate": 9.319426302236922e-08,
+      "loss": 0.6522,
+      "step": 71650
+    },
+    {
+      "epoch": 2.83414740255888,
+      "grad_norm": 1.3620695261247602,
+      "learning_rate": 9.275259475261366e-08,
+      "loss": 0.6503,
+      "step": 71660
+    },
+    {
+      "epoch": 2.834542901777769,
+      "grad_norm": 1.5659402360075487,
+      "learning_rate": 9.231196575378687e-08,
+      "loss": 0.6114,
+      "step": 71670
+    },
+    {
+      "epoch": 2.8349384009966583,
+      "grad_norm": 1.287793740585784,
+      "learning_rate": 9.187237611920585e-08,
+      "loss": 0.6371,
+      "step": 71680
+    },
+    {
+      "epoch": 2.835333900215547,
+      "grad_norm": 1.3874033632511464,
+      "learning_rate": 9.143382594196948e-08,
+      "loss": 0.6344,
+      "step": 71690
+    },
+    {
+      "epoch": 2.8357293994344364,
+      "grad_norm": 1.4629847629990715,
+      "learning_rate": 9.099631531495346e-08,
+      "loss": 0.636,
+      "step": 71700
+    },
+    {
+      "epoch": 2.836124898653325,
+      "grad_norm": 1.4942618092172686,
+      "learning_rate": 9.055984433081588e-08,
+      "loss": 0.6901,
+      "step": 71710
+    },
+    {
+      "epoch": 2.8365203978722144,
+      "grad_norm": 1.2801768958137962,
+      "learning_rate": 9.012441308199449e-08,
+      "loss": 0.6434,
+      "step": 71720
+    },
+    {
+      "epoch": 2.836915897091103,
+      "grad_norm": 1.6076141619874595,
+      "learning_rate": 8.969002166070496e-08,
+      "loss": 0.6337,
+      "step": 71730
+    },
+    {
+      "epoch": 2.8373113963099925,
+      "grad_norm": 1.2415270630600788,
+      "learning_rate": 8.925667015894479e-08,
+      "loss": 0.655,
+      "step": 71740
+    },
+    {
+      "epoch": 2.837706895528881,
+      "grad_norm": 1.409355282040025,
+      "learning_rate": 8.882435866848948e-08,
+      "loss": 0.6421,
+      "step": 71750
+    },
+    {
+      "epoch": 2.8381023947477706,
+      "grad_norm": 1.5267283857952438,
+      "learning_rate": 8.839308728089635e-08,
+      "loss": 0.6584,
+      "step": 71760
+    },
+    {
+      "epoch": 2.838497893966659,
+      "grad_norm": 1.56125606558126,
+      "learning_rate": 8.79628560875001e-08,
+      "loss": 0.6636,
+      "step": 71770
+    },
+    {
+      "epoch": 2.8388933931855487,
+      "grad_norm": 1.3706291895479226,
+      "learning_rate": 8.753366517941841e-08,
+      "loss": 0.6669,
+      "step": 71780
+    },
+    {
+      "epoch": 2.8392888924044373,
+      "grad_norm": 1.3035873133951499,
+      "learning_rate": 8.710551464754414e-08,
+      "loss": 0.6406,
+      "step": 71790
+    },
+    {
+      "epoch": 2.8396843916233268,
+      "grad_norm": 1.3825212769679238,
+      "learning_rate": 8.667840458255305e-08,
+      "loss": 0.6315,
+      "step": 71800
+    },
+    {
+      "epoch": 2.8400798908422153,
+      "grad_norm": 1.3982388632872584,
+      "learning_rate": 8.625233507490005e-08,
+      "loss": 0.632,
+      "step": 71810
+    },
+    {
+      "epoch": 2.840475390061105,
+      "grad_norm": 1.5781047096447292,
+      "learning_rate": 8.58273062148196e-08,
+      "loss": 0.6341,
+      "step": 71820
+    },
+    {
+      "epoch": 2.8408708892799934,
+      "grad_norm": 1.5441019233512676,
+      "learning_rate": 8.540331809232471e-08,
+      "loss": 0.6257,
+      "step": 71830
+    },
+    {
+      "epoch": 2.841266388498883,
+      "grad_norm": 1.5054411731881552,
+      "learning_rate": 8.498037079720966e-08,
+      "loss": 0.6513,
+      "step": 71840
+    },
+    {
+      "epoch": 2.8416618877177715,
+      "grad_norm": 1.4815673703969714,
+      "learning_rate": 8.455846441904669e-08,
+      "loss": 0.652,
+      "step": 71850
+    },
+    {
+      "epoch": 2.842057386936661,
+      "grad_norm": 1.503475398636474,
+      "learning_rate": 8.413759904718877e-08,
+      "loss": 0.6467,
+      "step": 71860
+    },
+    {
+      "epoch": 2.8424528861555496,
+      "grad_norm": 2.067684370145825,
+      "learning_rate": 8.371777477076792e-08,
+      "loss": 0.608,
+      "step": 71870
+    },
+    {
+      "epoch": 2.842848385374439,
+      "grad_norm": 1.387306214458457,
+      "learning_rate": 8.329899167869582e-08,
+      "loss": 0.6557,
+      "step": 71880
+    },
+    {
+      "epoch": 2.8432438845933277,
+      "grad_norm": 1.3374542335648076,
+      "learning_rate": 8.288124985966262e-08,
+      "loss": 0.6602,
+      "step": 71890
+    },
+    {
+      "epoch": 2.843639383812217,
+      "grad_norm": 1.478595604111618,
+      "learning_rate": 8.246454940214143e-08,
+      "loss": 0.6679,
+      "step": 71900
+    },
+    {
+      "epoch": 2.8440348830311057,
+      "grad_norm": 1.437195358675303,
+      "learning_rate": 8.20488903943789e-08,
+      "loss": 0.6459,
+      "step": 71910
+    },
+    {
+      "epoch": 2.8444303822499952,
+      "grad_norm": 1.5705743870705775,
+      "learning_rate": 8.163427292440685e-08,
+      "loss": 0.6148,
+      "step": 71920
+    },
+    {
+      "epoch": 2.844825881468884,
+      "grad_norm": 1.2780124466918952,
+      "learning_rate": 8.122069708003333e-08,
+      "loss": 0.6674,
+      "step": 71930
+    },
+    {
+      "epoch": 2.8452213806877733,
+      "grad_norm": 1.4710905571876607,
+      "learning_rate": 8.080816294884664e-08,
+      "loss": 0.6189,
+      "step": 71940
+    },
+    {
+      "epoch": 2.8456168799066623,
+      "grad_norm": 1.3484872251320974,
+      "learning_rate": 8.03966706182141e-08,
+      "loss": 0.6511,
+      "step": 71950
+    },
+    {
+      "epoch": 2.8460123791255514,
+      "grad_norm": 1.5038879828895375,
+      "learning_rate": 7.99862201752838e-08,
+      "loss": 0.6491,
+      "step": 71960
+    },
+    {
+      "epoch": 2.8464078783444404,
+      "grad_norm": 1.2677884295346622,
+      "learning_rate": 7.957681170698117e-08,
+      "loss": 0.6405,
+      "step": 71970
+    },
+    {
+      "epoch": 2.8468033775633295,
+      "grad_norm": 1.8674788523434265,
+      "learning_rate": 7.916844530001244e-08,
+      "loss": 0.6139,
+      "step": 71980
+    },
+    {
+      "epoch": 2.8471988767822185,
+      "grad_norm": 1.2824188682306423,
+      "learning_rate": 7.876112104086231e-08,
+      "loss": 0.6638,
+      "step": 71990
+    },
+    {
+      "epoch": 2.8475943760011075,
+      "grad_norm": 1.6401352971899505,
+      "learning_rate": 7.835483901579454e-08,
+      "loss": 0.6653,
+      "step": 72000
+    },
+    {
+      "epoch": 2.8479898752199966,
+      "grad_norm": 1.3961740398017697,
+      "learning_rate": 7.794959931085422e-08,
+      "loss": 0.6649,
+      "step": 72010
+    },
+    {
+      "epoch": 2.8483853744388856,
+      "grad_norm": 1.4114497198115452,
+      "learning_rate": 7.754540201186267e-08,
+      "loss": 0.6724,
+      "step": 72020
+    },
+    {
+      "epoch": 2.8487808736577747,
+      "grad_norm": 1.4666726890426882,
+      "learning_rate": 7.714224720442309e-08,
+      "loss": 0.614,
+      "step": 72030
+    },
+    {
+      "epoch": 2.8491763728766637,
+      "grad_norm": 1.5083421564719712,
+      "learning_rate": 7.674013497391553e-08,
+      "loss": 0.6245,
+      "step": 72040
+    },
+    {
+      "epoch": 2.8495718720955527,
+      "grad_norm": 1.3893655027117535,
+      "learning_rate": 7.633906540550185e-08,
+      "loss": 0.6518,
+      "step": 72050
+    },
+    {
+      "epoch": 2.8499673713144418,
+      "grad_norm": 1.1982052669857983,
+      "learning_rate": 7.593903858412022e-08,
+      "loss": 0.6671,
+      "step": 72060
+    },
+    {
+      "epoch": 2.850362870533331,
+      "grad_norm": 1.5550177486475851,
+      "learning_rate": 7.554005459449065e-08,
+      "loss": 0.6359,
+      "step": 72070
+    },
+    {
+      "epoch": 2.85075836975222,
+      "grad_norm": 1.4845948876461585,
+      "learning_rate": 7.514211352111056e-08,
+      "loss": 0.6421,
+      "step": 72080
+    },
+    {
+      "epoch": 2.851153868971109,
+      "grad_norm": 1.6801664990383498,
+      "learning_rate": 7.474521544825752e-08,
+      "loss": 0.6371,
+      "step": 72090
+    },
+    {
+      "epoch": 2.851549368189998,
+      "grad_norm": 1.428534336493384,
+      "learning_rate": 7.434936045998764e-08,
+      "loss": 0.6511,
+      "step": 72100
+    },
+    {
+      "epoch": 2.851944867408887,
+      "grad_norm": 1.5162142873336415,
+      "learning_rate": 7.395454864013552e-08,
+      "loss": 0.6463,
+      "step": 72110
+    },
+    {
+      "epoch": 2.852340366627776,
+      "grad_norm": 1.4202078216114058,
+      "learning_rate": 7.356078007231649e-08,
+      "loss": 0.6358,
+      "step": 72120
+    },
+    {
+      "epoch": 2.852735865846665,
+      "grad_norm": 1.6415014379473447,
+      "learning_rate": 7.316805483992329e-08,
+      "loss": 0.6299,
+      "step": 72130
+    },
+    {
+      "epoch": 2.853131365065554,
+      "grad_norm": 1.5678912756714454,
+      "learning_rate": 7.277637302612883e-08,
+      "loss": 0.6517,
+      "step": 72140
+    },
+    {
+      "epoch": 2.853526864284443,
+      "grad_norm": 1.3899347976952365,
+      "learning_rate": 7.238573471388455e-08,
+      "loss": 0.6358,
+      "step": 72150
+    },
+    {
+      "epoch": 2.853922363503332,
+      "grad_norm": 1.3111640823340012,
+      "learning_rate": 7.199613998592036e-08,
+      "loss": 0.6414,
+      "step": 72160
+    },
+    {
+      "epoch": 2.854317862722221,
+      "grad_norm": 1.229488001937905,
+      "learning_rate": 7.160758892474695e-08,
+      "loss": 0.6269,
+      "step": 72170
+    },
+    {
+      "epoch": 2.8547133619411103,
+      "grad_norm": 1.2757646388419683,
+      "learning_rate": 7.122008161265126e-08,
+      "loss": 0.6732,
+      "step": 72180
+    },
+    {
+      "epoch": 2.8551088611599993,
+      "grad_norm": 1.6348302350276103,
+      "learning_rate": 7.083361813170208e-08,
+      "loss": 0.6245,
+      "step": 72190
+    },
+    {
+      "epoch": 2.8555043603788883,
+      "grad_norm": 1.4705692079303059,
+      "learning_rate": 7.044819856374507e-08,
+      "loss": 0.6365,
+      "step": 72200
+    },
+    {
+      "epoch": 2.8558998595977774,
+      "grad_norm": 1.7227328874389856,
+      "learning_rate": 7.006382299040493e-08,
+      "loss": 0.6529,
+      "step": 72210
+    },
+    {
+      "epoch": 2.8562953588166664,
+      "grad_norm": 1.3573578734233607,
+      "learning_rate": 6.968049149308708e-08,
+      "loss": 0.6801,
+      "step": 72220
+    },
+    {
+      "epoch": 2.8566908580355554,
+      "grad_norm": 1.399463356219504,
+      "learning_rate": 6.929820415297383e-08,
+      "loss": 0.6639,
+      "step": 72230
+    },
+    {
+      "epoch": 2.8570863572544445,
+      "grad_norm": 1.658952308953823,
+      "learning_rate": 6.891696105102763e-08,
+      "loss": 0.6187,
+      "step": 72240
+    },
+    {
+      "epoch": 2.8574818564733335,
+      "grad_norm": 1.5435937784319809,
+      "learning_rate": 6.853676226798777e-08,
+      "loss": 0.6259,
+      "step": 72250
+    },
+    {
+      "epoch": 2.8578773556922226,
+      "grad_norm": 1.4117160225612395,
+      "learning_rate": 6.815760788437598e-08,
+      "loss": 0.6024,
+      "step": 72260
+    },
+    {
+      "epoch": 2.8582728549111116,
+      "grad_norm": 1.7455187494468822,
+      "learning_rate": 6.777949798048966e-08,
+      "loss": 0.6476,
+      "step": 72270
+    },
+    {
+      "epoch": 2.8586683541300006,
+      "grad_norm": 1.4072155571715825,
+      "learning_rate": 6.740243263640533e-08,
+      "loss": 0.6264,
+      "step": 72280
+    },
+    {
+      "epoch": 2.8590638533488897,
+      "grad_norm": 1.759758970873472,
+      "learning_rate": 6.702641193197967e-08,
+      "loss": 0.6415,
+      "step": 72290
+    },
+    {
+      "epoch": 2.8594593525677787,
+      "grad_norm": 1.700861968911209,
+      "learning_rate": 6.66514359468473e-08,
+      "loss": 0.6276,
+      "step": 72300
+    },
+    {
+      "epoch": 2.8598548517866678,
+      "grad_norm": 1.3422475633848485,
+      "learning_rate": 6.627750476042139e-08,
+      "loss": 0.6677,
+      "step": 72310
+    },
+    {
+      "epoch": 2.860250351005557,
+      "grad_norm": 1.3182407408161956,
+      "learning_rate": 6.590461845189522e-08,
+      "loss": 0.6515,
+      "step": 72320
+    },
+    {
+      "epoch": 2.860645850224446,
+      "grad_norm": 1.4806199732936745,
+      "learning_rate": 6.553277710023842e-08,
+      "loss": 0.651,
+      "step": 72330
+    },
+    {
+      "epoch": 2.861041349443335,
+      "grad_norm": 1.3502937366604248,
+      "learning_rate": 6.516198078420189e-08,
+      "loss": 0.6437,
+      "step": 72340
+    },
+    {
+      "epoch": 2.861436848662224,
+      "grad_norm": 1.8530057237432134,
+      "learning_rate": 6.47922295823128e-08,
+      "loss": 0.6428,
+      "step": 72350
+    },
+    {
+      "epoch": 2.861832347881113,
+      "grad_norm": 1.2879955958524398,
+      "learning_rate": 6.442352357287852e-08,
+      "loss": 0.6334,
+      "step": 72360
+    },
+    {
+      "epoch": 2.862227847100002,
+      "grad_norm": 1.4072641193957272,
+      "learning_rate": 6.405586283398491e-08,
+      "loss": 0.6896,
+      "step": 72370
+    },
+    {
+      "epoch": 2.862623346318891,
+      "grad_norm": 1.4983629491296078,
+      "learning_rate": 6.36892474434958e-08,
+      "loss": 0.646,
+      "step": 72380
+    },
+    {
+      "epoch": 2.86301884553778,
+      "grad_norm": 1.4326776250624267,
+      "learning_rate": 6.332367747905466e-08,
+      "loss": 0.6906,
+      "step": 72390
+    },
+    {
+      "epoch": 2.863414344756669,
+      "grad_norm": 1.3635475141215665,
+      "learning_rate": 6.295915301808231e-08,
+      "loss": 0.6876,
+      "step": 72400
+    },
+    {
+      "epoch": 2.863809843975558,
+      "grad_norm": 1.2200244161353813,
+      "learning_rate": 6.259567413777868e-08,
+      "loss": 0.6347,
+      "step": 72410
+    },
+    {
+      "epoch": 2.864205343194447,
+      "grad_norm": 1.3552047131982294,
+      "learning_rate": 6.223324091512329e-08,
+      "loss": 0.6497,
+      "step": 72420
+    },
+    {
+      "epoch": 2.8646008424133362,
+      "grad_norm": 1.5375161371950397,
+      "learning_rate": 6.187185342687252e-08,
+      "loss": 0.6285,
+      "step": 72430
+    },
+    {
+      "epoch": 2.8649963416322253,
+      "grad_norm": 1.249655970444738,
+      "learning_rate": 6.151151174956182e-08,
+      "loss": 0.6421,
+      "step": 72440
+    },
+    {
+      "epoch": 2.8653918408511143,
+      "grad_norm": 1.262237077238412,
+      "learning_rate": 6.115221595950682e-08,
+      "loss": 0.6164,
+      "step": 72450
+    },
+    {
+      "epoch": 2.8657873400700034,
+      "grad_norm": 1.203439467062273,
+      "learning_rate": 6.07939661327983e-08,
+      "loss": 0.643,
+      "step": 72460
+    },
+    {
+      "epoch": 2.8661828392888924,
+      "grad_norm": 1.417418223036004,
+      "learning_rate": 6.043676234530837e-08,
+      "loss": 0.6226,
+      "step": 72470
+    },
+    {
+      "epoch": 2.8665783385077814,
+      "grad_norm": 1.5740146543409002,
+      "learning_rate": 6.008060467268706e-08,
+      "loss": 0.6523,
+      "step": 72480
+    },
+    {
+      "epoch": 2.8669738377266705,
+      "grad_norm": 1.349582716418027,
+      "learning_rate": 5.97254931903618e-08,
+      "loss": 0.6938,
+      "step": 72490
+    },
+    {
+      "epoch": 2.8673693369455595,
+      "grad_norm": 1.634075965660607,
+      "learning_rate": 5.937142797353912e-08,
+      "loss": 0.6527,
+      "step": 72500
+    },
+    {
+      "epoch": 2.8677648361644485,
+      "grad_norm": 1.3807830694126326,
+      "learning_rate": 5.901840909720513e-08,
+      "loss": 0.6313,
+      "step": 72510
+    },
+    {
+      "epoch": 2.8681603353833376,
+      "grad_norm": 1.7106302140460086,
+      "learning_rate": 5.86664366361217e-08,
+      "loss": 0.625,
+      "step": 72520
+    },
+    {
+      "epoch": 2.8685558346022266,
+      "grad_norm": 1.5619605048497767,
+      "learning_rate": 5.831551066483088e-08,
+      "loss": 0.6364,
+      "step": 72530
+    },
+    {
+      "epoch": 2.8689513338211157,
+      "grad_norm": 1.421725802970187,
+      "learning_rate": 5.79656312576532e-08,
+      "loss": 0.619,
+      "step": 72540
+    },
+    {
+      "epoch": 2.8693468330400047,
+      "grad_norm": 1.2069242650206728,
+      "learning_rate": 5.7616798488687176e-08,
+      "loss": 0.6526,
+      "step": 72550
+    },
+    {
+      "epoch": 2.8697423322588937,
+      "grad_norm": 1.7169527039551473,
+      "learning_rate": 5.726901243180871e-08,
+      "loss": 0.6472,
+      "step": 72560
+    },
+    {
+      "epoch": 2.870137831477783,
+      "grad_norm": 1.480201705865561,
+      "learning_rate": 5.6922273160674444e-08,
+      "loss": 0.6177,
+      "step": 72570
+    },
+    {
+      "epoch": 2.870533330696672,
+      "grad_norm": 1.4752908902225705,
+      "learning_rate": 5.6576580748716195e-08,
+      "loss": 0.665,
+      "step": 72580
+    },
+    {
+      "epoch": 2.870928829915561,
+      "grad_norm": 1.5083567174791324,
+      "learning_rate": 5.6231935269147054e-08,
+      "loss": 0.6305,
+      "step": 72590
+    },
+    {
+      "epoch": 2.87132432913445,
+      "grad_norm": 1.561256121729682,
+      "learning_rate": 5.588833679495642e-08,
+      "loss": 0.643,
+      "step": 72600
+    },
+    {
+      "epoch": 2.871719828353339,
+      "grad_norm": 1.610856633861929,
+      "learning_rate": 5.5545785398912755e-08,
+      "loss": 0.6547,
+      "step": 72610
+    },
+    {
+      "epoch": 2.872115327572228,
+      "grad_norm": 1.4777326951322076,
+      "learning_rate": 5.5204281153561914e-08,
+      "loss": 0.6531,
+      "step": 72620
+    },
+    {
+      "epoch": 2.872510826791117,
+      "grad_norm": 1.35165347190245,
+      "learning_rate": 5.486382413122937e-08,
+      "loss": 0.6747,
+      "step": 72630
+    },
+    {
+      "epoch": 2.872906326010006,
+      "grad_norm": 1.465090142971119,
+      "learning_rate": 5.452441440401801e-08,
+      "loss": 0.6729,
+      "step": 72640
+    },
+    {
+      "epoch": 2.873301825228895,
+      "grad_norm": 1.2597268928551326,
+      "learning_rate": 5.4186052043808666e-08,
+      "loss": 0.6575,
+      "step": 72650
+    },
+    {
+      "epoch": 2.873697324447784,
+      "grad_norm": 1.5184075743379273,
+      "learning_rate": 5.384873712226124e-08,
+      "loss": 0.6589,
+      "step": 72660
+    },
+    {
+      "epoch": 2.874092823666673,
+      "grad_norm": 1.6122070085517288,
+      "learning_rate": 5.351246971081248e-08,
+      "loss": 0.6642,
+      "step": 72670
+    },
+    {
+      "epoch": 2.874488322885562,
+      "grad_norm": 1.718819098683133,
+      "learning_rate": 5.317724988067874e-08,
+      "loss": 0.6195,
+      "step": 72680
+    },
+    {
+      "epoch": 2.8748838221044513,
+      "grad_norm": 1.4102826738237104,
+      "learning_rate": 5.284307770285324e-08,
+      "loss": 0.665,
+      "step": 72690
+    },
+    {
+      "epoch": 2.8752793213233403,
+      "grad_norm": 1.4836432098153078,
+      "learning_rate": 5.250995324810826e-08,
+      "loss": 0.6561,
+      "step": 72700
+    },
+    {
+      "epoch": 2.8756748205422293,
+      "grad_norm": 1.210518337484923,
+      "learning_rate": 5.2177876586994024e-08,
+      "loss": 0.6596,
+      "step": 72710
+    },
+    {
+      "epoch": 2.8760703197611184,
+      "grad_norm": 1.4309615217082419,
+      "learning_rate": 5.184684778983873e-08,
+      "loss": 0.6389,
+      "step": 72720
+    },
+    {
+      "epoch": 2.8764658189800074,
+      "grad_norm": 1.531355736173954,
+      "learning_rate": 5.1516866926747953e-08,
+      "loss": 0.6329,
+      "step": 72730
+    },
+    {
+      "epoch": 2.8768613181988965,
+      "grad_norm": 1.4103575427945292,
+      "learning_rate": 5.118793406760636e-08,
+      "loss": 0.6832,
+      "step": 72740
+    },
+    {
+      "epoch": 2.8772568174177855,
+      "grad_norm": 1.4535690283014577,
+      "learning_rate": 5.086004928207655e-08,
+      "loss": 0.6613,
+      "step": 72750
+    },
+    {
+      "epoch": 2.8776523166366745,
+      "grad_norm": 1.4005578818184357,
+      "learning_rate": 5.053321263959909e-08,
+      "loss": 0.6669,
+      "step": 72760
+    },
+    {
+      "epoch": 2.8780478158555636,
+      "grad_norm": 1.6556440501689134,
+      "learning_rate": 5.02074242093914e-08,
+      "loss": 0.6374,
+      "step": 72770
+    },
+    {
+      "epoch": 2.8784433150744526,
+      "grad_norm": 1.6805707688486475,
+      "learning_rate": 4.988268406045105e-08,
+      "loss": 0.6659,
+      "step": 72780
+    },
+    {
+      "epoch": 2.8788388142933417,
+      "grad_norm": 1.2452304115421555,
+      "learning_rate": 4.9558992261551364e-08,
+      "loss": 0.6601,
+      "step": 72790
+    },
+    {
+      "epoch": 2.8792343135122307,
+      "grad_norm": 1.4993082581900317,
+      "learning_rate": 4.9236348881245286e-08,
+      "loss": 0.6566,
+      "step": 72800
+    },
+    {
+      "epoch": 2.8796298127311197,
+      "grad_norm": 1.5226098101439518,
+      "learning_rate": 4.891475398786316e-08,
+      "loss": 0.6371,
+      "step": 72810
+    },
+    {
+      "epoch": 2.8800253119500088,
+      "grad_norm": 1.255480487223256,
+      "learning_rate": 4.859420764951328e-08,
+      "loss": 0.6698,
+      "step": 72820
+    },
+    {
+      "epoch": 2.880420811168898,
+      "grad_norm": 1.3476733314233706,
+      "learning_rate": 4.827470993408134e-08,
+      "loss": 0.6538,
+      "step": 72830
+    },
+    {
+      "epoch": 2.880816310387787,
+      "grad_norm": 1.4669120556339963,
+      "learning_rate": 4.795626090923267e-08,
+      "loss": 0.6531,
+      "step": 72840
+    },
+    {
+      "epoch": 2.881211809606676,
+      "grad_norm": 1.2327257736566601,
+      "learning_rate": 4.7638860642408305e-08,
+      "loss": 0.6445,
+      "step": 72850
+    },
+    {
+      "epoch": 2.881607308825565,
+      "grad_norm": 1.43284919010261,
+      "learning_rate": 4.732250920082837e-08,
+      "loss": 0.6496,
+      "step": 72860
+    },
+    {
+      "epoch": 2.882002808044454,
+      "grad_norm": 1.678470475015289,
+      "learning_rate": 4.700720665149094e-08,
+      "loss": 0.6274,
+      "step": 72870
+    },
+    {
+      "epoch": 2.882398307263343,
+      "grad_norm": 1.3212522110082225,
+      "learning_rate": 4.66929530611715e-08,
+      "loss": 0.6435,
+      "step": 72880
+    },
+    {
+      "epoch": 2.882793806482232,
+      "grad_norm": 1.5237024323125776,
+      "learning_rate": 4.637974849642346e-08,
+      "loss": 0.6203,
+      "step": 72890
+    },
+    {
+      "epoch": 2.883189305701121,
+      "grad_norm": 1.3369753703052774,
+      "learning_rate": 4.606759302357822e-08,
+      "loss": 0.6707,
+      "step": 72900
+    },
+    {
+      "epoch": 2.88358480492001,
+      "grad_norm": 1.487196996409093,
+      "learning_rate": 4.575648670874566e-08,
+      "loss": 0.6162,
+      "step": 72910
+    },
+    {
+      "epoch": 2.883980304138899,
+      "grad_norm": 1.652736144974241,
+      "learning_rate": 4.544642961781143e-08,
+      "loss": 0.6396,
+      "step": 72920
+    },
+    {
+      "epoch": 2.884375803357788,
+      "grad_norm": 1.3567343586098006,
+      "learning_rate": 4.513742181644187e-08,
+      "loss": 0.6671,
+      "step": 72930
+    },
+    {
+      "epoch": 2.8847713025766772,
+      "grad_norm": 1.4751191129610746,
+      "learning_rate": 4.482946337007799e-08,
+      "loss": 0.6282,
+      "step": 72940
+    },
+    {
+      "epoch": 2.8851668017955667,
+      "grad_norm": 1.450479351550353,
+      "learning_rate": 4.452255434394093e-08,
+      "loss": 0.6393,
+      "step": 72950
+    },
+    {
+      "epoch": 2.8855623010144553,
+      "grad_norm": 1.5609670866457233,
+      "learning_rate": 4.4216694803028705e-08,
+      "loss": 0.6555,
+      "step": 72960
+    },
+    {
+      "epoch": 2.885957800233345,
+      "grad_norm": 1.3885795081012786,
+      "learning_rate": 4.3911884812117276e-08,
+      "loss": 0.6385,
+      "step": 72970
+    },
+    {
+      "epoch": 2.8863532994522334,
+      "grad_norm": 1.4323153141002634,
+      "learning_rate": 4.360812443576001e-08,
+      "loss": 0.6705,
+      "step": 72980
+    },
+    {
+      "epoch": 2.886748798671123,
+      "grad_norm": 1.5802102158166254,
+      "learning_rate": 4.330541373828823e-08,
+      "loss": 0.6549,
+      "step": 72990
+    },
+    {
+      "epoch": 2.8871442978900115,
+      "grad_norm": 1.5173291056679128,
+      "learning_rate": 4.300375278381064e-08,
+      "loss": 0.6382,
+      "step": 73000
+    },
+    {
+      "epoch": 2.887539797108901,
+      "grad_norm": 1.4228779557593552,
+      "learning_rate": 4.270314163621447e-08,
+      "loss": 0.6388,
+      "step": 73010
+    },
+    {
+      "epoch": 2.8879352963277896,
+      "grad_norm": 1.2394541547630626,
+      "learning_rate": 4.24035803591627e-08,
+      "loss": 0.6147,
+      "step": 73020
+    },
+    {
+      "epoch": 2.888330795546679,
+      "grad_norm": 1.158935180851554,
+      "learning_rate": 4.210506901609901e-08,
+      "loss": 0.6614,
+      "step": 73030
+    },
+    {
+      "epoch": 2.8887262947655676,
+      "grad_norm": 1.3394673174843224,
+      "learning_rate": 4.180760767024117e-08,
+      "loss": 0.6339,
+      "step": 73040
+    },
+    {
+      "epoch": 2.889121793984457,
+      "grad_norm": 1.4991891227655139,
+      "learning_rate": 4.1511196384588226e-08,
+      "loss": 0.6588,
+      "step": 73050
+    },
+    {
+      "epoch": 2.8895172932033457,
+      "grad_norm": 1.5894360741224016,
+      "learning_rate": 4.12158352219133e-08,
+      "loss": 0.6315,
+      "step": 73060
+    },
+    {
+      "epoch": 2.889912792422235,
+      "grad_norm": 1.2212752434595693,
+      "learning_rate": 4.092152424477025e-08,
+      "loss": 0.6372,
+      "step": 73070
+    },
+    {
+      "epoch": 2.890308291641124,
+      "grad_norm": 1.5621856230101503,
+      "learning_rate": 4.0628263515488654e-08,
+      "loss": 0.6373,
+      "step": 73080
+    },
+    {
+      "epoch": 2.8907037908600133,
+      "grad_norm": 1.6804074142,
+      "learning_rate": 4.0336053096174945e-08,
+      "loss": 0.6308,
+      "step": 73090
+    },
+    {
+      "epoch": 2.891099290078902,
+      "grad_norm": 1.4844377693483426,
+      "learning_rate": 4.004489304871628e-08,
+      "loss": 0.6456,
+      "step": 73100
+    },
+    {
+      "epoch": 2.8914947892977914,
+      "grad_norm": 1.292577091658977,
+      "learning_rate": 3.97547834347739e-08,
+      "loss": 0.6533,
+      "step": 73110
+    },
+    {
+      "epoch": 2.89189028851668,
+      "grad_norm": 1.2870549378420648,
+      "learning_rate": 3.9465724315788655e-08,
+      "loss": 0.6776,
+      "step": 73120
+    },
+    {
+      "epoch": 2.8922857877355694,
+      "grad_norm": 1.5082532434548177,
+      "learning_rate": 3.9177715752978244e-08,
+      "loss": 0.6253,
+      "step": 73130
+    },
+    {
+      "epoch": 2.892681286954458,
+      "grad_norm": 1.7460971315415368,
+      "learning_rate": 3.889075780733831e-08,
+      "loss": 0.5943,
+      "step": 73140
+    },
+    {
+      "epoch": 2.8930767861733475,
+      "grad_norm": 1.3485375973865619,
+      "learning_rate": 3.860485053964025e-08,
+      "loss": 0.6708,
+      "step": 73150
+    },
+    {
+      "epoch": 2.893472285392236,
+      "grad_norm": 1.4099137017458634,
+      "learning_rate": 3.831999401043618e-08,
+      "loss": 0.6338,
+      "step": 73160
+    },
+    {
+      "epoch": 2.8938677846111256,
+      "grad_norm": 1.4691497602730943,
+      "learning_rate": 3.803618828005229e-08,
+      "loss": 0.6788,
+      "step": 73170
+    },
+    {
+      "epoch": 2.894263283830014,
+      "grad_norm": 1.4353341115967935,
+      "learning_rate": 3.775343340859494e-08,
+      "loss": 0.6394,
+      "step": 73180
+    },
+    {
+      "epoch": 2.8946587830489037,
+      "grad_norm": 1.3132876924754062,
+      "learning_rate": 3.747172945594568e-08,
+      "loss": 0.6471,
+      "step": 73190
+    },
+    {
+      "epoch": 2.8950542822677923,
+      "grad_norm": 1.4791646400255996,
+      "learning_rate": 3.7191076481765675e-08,
+      "loss": 0.6392,
+      "step": 73200
+    },
+    {
+      "epoch": 2.8954497814866818,
+      "grad_norm": 1.4725139616173228,
+      "learning_rate": 3.691147454549127e-08,
+      "loss": 0.5941,
+      "step": 73210
+    },
+    {
+      "epoch": 2.8958452807055703,
+      "grad_norm": 1.491512605359283,
+      "learning_rate": 3.663292370633842e-08,
+      "loss": 0.6715,
+      "step": 73220
+    },
+    {
+      "epoch": 2.89624077992446,
+      "grad_norm": 1.4748372298308459,
+      "learning_rate": 3.635542402329883e-08,
+      "loss": 0.6519,
+      "step": 73230
+    },
+    {
+      "epoch": 2.8966362791433484,
+      "grad_norm": 1.6510368319464077,
+      "learning_rate": 3.6078975555142724e-08,
+      "loss": 0.6286,
+      "step": 73240
+    },
+    {
+      "epoch": 2.897031778362238,
+      "grad_norm": 1.3607040530014676,
+      "learning_rate": 3.5803578360416594e-08,
+      "loss": 0.6335,
+      "step": 73250
+    },
+    {
+      "epoch": 2.8974272775811265,
+      "grad_norm": 1.4017562980589908,
+      "learning_rate": 3.552923249744489e-08,
+      "loss": 0.6145,
+      "step": 73260
+    },
+    {
+      "epoch": 2.897822776800016,
+      "grad_norm": 1.3808757950575428,
+      "learning_rate": 3.5255938024329475e-08,
+      "loss": 0.6228,
+      "step": 73270
+    },
+    {
+      "epoch": 2.898218276018905,
+      "grad_norm": 1.1678762392477666,
+      "learning_rate": 3.498369499894905e-08,
+      "loss": 0.634,
+      "step": 73280
+    },
+    {
+      "epoch": 2.898613775237794,
+      "grad_norm": 1.4261898522473708,
+      "learning_rate": 3.4712503478960827e-08,
+      "loss": 0.6502,
+      "step": 73290
+    },
+    {
+      "epoch": 2.899009274456683,
+      "grad_norm": 1.4858853493301873,
+      "learning_rate": 3.444236352179831e-08,
+      "loss": 0.6359,
+      "step": 73300
+    },
+    {
+      "epoch": 2.899404773675572,
+      "grad_norm": 1.3449930880817293,
+      "learning_rate": 3.4173275184672396e-08,
+      "loss": 0.6295,
+      "step": 73310
+    },
+    {
+      "epoch": 2.899800272894461,
+      "grad_norm": 1.6060015811468955,
+      "learning_rate": 3.390523852457084e-08,
+      "loss": 0.5993,
+      "step": 73320
+    },
+    {
+      "epoch": 2.9001957721133502,
+      "grad_norm": 1.2430674200860625,
+      "learning_rate": 3.36382535982599e-08,
+      "loss": 0.6261,
+      "step": 73330
+    },
+    {
+      "epoch": 2.9005912713322393,
+      "grad_norm": 1.4579801675869906,
+      "learning_rate": 3.337232046228211e-08,
+      "loss": 0.65,
+      "step": 73340
+    },
+    {
+      "epoch": 2.9009867705511283,
+      "grad_norm": 1.3857771060602695,
+      "learning_rate": 3.3107439172958e-08,
+      "loss": 0.6449,
+      "step": 73350
+    },
+    {
+      "epoch": 2.9013822697700173,
+      "grad_norm": 1.5374007586473863,
+      "learning_rate": 3.284360978638379e-08,
+      "loss": 0.6731,
+      "step": 73360
+    },
+    {
+      "epoch": 2.9017777689889064,
+      "grad_norm": 1.3020068730036634,
+      "learning_rate": 3.25808323584359e-08,
+      "loss": 0.6775,
+      "step": 73370
+    },
+    {
+      "epoch": 2.9021732682077954,
+      "grad_norm": 1.4269098238545106,
+      "learning_rate": 3.23191069447637e-08,
+      "loss": 0.6328,
+      "step": 73380
+    },
+    {
+      "epoch": 2.9025687674266845,
+      "grad_norm": 1.6622459834457164,
+      "learning_rate": 3.205843360079841e-08,
+      "loss": 0.6536,
+      "step": 73390
+    },
+    {
+      "epoch": 2.9029642666455735,
+      "grad_norm": 1.7766758127097282,
+      "learning_rate": 3.179881238174476e-08,
+      "loss": 0.644,
+      "step": 73400
+    },
+    {
+      "epoch": 2.9033597658644625,
+      "grad_norm": 1.5757928447311629,
+      "learning_rate": 3.154024334258654e-08,
+      "loss": 0.6316,
+      "step": 73410
+    },
+    {
+      "epoch": 2.9037552650833516,
+      "grad_norm": 1.3925146576999683,
+      "learning_rate": 3.128272653808384e-08,
+      "loss": 0.6447,
+      "step": 73420
+    },
+    {
+      "epoch": 2.9041507643022406,
+      "grad_norm": 1.7114550606358028,
+      "learning_rate": 3.10262620227747e-08,
+      "loss": 0.6464,
+      "step": 73430
+    },
+    {
+      "epoch": 2.9045462635211297,
+      "grad_norm": 1.3473978752412272,
+      "learning_rate": 3.0770849850974006e-08,
+      "loss": 0.6417,
+      "step": 73440
+    },
+    {
+      "epoch": 2.9049417627400187,
+      "grad_norm": 1.2790154087281085,
+      "learning_rate": 3.051649007677404e-08,
+      "loss": 0.6779,
+      "step": 73450
+    },
+    {
+      "epoch": 2.9053372619589077,
+      "grad_norm": 1.364313043353785,
+      "learning_rate": 3.026318275404283e-08,
+      "loss": 0.6609,
+      "step": 73460
+    },
+    {
+      "epoch": 2.9057327611777968,
+      "grad_norm": 1.9577798360631313,
+      "learning_rate": 3.00109279364269e-08,
+      "loss": 0.6426,
+      "step": 73470
+    },
+    {
+      "epoch": 2.906128260396686,
+      "grad_norm": 1.2865122416197172,
+      "learning_rate": 2.9759725677349638e-08,
+      "loss": 0.6513,
+      "step": 73480
+    },
+    {
+      "epoch": 2.906523759615575,
+      "grad_norm": 1.4803393798680207,
+      "learning_rate": 2.9509576030012367e-08,
+      "loss": 0.635,
+      "step": 73490
+    },
+    {
+      "epoch": 2.906919258834464,
+      "grad_norm": 1.5203096024610656,
+      "learning_rate": 2.926047904739049e-08,
+      "loss": 0.6062,
+      "step": 73500
+    },
+    {
+      "epoch": 2.907314758053353,
+      "grad_norm": 1.6943166581873972,
+      "learning_rate": 2.9012434782239584e-08,
+      "loss": 0.6594,
+      "step": 73510
+    },
+    {
+      "epoch": 2.907710257272242,
+      "grad_norm": 1.2709935165090178,
+      "learning_rate": 2.8765443287091523e-08,
+      "loss": 0.663,
+      "step": 73520
+    },
+    {
+      "epoch": 2.908105756491131,
+      "grad_norm": 1.5260170312306132,
+      "learning_rate": 2.85195046142539e-08,
+      "loss": 0.6576,
+      "step": 73530
+    },
+    {
+      "epoch": 2.90850125571002,
+      "grad_norm": 1.5330157772928157,
+      "learning_rate": 2.827461881581339e-08,
+      "loss": 0.6364,
+      "step": 73540
+    },
+    {
+      "epoch": 2.908896754928909,
+      "grad_norm": 1.9527984201422075,
+      "learning_rate": 2.8030785943631843e-08,
+      "loss": 0.6421,
+      "step": 73550
+    },
+    {
+      "epoch": 2.909292254147798,
+      "grad_norm": 1.5179959299978178,
+      "learning_rate": 2.7788006049349058e-08,
+      "loss": 0.6559,
+      "step": 73560
+    },
+    {
+      "epoch": 2.909687753366687,
+      "grad_norm": 1.3658604850621558,
+      "learning_rate": 2.7546279184381686e-08,
+      "loss": 0.6378,
+      "step": 73570
+    },
+    {
+      "epoch": 2.910083252585576,
+      "grad_norm": 1.5388832562587558,
+      "learning_rate": 2.7305605399923773e-08,
+      "loss": 0.6458,
+      "step": 73580
+    },
+    {
+      "epoch": 2.9104787518044652,
+      "grad_norm": 1.6183400670321177,
+      "learning_rate": 2.706598474694455e-08,
+      "loss": 0.6413,
+      "step": 73590
+    },
+    {
+      "epoch": 2.9108742510233543,
+      "grad_norm": 1.5524208145611658,
+      "learning_rate": 2.6827417276193423e-08,
+      "loss": 0.6582,
+      "step": 73600
+    },
+    {
+      "epoch": 2.9112697502422433,
+      "grad_norm": 1.4780749074187067,
+      "learning_rate": 2.658990303819331e-08,
+      "loss": 0.6193,
+      "step": 73610
+    },
+    {
+      "epoch": 2.9116652494611324,
+      "grad_norm": 1.4384321617070752,
+      "learning_rate": 2.6353442083247304e-08,
+      "loss": 0.6314,
+      "step": 73620
+    },
+    {
+      "epoch": 2.9120607486800214,
+      "grad_norm": 1.3769430558478037,
+      "learning_rate": 2.6118034461432018e-08,
+      "loss": 0.6668,
+      "step": 73630
+    },
+    {
+      "epoch": 2.9124562478989104,
+      "grad_norm": 1.6739059508543381,
+      "learning_rate": 2.588368022260368e-08,
+      "loss": 0.6246,
+      "step": 73640
+    },
+    {
+      "epoch": 2.9128517471177995,
+      "grad_norm": 1.3058256254216958,
+      "learning_rate": 2.5650379416394256e-08,
+      "loss": 0.6747,
+      "step": 73650
+    },
+    {
+      "epoch": 2.9132472463366885,
+      "grad_norm": 1.495369401888642,
+      "learning_rate": 2.541813209221311e-08,
+      "loss": 0.6516,
+      "step": 73660
+    },
+    {
+      "epoch": 2.9136427455555776,
+      "grad_norm": 1.6181070932766162,
+      "learning_rate": 2.5186938299246456e-08,
+      "loss": 0.6313,
+      "step": 73670
+    },
+    {
+      "epoch": 2.9140382447744666,
+      "grad_norm": 1.376908189963363,
+      "learning_rate": 2.4956798086456234e-08,
+      "loss": 0.6679,
+      "step": 73680
+    },
+    {
+      "epoch": 2.9144337439933556,
+      "grad_norm": 1.5525881040933487,
+      "learning_rate": 2.4727711502582908e-08,
+      "loss": 0.6403,
+      "step": 73690
+    },
+    {
+      "epoch": 2.9148292432122447,
+      "grad_norm": 1.6413504376016281,
+      "learning_rate": 2.449967859614322e-08,
+      "loss": 0.6577,
+      "step": 73700
+    },
+    {
+      "epoch": 2.9152247424311337,
+      "grad_norm": 1.4669545664862746,
+      "learning_rate": 2.4272699415430202e-08,
+      "loss": 0.6331,
+      "step": 73710
+    },
+    {
+      "epoch": 2.9156202416500228,
+      "grad_norm": 1.4165551219656751,
+      "learning_rate": 2.404677400851485e-08,
+      "loss": 0.6641,
+      "step": 73720
+    },
+    {
+      "epoch": 2.916015740868912,
+      "grad_norm": 1.33591608303022,
+      "learning_rate": 2.3821902423243337e-08,
+      "loss": 0.617,
+      "step": 73730
+    },
+    {
+      "epoch": 2.916411240087801,
+      "grad_norm": 1.670073083005954,
+      "learning_rate": 2.3598084707240344e-08,
+      "loss": 0.6495,
+      "step": 73740
+    },
+    {
+      "epoch": 2.91680673930669,
+      "grad_norm": 1.336096388620189,
+      "learning_rate": 2.337532090790573e-08,
+      "loss": 0.6681,
+      "step": 73750
+    },
+    {
+      "epoch": 2.917202238525579,
+      "grad_norm": 1.3226340600280069,
+      "learning_rate": 2.3153611072418424e-08,
+      "loss": 0.6089,
+      "step": 73760
+    },
+    {
+      "epoch": 2.917597737744468,
+      "grad_norm": 1.4415691713027183,
+      "learning_rate": 2.2932955247731425e-08,
+      "loss": 0.6668,
+      "step": 73770
+    },
+    {
+      "epoch": 2.917993236963357,
+      "grad_norm": 1.393681057459904,
+      "learning_rate": 2.2713353480576795e-08,
+      "loss": 0.671,
+      "step": 73780
+    },
+    {
+      "epoch": 2.918388736182246,
+      "grad_norm": 1.3608468874762747,
+      "learning_rate": 2.2494805817461774e-08,
+      "loss": 0.6759,
+      "step": 73790
+    },
+    {
+      "epoch": 2.918784235401135,
+      "grad_norm": 1.5722304904607298,
+      "learning_rate": 2.2277312304671562e-08,
+      "loss": 0.6968,
+      "step": 73800
+    },
+    {
+      "epoch": 2.919179734620024,
+      "grad_norm": 1.2158107809001115,
+      "learning_rate": 2.206087298826709e-08,
+      "loss": 0.6801,
+      "step": 73810
+    },
+    {
+      "epoch": 2.919575233838913,
+      "grad_norm": 1.6602405277369507,
+      "learning_rate": 2.184548791408725e-08,
+      "loss": 0.6367,
+      "step": 73820
+    },
+    {
+      "epoch": 2.919970733057802,
+      "grad_norm": 1.5138865023208754,
+      "learning_rate": 2.1631157127746105e-08,
+      "loss": 0.6401,
+      "step": 73830
+    },
+    {
+      "epoch": 2.9203662322766912,
+      "grad_norm": 1.9377118630001589,
+      "learning_rate": 2.141788067463513e-08,
+      "loss": 0.6755,
+      "step": 73840
+    },
+    {
+      "epoch": 2.9207617314955803,
+      "grad_norm": 1.4682798791002638,
+      "learning_rate": 2.1205658599923183e-08,
+      "loss": 0.6593,
+      "step": 73850
+    },
+    {
+      "epoch": 2.9211572307144693,
+      "grad_norm": 1.35992177545572,
+      "learning_rate": 2.0994490948555434e-08,
+      "loss": 0.6743,
+      "step": 73860
+    },
+    {
+      "epoch": 2.9215527299333583,
+      "grad_norm": 1.505064744193168,
+      "learning_rate": 2.0784377765253326e-08,
+      "loss": 0.6452,
+      "step": 73870
+    },
+    {
+      "epoch": 2.9219482291522474,
+      "grad_norm": 1.5116412220022382,
+      "learning_rate": 2.05753190945146e-08,
+      "loss": 0.6532,
+      "step": 73880
+    },
+    {
+      "epoch": 2.9223437283711364,
+      "grad_norm": 1.7212297632257676,
+      "learning_rate": 2.0367314980615506e-08,
+      "loss": 0.6303,
+      "step": 73890
+    },
+    {
+      "epoch": 2.9227392275900255,
+      "grad_norm": 1.727146250896952,
+      "learning_rate": 2.016036546760636e-08,
+      "loss": 0.6479,
+      "step": 73900
+    },
+    {
+      "epoch": 2.9231347268089145,
+      "grad_norm": 1.3123577942848836,
+      "learning_rate": 1.9954470599316546e-08,
+      "loss": 0.6127,
+      "step": 73910
+    },
+    {
+      "epoch": 2.9235302260278035,
+      "grad_norm": 1.4770600814224353,
+      "learning_rate": 1.974963041935063e-08,
+      "loss": 0.6071,
+      "step": 73920
+    },
+    {
+      "epoch": 2.9239257252466926,
+      "grad_norm": 1.310967421526731,
+      "learning_rate": 1.954584497109058e-08,
+      "loss": 0.635,
+      "step": 73930
+    },
+    {
+      "epoch": 2.9243212244655816,
+      "grad_norm": 1.2443510062765006,
+      "learning_rate": 1.9343114297694643e-08,
+      "loss": 0.6548,
+      "step": 73940
+    },
+    {
+      "epoch": 2.9247167236844707,
+      "grad_norm": 1.387982223017377,
+      "learning_rate": 1.9141438442097372e-08,
+      "loss": 0.6238,
+      "step": 73950
+    },
+    {
+      "epoch": 2.9251122229033597,
+      "grad_norm": 1.6110149774713163,
+      "learning_rate": 1.8940817447010708e-08,
+      "loss": 0.6349,
+      "step": 73960
+    },
+    {
+      "epoch": 2.9255077221222487,
+      "grad_norm": 1.5807481061662034,
+      "learning_rate": 1.8741251354921773e-08,
+      "loss": 0.6284,
+      "step": 73970
+    },
+    {
+      "epoch": 2.925903221341138,
+      "grad_norm": 1.4516527096014262,
+      "learning_rate": 1.8542740208096764e-08,
+      "loss": 0.643,
+      "step": 73980
+    },
+    {
+      "epoch": 2.926298720560027,
+      "grad_norm": 1.582427128361681,
+      "learning_rate": 1.8345284048575385e-08,
+      "loss": 0.6468,
+      "step": 73990
+    },
+    {
+      "epoch": 2.926694219778916,
+      "grad_norm": 1.5340072458361254,
+      "learning_rate": 1.8148882918176404e-08,
+      "loss": 0.5876,
+      "step": 74000
+    },
+    {
+      "epoch": 2.927089718997805,
+      "grad_norm": 1.2939608633572102,
+      "learning_rate": 1.7953536858494326e-08,
+      "loss": 0.6332,
+      "step": 74010
+    },
+    {
+      "epoch": 2.927485218216694,
+      "grad_norm": 1.213407587319952,
+      "learning_rate": 1.7759245910899393e-08,
+      "loss": 0.665,
+      "step": 74020
+    },
+    {
+      "epoch": 2.927880717435583,
+      "grad_norm": 1.2327507984166377,
+      "learning_rate": 1.75660101165398e-08,
+      "loss": 0.6047,
+      "step": 74030
+    },
+    {
+      "epoch": 2.928276216654472,
+      "grad_norm": 1.8149716481421807,
+      "learning_rate": 1.7373829516338926e-08,
+      "loss": 0.6508,
+      "step": 74040
+    },
+    {
+      "epoch": 2.928671715873361,
+      "grad_norm": 1.4705837957290206,
+      "learning_rate": 1.7182704150998096e-08,
+      "loss": 0.6238,
+      "step": 74050
+    },
+    {
+      "epoch": 2.92906721509225,
+      "grad_norm": 1.6270558558308765,
+      "learning_rate": 1.6992634060993828e-08,
+      "loss": 0.6436,
+      "step": 74060
+    },
+    {
+      "epoch": 2.929462714311139,
+      "grad_norm": 1.414701533571263,
+      "learning_rate": 1.6803619286579477e-08,
+      "loss": 0.657,
+      "step": 74070
+    },
+    {
+      "epoch": 2.929858213530028,
+      "grad_norm": 1.7333727238938672,
+      "learning_rate": 1.6615659867785792e-08,
+      "loss": 0.6392,
+      "step": 74080
+    },
+    {
+      "epoch": 2.930253712748917,
+      "grad_norm": 1.4597776683518446,
+      "learning_rate": 1.642875584441872e-08,
+      "loss": 0.6515,
+      "step": 74090
+    },
+    {
+      "epoch": 2.9306492119678063,
+      "grad_norm": 1.3049665753359068,
+      "learning_rate": 1.6242907256062145e-08,
+      "loss": 0.6263,
+      "step": 74100
+    },
+    {
+      "epoch": 2.9310447111866953,
+      "grad_norm": 1.4476140312771326,
+      "learning_rate": 1.6058114142075143e-08,
+      "loss": 0.6488,
+      "step": 74110
+    },
+    {
+      "epoch": 2.9314402104055843,
+      "grad_norm": 1.5985985226428492,
+      "learning_rate": 1.587437654159363e-08,
+      "loss": 0.6448,
+      "step": 74120
+    },
+    {
+      "epoch": 2.9318357096244734,
+      "grad_norm": 1.55991870069027,
+      "learning_rate": 1.569169449352981e-08,
+      "loss": 0.6202,
+      "step": 74130
+    },
+    {
+      "epoch": 2.9322312088433624,
+      "grad_norm": 1.4722672469390565,
+      "learning_rate": 1.5510068036573288e-08,
+      "loss": 0.6294,
+      "step": 74140
+    },
+    {
+      "epoch": 2.9326267080622515,
+      "grad_norm": 1.5009381866707456,
+      "learning_rate": 1.532949720918886e-08,
+      "loss": 0.664,
+      "step": 74150
+    },
+    {
+      "epoch": 2.9330222072811405,
+      "grad_norm": 1.4589707556727036,
+      "learning_rate": 1.514998204961926e-08,
+      "loss": 0.6611,
+      "step": 74160
+    },
+    {
+      "epoch": 2.9334177065000295,
+      "grad_norm": 1.7684706653463056,
+      "learning_rate": 1.4971522595881306e-08,
+      "loss": 0.6038,
+      "step": 74170
+    },
+    {
+      "epoch": 2.9338132057189186,
+      "grad_norm": 1.589191622086741,
+      "learning_rate": 1.4794118885770869e-08,
+      "loss": 0.6226,
+      "step": 74180
+    },
+    {
+      "epoch": 2.9342087049378076,
+      "grad_norm": 1.5341608678916385,
+      "learning_rate": 1.4617770956858457e-08,
+      "loss": 0.6049,
+      "step": 74190
+    },
+    {
+      "epoch": 2.9346042041566966,
+      "grad_norm": 1.4119029298173666,
+      "learning_rate": 1.4442478846491415e-08,
+      "loss": 0.6299,
+      "step": 74200
+    },
+    {
+      "epoch": 2.9349997033755857,
+      "grad_norm": 1.4271520548881262,
+      "learning_rate": 1.426824259179449e-08,
+      "loss": 0.6512,
+      "step": 74210
+    },
+    {
+      "epoch": 2.9353952025944747,
+      "grad_norm": 1.6523780942766524,
+      "learning_rate": 1.40950622296665e-08,
+      "loss": 0.6275,
+      "step": 74220
+    },
+    {
+      "epoch": 2.9357907018133638,
+      "grad_norm": 1.4628095509263386,
+      "learning_rate": 1.392293779678533e-08,
+      "loss": 0.6738,
+      "step": 74230
+    },
+    {
+      "epoch": 2.936186201032253,
+      "grad_norm": 1.2881537490943038,
+      "learning_rate": 1.3751869329603485e-08,
+      "loss": 0.6493,
+      "step": 74240
+    },
+    {
+      "epoch": 2.936581700251142,
+      "grad_norm": 1.7929240965417017,
+      "learning_rate": 1.3581856864350318e-08,
+      "loss": 0.6521,
+      "step": 74250
+    },
+    {
+      "epoch": 2.936977199470031,
+      "grad_norm": 1.2842700725791327,
+      "learning_rate": 1.3412900437031474e-08,
+      "loss": 0.6903,
+      "step": 74260
+    },
+    {
+      "epoch": 2.93737269868892,
+      "grad_norm": 1.6397111809128573,
+      "learning_rate": 1.3245000083429437e-08,
+      "loss": 0.5999,
+      "step": 74270
+    },
+    {
+      "epoch": 2.9377681979078094,
+      "grad_norm": 1.4045967808228152,
+      "learning_rate": 1.3078155839101881e-08,
+      "loss": 0.639,
+      "step": 74280
+    },
+    {
+      "epoch": 2.938163697126698,
+      "grad_norm": 1.7004426576513894,
+      "learning_rate": 1.2912367739384425e-08,
+      "loss": 0.6115,
+      "step": 74290
+    },
+    {
+      "epoch": 2.9385591963455875,
+      "grad_norm": 1.559438811440199,
+      "learning_rate": 1.2747635819387872e-08,
+      "loss": 0.6443,
+      "step": 74300
+    },
+    {
+      "epoch": 2.938954695564476,
+      "grad_norm": 1.312465664906808,
+      "learning_rate": 1.2583960113999316e-08,
+      "loss": 0.6887,
+      "step": 74310
+    },
+    {
+      "epoch": 2.9393501947833656,
+      "grad_norm": 1.2728121325403894,
+      "learning_rate": 1.2421340657882142e-08,
+      "loss": 0.6453,
+      "step": 74320
+    },
+    {
+      "epoch": 2.939745694002254,
+      "grad_norm": 1.7718362817399798,
+      "learning_rate": 1.2259777485477687e-08,
+      "loss": 0.6556,
+      "step": 74330
+    },
+    {
+      "epoch": 2.9401411932211436,
+      "grad_norm": 1.1950781798632801,
+      "learning_rate": 1.2099270631000804e-08,
+      "loss": 0.6631,
+      "step": 74340
+    },
+    {
+      "epoch": 2.9405366924400322,
+      "grad_norm": 1.4756367622417887,
+      "learning_rate": 1.1939820128445411e-08,
+      "loss": 0.6296,
+      "step": 74350
+    },
+    {
+      "epoch": 2.9409321916589217,
+      "grad_norm": 1.2909865140881334,
+      "learning_rate": 1.17814260115795e-08,
+      "loss": 0.6505,
+      "step": 74360
+    },
+    {
+      "epoch": 2.9413276908778103,
+      "grad_norm": 1.5841153344593966,
+      "learning_rate": 1.1624088313948456e-08,
+      "loss": 0.6488,
+      "step": 74370
+    },
+    {
+      "epoch": 2.9417231900967,
+      "grad_norm": 1.5552836635504748,
+      "learning_rate": 1.1467807068873404e-08,
+      "loss": 0.6152,
+      "step": 74380
+    },
+    {
+      "epoch": 2.9421186893155884,
+      "grad_norm": 1.4063102864209973,
+      "learning_rate": 1.1312582309452313e-08,
+      "loss": 0.6318,
+      "step": 74390
+    },
+    {
+      "epoch": 2.942514188534478,
+      "grad_norm": 1.795580376106693,
+      "learning_rate": 1.1158414068559442e-08,
+      "loss": 0.6172,
+      "step": 74400
+    },
+    {
+      "epoch": 2.9429096877533665,
+      "grad_norm": 1.3568480648088053,
+      "learning_rate": 1.100530237884423e-08,
+      "loss": 0.6983,
+      "step": 74410
+    },
+    {
+      "epoch": 2.943305186972256,
+      "grad_norm": 1.5291784230981096,
+      "learning_rate": 1.0853247272734068e-08,
+      "loss": 0.6517,
+      "step": 74420
+    },
+    {
+      "epoch": 2.9437006861911446,
+      "grad_norm": 1.2841925560594245,
+      "learning_rate": 1.070224878243098e-08,
+      "loss": 0.6613,
+      "step": 74430
+    },
+    {
+      "epoch": 2.944096185410034,
+      "grad_norm": 1.6613901676953224,
+      "learning_rate": 1.055230693991438e-08,
+      "loss": 0.6465,
+      "step": 74440
+    },
+    {
+      "epoch": 2.9444916846289226,
+      "grad_norm": 1.4687027127998962,
+      "learning_rate": 1.0403421776938316e-08,
+      "loss": 0.615,
+      "step": 74450
+    },
+    {
+      "epoch": 2.944887183847812,
+      "grad_norm": 1.4900023781217484,
+      "learning_rate": 1.025559332503534e-08,
+      "loss": 0.6412,
+      "step": 74460
+    },
+    {
+      "epoch": 2.9452826830667007,
+      "grad_norm": 1.6153747907270886,
+      "learning_rate": 1.0108821615512077e-08,
+      "loss": 0.6605,
+      "step": 74470
+    },
+    {
+      "epoch": 2.94567818228559,
+      "grad_norm": 1.6008286218990273,
+      "learning_rate": 9.96310667945255e-09,
+      "loss": 0.6776,
+      "step": 74480
+    },
+    {
+      "epoch": 2.946073681504479,
+      "grad_norm": 1.9284491025303494,
+      "learning_rate": 9.818448547717075e-09,
+      "loss": 0.6389,
+      "step": 74490
+    },
+    {
+      "epoch": 2.9464691807233683,
+      "grad_norm": 1.576719591308475,
+      "learning_rate": 9.6748472509417e-09,
+      "loss": 0.6452,
+      "step": 74500
+    },
+    {
+      "epoch": 2.946864679942257,
+      "grad_norm": 1.441596405376397,
+      "learning_rate": 9.532302819538209e-09,
+      "loss": 0.6607,
+      "step": 74510
+    },
+    {
+      "epoch": 2.9472601791611464,
+      "grad_norm": 1.5486034709408758,
+      "learning_rate": 9.390815283695231e-09,
+      "loss": 0.6556,
+      "step": 74520
+    },
+    {
+      "epoch": 2.947655678380035,
+      "grad_norm": 1.5559701590231496,
+      "learning_rate": 9.250384673377689e-09,
+      "loss": 0.6193,
+      "step": 74530
+    },
+    {
+      "epoch": 2.9480511775989244,
+      "grad_norm": 1.4564111923399143,
+      "learning_rate": 9.111011018326233e-09,
+      "loss": 0.6347,
+      "step": 74540
+    },
+    {
+      "epoch": 2.948446676817813,
+      "grad_norm": 1.4769508606939148,
+      "learning_rate": 8.972694348057254e-09,
+      "loss": 0.6745,
+      "step": 74550
+    },
+    {
+      "epoch": 2.9488421760367025,
+      "grad_norm": 1.4511134355689703,
+      "learning_rate": 8.835434691865096e-09,
+      "loss": 0.6667,
+      "step": 74560
+    },
+    {
+      "epoch": 2.949237675255591,
+      "grad_norm": 1.3836006836493282,
+      "learning_rate": 8.69923207881762e-09,
+      "loss": 0.6416,
+      "step": 74570
+    },
+    {
+      "epoch": 2.9496331744744806,
+      "grad_norm": 1.5024419916298608,
+      "learning_rate": 8.56408653776175e-09,
+      "loss": 0.6266,
+      "step": 74580
+    },
+    {
+      "epoch": 2.950028673693369,
+      "grad_norm": 1.8031606331327024,
+      "learning_rate": 8.429998097317372e-09,
+      "loss": 0.6392,
+      "step": 74590
+    },
+    {
+      "epoch": 2.9504241729122587,
+      "grad_norm": 1.8460586093353146,
+      "learning_rate": 8.29696678588343e-09,
+      "loss": 0.6407,
+      "step": 74600
+    },
+    {
+      "epoch": 2.9508196721311473,
+      "grad_norm": 1.3365413639143147,
+      "learning_rate": 8.164992631632396e-09,
+      "loss": 0.6554,
+      "step": 74610
+    },
+    {
+      "epoch": 2.9512151713500367,
+      "grad_norm": 1.4961993912429425,
+      "learning_rate": 8.034075662515795e-09,
+      "loss": 0.6483,
+      "step": 74620
+    },
+    {
+      "epoch": 2.951610670568926,
+      "grad_norm": 1.5933648065697557,
+      "learning_rate": 7.904215906258116e-09,
+      "loss": 0.6552,
+      "step": 74630
+    },
+    {
+      "epoch": 2.952006169787815,
+      "grad_norm": 1.3783989761197568,
+      "learning_rate": 7.775413390361809e-09,
+      "loss": 0.6548,
+      "step": 74640
+    },
+    {
+      "epoch": 2.952401669006704,
+      "grad_norm": 1.3570911850653213,
+      "learning_rate": 7.647668142105603e-09,
+      "loss": 0.6562,
+      "step": 74650
+    },
+    {
+      "epoch": 2.952797168225593,
+      "grad_norm": 1.5168896381561028,
+      "learning_rate": 7.520980188542859e-09,
+      "loss": 0.6356,
+      "step": 74660
+    },
+    {
+      "epoch": 2.953192667444482,
+      "grad_norm": 1.2579265897777971,
+      "learning_rate": 7.395349556504894e-09,
+      "loss": 0.6326,
+      "step": 74670
+    },
+    {
+      "epoch": 2.953588166663371,
+      "grad_norm": 1.2619313307887843,
+      "learning_rate": 7.270776272597646e-09,
+      "loss": 0.6718,
+      "step": 74680
+    },
+    {
+      "epoch": 2.95398366588226,
+      "grad_norm": 1.3164945555597718,
+      "learning_rate": 7.147260363203346e-09,
+      "loss": 0.6137,
+      "step": 74690
+    },
+    {
+      "epoch": 2.954379165101149,
+      "grad_norm": 1.6049882415247216,
+      "learning_rate": 7.024801854481067e-09,
+      "loss": 0.6212,
+      "step": 74700
+    },
+    {
+      "epoch": 2.954774664320038,
+      "grad_norm": 1.3623941746428814,
+      "learning_rate": 6.9034007723645105e-09,
+      "loss": 0.6644,
+      "step": 74710
+    },
+    {
+      "epoch": 2.955170163538927,
+      "grad_norm": 1.651786609129843,
+      "learning_rate": 6.783057142565885e-09,
+      "loss": 0.6268,
+      "step": 74720
+    },
+    {
+      "epoch": 2.955565662757816,
+      "grad_norm": 1.6124752174678787,
+      "learning_rate": 6.663770990570362e-09,
+      "loss": 0.6267,
+      "step": 74730
+    },
+    {
+      "epoch": 2.955961161976705,
+      "grad_norm": 1.6464893507930403,
+      "learning_rate": 6.5455423416416195e-09,
+      "loss": 0.6388,
+      "step": 74740
+    },
+    {
+      "epoch": 2.9563566611955943,
+      "grad_norm": 1.522469868820525,
+      "learning_rate": 6.428371220818519e-09,
+      "loss": 0.6152,
+      "step": 74750
+    },
+    {
+      "epoch": 2.9567521604144833,
+      "grad_norm": 1.478830126027556,
+      "learning_rate": 6.312257652915099e-09,
+      "loss": 0.6257,
+      "step": 74760
+    },
+    {
+      "epoch": 2.9571476596333723,
+      "grad_norm": 1.5430585828201524,
+      "learning_rate": 6.1972016625228e-09,
+      "loss": 0.6421,
+      "step": 74770
+    },
+    {
+      "epoch": 2.9575431588522614,
+      "grad_norm": 1.3594554689223473,
+      "learning_rate": 6.083203274008242e-09,
+      "loss": 0.634,
+      "step": 74780
+    },
+    {
+      "epoch": 2.9579386580711504,
+      "grad_norm": 1.7739712528195106,
+      "learning_rate": 5.97026251151489e-09,
+      "loss": 0.6465,
+      "step": 74790
+    },
+    {
+      "epoch": 2.9583341572900395,
+      "grad_norm": 1.4447911825052047,
+      "learning_rate": 5.858379398960834e-09,
+      "loss": 0.6489,
+      "step": 74800
+    },
+    {
+      "epoch": 2.9587296565089285,
+      "grad_norm": 1.655079209568356,
+      "learning_rate": 5.747553960042118e-09,
+      "loss": 0.6361,
+      "step": 74810
+    },
+    {
+      "epoch": 2.9591251557278175,
+      "grad_norm": 1.6122534214013735,
+      "learning_rate": 5.637786218228303e-09,
+      "loss": 0.642,
+      "step": 74820
+    },
+    {
+      "epoch": 2.9595206549467066,
+      "grad_norm": 1.391384922418022,
+      "learning_rate": 5.529076196767458e-09,
+      "loss": 0.6222,
+      "step": 74830
+    },
+    {
+      "epoch": 2.9599161541655956,
+      "grad_norm": 1.3691674914862355,
+      "learning_rate": 5.421423918681723e-09,
+      "loss": 0.6322,
+      "step": 74840
+    },
+    {
+      "epoch": 2.9603116533844847,
+      "grad_norm": 1.7584409136596035,
+      "learning_rate": 5.314829406770639e-09,
+      "loss": 0.6701,
+      "step": 74850
+    },
+    {
+      "epoch": 2.9607071526033737,
+      "grad_norm": 1.6209984492877785,
+      "learning_rate": 5.209292683608924e-09,
+      "loss": 0.6302,
+      "step": 74860
+    },
+    {
+      "epoch": 2.9611026518222627,
+      "grad_norm": 1.5431252051398947,
+      "learning_rate": 5.1048137715470345e-09,
+      "loss": 0.6344,
+      "step": 74870
+    },
+    {
+      "epoch": 2.9614981510411518,
+      "grad_norm": 1.6825723290395584,
+      "learning_rate": 5.001392692711715e-09,
+      "loss": 0.6385,
+      "step": 74880
+    },
+    {
+      "epoch": 2.961893650260041,
+      "grad_norm": 1.4147061765580498,
+      "learning_rate": 4.899029469006556e-09,
+      "loss": 0.6649,
+      "step": 74890
+    },
+    {
+      "epoch": 2.96228914947893,
+      "grad_norm": 1.2492783765009212,
+      "learning_rate": 4.797724122110325e-09,
+      "loss": 0.6459,
+      "step": 74900
+    },
+    {
+      "epoch": 2.962684648697819,
+      "grad_norm": 1.688553028521607,
+      "learning_rate": 4.697476673476975e-09,
+      "loss": 0.637,
+      "step": 74910
+    },
+    {
+      "epoch": 2.963080147916708,
+      "grad_norm": 1.431511383458665,
+      "learning_rate": 4.598287144337299e-09,
+      "loss": 0.6447,
+      "step": 74920
+    },
+    {
+      "epoch": 2.963475647135597,
+      "grad_norm": 1.4545510572725713,
+      "learning_rate": 4.5001555556983824e-09,
+      "loss": 0.6129,
+      "step": 74930
+    },
+    {
+      "epoch": 2.963871146354486,
+      "grad_norm": 1.6186478053074254,
+      "learning_rate": 4.4030819283430445e-09,
+      "loss": 0.628,
+      "step": 74940
+    },
+    {
+      "epoch": 2.964266645573375,
+      "grad_norm": 1.6302149152057273,
+      "learning_rate": 4.307066282829286e-09,
+      "loss": 0.6194,
+      "step": 74950
+    },
+    {
+      "epoch": 2.964662144792264,
+      "grad_norm": 1.4780535461559339,
+      "learning_rate": 4.212108639491952e-09,
+      "loss": 0.6521,
+      "step": 74960
+    },
+    {
+      "epoch": 2.965057644011153,
+      "grad_norm": 1.2493366364575227,
+      "learning_rate": 4.118209018440511e-09,
+      "loss": 0.652,
+      "step": 74970
+    },
+    {
+      "epoch": 2.965453143230042,
+      "grad_norm": 1.2589958731347763,
+      "learning_rate": 4.025367439562944e-09,
+      "loss": 0.6955,
+      "step": 74980
+    },
+    {
+      "epoch": 2.965848642448931,
+      "grad_norm": 1.5853134337464636,
+      "learning_rate": 3.933583922519635e-09,
+      "loss": 0.6011,
+      "step": 74990
+    },
+    {
+      "epoch": 2.9662441416678202,
+      "grad_norm": 1.2605075781945128,
+      "learning_rate": 3.842858486750034e-09,
+      "loss": 0.6712,
+      "step": 75000
+    },
+    {
+      "epoch": 2.9666396408867093,
+      "grad_norm": 1.4068048415604375,
+      "learning_rate": 3.753191151468216e-09,
+      "loss": 0.6493,
+      "step": 75010
+    },
+    {
+      "epoch": 2.9670351401055983,
+      "grad_norm": 1.4678818460843905,
+      "learning_rate": 3.664581935663436e-09,
+      "loss": 0.628,
+      "step": 75020
+    },
+    {
+      "epoch": 2.9674306393244874,
+      "grad_norm": 1.4243209499110554,
+      "learning_rate": 3.577030858102348e-09,
+      "loss": 0.6281,
+      "step": 75030
+    },
+    {
+      "epoch": 2.9678261385433764,
+      "grad_norm": 1.4597063660944907,
+      "learning_rate": 3.4905379373262327e-09,
+      "loss": 0.6294,
+      "step": 75040
+    },
+    {
+      "epoch": 2.9682216377622654,
+      "grad_norm": 1.5420936459573134,
+      "learning_rate": 3.405103191653214e-09,
+      "loss": 0.6207,
+      "step": 75050
+    },
+    {
+      "epoch": 2.9686171369811545,
+      "grad_norm": 1.5678044545490732,
+      "learning_rate": 3.320726639176597e-09,
+      "loss": 0.6169,
+      "step": 75060
+    },
+    {
+      "epoch": 2.9690126362000435,
+      "grad_norm": 1.5837276688561837,
+      "learning_rate": 3.237408297766531e-09,
+      "loss": 0.6521,
+      "step": 75070
+    },
+    {
+      "epoch": 2.9694081354189326,
+      "grad_norm": 1.4567197823453915,
+      "learning_rate": 3.155148185067236e-09,
+      "loss": 0.6379,
+      "step": 75080
+    },
+    {
+      "epoch": 2.9698036346378216,
+      "grad_norm": 1.4121875262162478,
+      "learning_rate": 3.0739463185008868e-09,
+      "loss": 0.6278,
+      "step": 75090
+    },
+    {
+      "epoch": 2.9701991338567106,
+      "grad_norm": 1.2437593009784589,
+      "learning_rate": 2.9938027152642825e-09,
+      "loss": 0.6396,
+      "step": 75100
+    },
+    {
+      "epoch": 2.9705946330755997,
+      "grad_norm": 1.2660020573484911,
+      "learning_rate": 2.914717392331068e-09,
+      "loss": 0.6762,
+      "step": 75110
+    },
+    {
+      "epoch": 2.9709901322944887,
+      "grad_norm": 1.711535282925223,
+      "learning_rate": 2.8366903664495128e-09,
+      "loss": 0.6431,
+      "step": 75120
+    },
+    {
+      "epoch": 2.9713856315133778,
+      "grad_norm": 1.5909848871876575,
+      "learning_rate": 2.7597216541441763e-09,
+      "loss": 0.6292,
+      "step": 75130
+    },
+    {
+      "epoch": 2.971781130732267,
+      "grad_norm": 1.4385085756643556,
+      "learning_rate": 2.683811271716463e-09,
+      "loss": 0.6577,
+      "step": 75140
+    },
+    {
+      "epoch": 2.972176629951156,
+      "grad_norm": 1.32032383793393,
+      "learning_rate": 2.6089592352424033e-09,
+      "loss": 0.6148,
+      "step": 75150
+    },
+    {
+      "epoch": 2.972572129170045,
+      "grad_norm": 1.741420476121603,
+      "learning_rate": 2.5351655605748704e-09,
+      "loss": 0.6309,
+      "step": 75160
+    },
+    {
+      "epoch": 2.972967628388934,
+      "grad_norm": 1.415553982775396,
+      "learning_rate": 2.4624302633413643e-09,
+      "loss": 0.6536,
+      "step": 75170
+    },
+    {
+      "epoch": 2.973363127607823,
+      "grad_norm": 1.3981544815873328,
+      "learning_rate": 2.3907533589467848e-09,
+      "loss": 0.6333,
+      "step": 75180
+    },
+    {
+      "epoch": 2.973758626826712,
+      "grad_norm": 1.5398290473941099,
+      "learning_rate": 2.3201348625701003e-09,
+      "loss": 0.5774,
+      "step": 75190
+    },
+    {
+      "epoch": 2.974154126045601,
+      "grad_norm": 1.4157250252287614,
+      "learning_rate": 2.250574789167681e-09,
+      "loss": 0.6457,
+      "step": 75200
+    },
+    {
+      "epoch": 2.97454962526449,
+      "grad_norm": 1.6423073403184665,
+      "learning_rate": 2.182073153471631e-09,
+      "loss": 0.6101,
+      "step": 75210
+    },
+    {
+      "epoch": 2.974945124483379,
+      "grad_norm": 1.396346183045812,
+      "learning_rate": 2.1146299699886795e-09,
+      "loss": 0.632,
+      "step": 75220
+    },
+    {
+      "epoch": 2.975340623702268,
+      "grad_norm": 1.5078811731332071,
+      "learning_rate": 2.048245253002401e-09,
+      "loss": 0.6378,
+      "step": 75230
+    },
+    {
+      "epoch": 2.975736122921157,
+      "grad_norm": 1.25381496806973,
+      "learning_rate": 1.9829190165721046e-09,
+      "loss": 0.6697,
+      "step": 75240
+    },
+    {
+      "epoch": 2.9761316221400462,
+      "grad_norm": 1.7940583923776858,
+      "learning_rate": 1.9186512745322796e-09,
+      "loss": 0.6053,
+      "step": 75250
+    },
+    {
+      "epoch": 2.9765271213589353,
+      "grad_norm": 1.6043019537142902,
+      "learning_rate": 1.8554420404942596e-09,
+      "loss": 0.6436,
+      "step": 75260
+    },
+    {
+      "epoch": 2.9769226205778243,
+      "grad_norm": 1.521770604423184,
+      "learning_rate": 1.7932913278440044e-09,
+      "loss": 0.6837,
+      "step": 75270
+    },
+    {
+      "epoch": 2.9773181197967133,
+      "grad_norm": 1.4864976927423421,
+      "learning_rate": 1.7321991497448731e-09,
+      "loss": 0.6705,
+      "step": 75280
+    },
+    {
+      "epoch": 2.9777136190156024,
+      "grad_norm": 1.4399478614918257,
+      "learning_rate": 1.6721655191348497e-09,
+      "loss": 0.6404,
+      "step": 75290
+    },
+    {
+      "epoch": 2.9781091182344914,
+      "grad_norm": 1.5446595925732718,
+      "learning_rate": 1.613190448727653e-09,
+      "loss": 0.6443,
+      "step": 75300
+    },
+    {
+      "epoch": 2.9785046174533805,
+      "grad_norm": 1.477823627203876,
+      "learning_rate": 1.5552739510132919e-09,
+      "loss": 0.631,
+      "step": 75310
+    },
+    {
+      "epoch": 2.9789001166722695,
+      "grad_norm": 1.308303481305066,
+      "learning_rate": 1.4984160382575108e-09,
+      "loss": 0.6473,
+      "step": 75320
+    },
+    {
+      "epoch": 2.9792956158911585,
+      "grad_norm": 1.3179339787561173,
+      "learning_rate": 1.4426167225023436e-09,
+      "loss": 0.6377,
+      "step": 75330
+    },
+    {
+      "epoch": 2.9796911151100476,
+      "grad_norm": 1.2635890914942947,
+      "learning_rate": 1.387876015564449e-09,
+      "loss": 0.6398,
+      "step": 75340
+    },
+    {
+      "epoch": 2.9800866143289366,
+      "grad_norm": 1.4692902098728338,
+      "learning_rate": 1.3341939290373308e-09,
+      "loss": 0.6674,
+      "step": 75350
+    },
+    {
+      "epoch": 2.9804821135478257,
+      "grad_norm": 1.5136782957687511,
+      "learning_rate": 1.2815704742896728e-09,
+      "loss": 0.6595,
+      "step": 75360
+    },
+    {
+      "epoch": 2.9808776127667147,
+      "grad_norm": 1.2599673842229004,
+      "learning_rate": 1.2300056624664492e-09,
+      "loss": 0.6487,
+      "step": 75370
+    },
+    {
+      "epoch": 2.9812731119856037,
+      "grad_norm": 1.6242487669552406,
+      "learning_rate": 1.1794995044883684e-09,
+      "loss": 0.6342,
+      "step": 75380
+    },
+    {
+      "epoch": 2.981668611204493,
+      "grad_norm": 1.3829382961041896,
+      "learning_rate": 1.1300520110513192e-09,
+      "loss": 0.6327,
+      "step": 75390
+    },
+    {
+      "epoch": 2.982064110423382,
+      "grad_norm": 1.485347987490532,
+      "learning_rate": 1.0816631926274802e-09,
+      "loss": 0.6713,
+      "step": 75400
+    },
+    {
+      "epoch": 2.982459609642271,
+      "grad_norm": 1.4529475997219343,
+      "learning_rate": 1.03433305946532e-09,
+      "loss": 0.6404,
+      "step": 75410
+    },
+    {
+      "epoch": 2.98285510886116,
+      "grad_norm": 1.6253571361681403,
+      "learning_rate": 9.880616215879324e-10,
+      "loss": 0.6524,
+      "step": 75420
+    },
+    {
+      "epoch": 2.983250608080049,
+      "grad_norm": 1.4962361410135567,
+      "learning_rate": 9.428488887952558e-10,
+      "loss": 0.607,
+      "step": 75430
+    },
+    {
+      "epoch": 2.983646107298938,
+      "grad_norm": 1.3346341948544451,
+      "learning_rate": 8.986948706624088e-10,
+      "loss": 0.6686,
+      "step": 75440
+    },
+    {
+      "epoch": 2.984041606517827,
+      "grad_norm": 1.5262170638424148,
+      "learning_rate": 8.555995765407999e-10,
+      "loss": 0.6447,
+      "step": 75450
+    },
+    {
+      "epoch": 2.984437105736716,
+      "grad_norm": 1.619897785918023,
+      "learning_rate": 8.135630155564622e-10,
+      "loss": 0.6289,
+      "step": 75460
+    },
+    {
+      "epoch": 2.984832604955605,
+      "grad_norm": 1.2956858012182204,
+      "learning_rate": 7.725851966122744e-10,
+      "loss": 0.6503,
+      "step": 75470
+    },
+    {
+      "epoch": 2.985228104174494,
+      "grad_norm": 1.445731249320724,
+      "learning_rate": 7.326661283874048e-10,
+      "loss": 0.6682,
+      "step": 75480
+    },
+    {
+      "epoch": 2.985623603393383,
+      "grad_norm": 1.28110989703659,
+      "learning_rate": 6.938058193350916e-10,
+      "loss": 0.662,
+      "step": 75490
+    },
+    {
+      "epoch": 2.986019102612272,
+      "grad_norm": 1.714521019527429,
+      "learning_rate": 6.560042776854181e-10,
+      "loss": 0.6301,
+      "step": 75500
+    },
+    {
+      "epoch": 2.9864146018311613,
+      "grad_norm": 1.2517103403917649,
+      "learning_rate": 6.192615114447576e-10,
+      "loss": 0.6664,
+      "step": 75510
+    },
+    {
+      "epoch": 2.9868101010500503,
+      "grad_norm": 1.6458483808091464,
+      "learning_rate": 5.835775283941081e-10,
+      "loss": 0.6684,
+      "step": 75520
+    },
+    {
+      "epoch": 2.9872056002689393,
+      "grad_norm": 1.4486338943637045,
+      "learning_rate": 5.489523360902028e-10,
+      "loss": 0.6228,
+      "step": 75530
+    },
+    {
+      "epoch": 2.9876010994878284,
+      "grad_norm": 1.6992803194296027,
+      "learning_rate": 5.153859418671748e-10,
+      "loss": 0.651,
+      "step": 75540
+    },
+    {
+      "epoch": 2.9879965987067174,
+      "grad_norm": 1.5222250377392132,
+      "learning_rate": 4.828783528332271e-10,
+      "loss": 0.6176,
+      "step": 75550
+    },
+    {
+      "epoch": 2.9883920979256064,
+      "grad_norm": 1.4129325805060393,
+      "learning_rate": 4.5142957587340774e-10,
+      "loss": 0.6467,
+      "step": 75560
+    },
+    {
+      "epoch": 2.9887875971444955,
+      "grad_norm": 1.5953229460641893,
+      "learning_rate": 4.2103961764683457e-10,
+      "loss": 0.6225,
+      "step": 75570
+    },
+    {
+      "epoch": 2.9891830963633845,
+      "grad_norm": 1.5002173466125093,
+      "learning_rate": 3.9170848459058055e-10,
+      "loss": 0.6155,
+      "step": 75580
+    },
+    {
+      "epoch": 2.9895785955822736,
+      "grad_norm": 1.426615461288542,
+      "learning_rate": 3.634361829163435e-10,
+      "loss": 0.6544,
+      "step": 75590
+    },
+    {
+      "epoch": 2.9899740948011626,
+      "grad_norm": 1.6223286719494556,
+      "learning_rate": 3.362227186121114e-10,
+      "loss": 0.6263,
+      "step": 75600
+    },
+    {
+      "epoch": 2.9903695940200516,
+      "grad_norm": 1.3958423191500335,
+      "learning_rate": 3.100680974399417e-10,
+      "loss": 0.6247,
+      "step": 75610
+    },
+    {
+      "epoch": 2.9907650932389407,
+      "grad_norm": 1.3907307659766843,
+      "learning_rate": 2.849723249398473e-10,
+      "loss": 0.656,
+      "step": 75620
+    },
+    {
+      "epoch": 2.99116059245783,
+      "grad_norm": 1.5579688916749102,
+      "learning_rate": 2.6093540642702085e-10,
+      "loss": 0.6667,
+      "step": 75630
+    },
+    {
+      "epoch": 2.9915560916767188,
+      "grad_norm": 1.3853201734642822,
+      "learning_rate": 2.379573469907248e-10,
+      "loss": 0.6324,
+      "step": 75640
+    },
+    {
+      "epoch": 2.9919515908956082,
+      "grad_norm": 1.5627128118464633,
+      "learning_rate": 2.1603815149873196e-10,
+      "loss": 0.6162,
+      "step": 75650
+    },
+    {
+      "epoch": 2.992347090114497,
+      "grad_norm": 1.9609497278470716,
+      "learning_rate": 1.951778245928848e-10,
+      "loss": 0.6329,
+      "step": 75660
+    },
+    {
+      "epoch": 2.9927425893333863,
+      "grad_norm": 1.4399687485105364,
+      "learning_rate": 1.7537637069020563e-10,
+      "loss": 0.6439,
+      "step": 75670
+    },
+    {
+      "epoch": 2.993138088552275,
+      "grad_norm": 1.2859001723230108,
+      "learning_rate": 1.5663379398511702e-10,
+      "loss": 0.6727,
+      "step": 75680
+    },
+    {
+      "epoch": 2.9935335877711644,
+      "grad_norm": 1.540621106707812,
+      "learning_rate": 1.3895009844722142e-10,
+      "loss": 0.6601,
+      "step": 75690
+    },
+    {
+      "epoch": 2.993929086990053,
+      "grad_norm": 1.2216806932987352,
+      "learning_rate": 1.223252878201908e-10,
+      "loss": 0.6445,
+      "step": 75700
+    },
+    {
+      "epoch": 2.9943245862089425,
+      "grad_norm": 1.5059383365090029,
+      "learning_rate": 1.0675936562676292e-10,
+      "loss": 0.6551,
+      "step": 75710
+    },
+    {
+      "epoch": 2.994720085427831,
+      "grad_norm": 1.5638408016327519,
+      "learning_rate": 9.225233516207966e-11,
+      "loss": 0.655,
+      "step": 75720
+    },
+    {
+      "epoch": 2.9951155846467206,
+      "grad_norm": 1.5980857472480596,
+      "learning_rate": 7.880419949868323e-11,
+      "loss": 0.6517,
+      "step": 75730
+    },
+    {
+      "epoch": 2.995511083865609,
+      "grad_norm": 1.5165589909400723,
+      "learning_rate": 6.641496148540594e-11,
+      "loss": 0.6746,
+      "step": 75740
+    },
+    {
+      "epoch": 2.9959065830844986,
+      "grad_norm": 1.2567363456933929,
+      "learning_rate": 5.508462374570478e-11,
+      "loss": 0.626,
+      "step": 75750
+    },
+    {
+      "epoch": 2.9963020823033872,
+      "grad_norm": 1.7366048532457277,
+      "learning_rate": 4.4813188678771715e-11,
+      "loss": 0.6505,
+      "step": 75760
+    },
+    {
+      "epoch": 2.9966975815222767,
+      "grad_norm": 1.580052978547302,
+      "learning_rate": 3.5600658460088756e-11,
+      "loss": 0.6296,
+      "step": 75770
+    },
+    {
+      "epoch": 2.9970930807411653,
+      "grad_norm": 1.6119513334127769,
+      "learning_rate": 2.7447035041427984e-11,
+      "loss": 0.6422,
+      "step": 75780
+    },
+    {
+      "epoch": 2.997488579960055,
+      "grad_norm": 1.269290310427461,
+      "learning_rate": 2.035232014863109e-11,
+      "loss": 0.625,
+      "step": 75790
+    },
+    {
+      "epoch": 2.9978840791789434,
+      "grad_norm": 1.3904786274334968,
+      "learning_rate": 1.4316515284384935e-11,
+      "loss": 0.65,
+      "step": 75800
+    },
+    {
+      "epoch": 2.998279578397833,
+      "grad_norm": 1.452136459921812,
+      "learning_rate": 9.339621727666448e-12,
+      "loss": 0.6577,
+      "step": 75810
+    },
+    {
+      "epoch": 2.9986750776167215,
+      "grad_norm": 1.647329898728647,
+      "learning_rate": 5.421640532077277e-12,
+      "loss": 0.6398,
+      "step": 75820
+    },
+    {
+      "epoch": 2.999070576835611,
+      "grad_norm": 1.4807456076782137,
+      "learning_rate": 2.5625725269540214e-12,
+      "loss": 0.6591,
+      "step": 75830
+    },
+    {
+      "epoch": 2.9994660760544996,
+      "grad_norm": 1.4164661919438535,
+      "learning_rate": 7.624183179233414e-13,
+      "loss": 0.619,
+      "step": 75840
+    },
+    {
+      "epoch": 2.999861575273389,
+      "grad_norm": 1.218056581703874,
+      "learning_rate": 2.1178286901957224e-14,
+      "loss": 0.6283,
+      "step": 75850
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 75852,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 150000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.1003829288173568e+16,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}