diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,53128 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.9999406751171667, + "eval_steps": 500000, + "global_step": 75852, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0003954992188890427, + "grad_norm": 3.7074936605397686, + "learning_rate": 1.318217769575534e-08, + "loss": 2.201, + "step": 10 + }, + { + "epoch": 0.0007909984377780854, + "grad_norm": 3.3109913373920854, + "learning_rate": 2.636435539151068e-08, + "loss": 2.1554, + "step": 20 + }, + { + "epoch": 0.0011864976566671281, + "grad_norm": 3.6800607569494184, + "learning_rate": 3.9546533087266016e-08, + "loss": 2.1944, + "step": 30 + }, + { + "epoch": 0.0015819968755561708, + "grad_norm": 3.743190421032586, + "learning_rate": 5.272871078302136e-08, + "loss": 2.2551, + "step": 40 + }, + { + "epoch": 0.0019774960944452134, + "grad_norm": 4.691004471985028, + "learning_rate": 6.59108884787767e-08, + "loss": 2.3473, + "step": 50 + }, + { + "epoch": 0.0023729953133342562, + "grad_norm": 3.8048532622701434, + "learning_rate": 7.909306617453203e-08, + "loss": 2.328, + "step": 60 + }, + { + "epoch": 0.0027684945322232987, + "grad_norm": 4.001415152354601, + "learning_rate": 9.227524387028737e-08, + "loss": 2.3177, + "step": 70 + }, + { + "epoch": 0.0031639937511123415, + "grad_norm": 4.032244142335064, + "learning_rate": 1.0545742156604272e-07, + "loss": 2.3234, + "step": 80 + }, + { + "epoch": 0.0035594929700013844, + "grad_norm": 3.7312290699451545, + "learning_rate": 1.1863959926179805e-07, + "loss": 2.3067, + "step": 90 + }, + { + "epoch": 0.003954992188890427, + "grad_norm": 4.153113538768762, + "learning_rate": 1.318217769575534e-07, + "loss": 2.3278, + "step": 100 + }, + { + "epoch": 0.00435049140777947, + "grad_norm": 4.096269099899376, + "learning_rate": 1.4500395465330874e-07, + "loss": 2.3194, + "step": 110 + }, + { + "epoch": 0.0047459906266685125, + "grad_norm": 3.235160888190004, + "learning_rate": 1.5818613234906406e-07, + "loss": 2.3154, + "step": 120 + }, + { + "epoch": 0.005141489845557555, + "grad_norm": 3.323526631193669, + "learning_rate": 1.7136831004481941e-07, + "loss": 2.2965, + "step": 130 + }, + { + "epoch": 0.005536989064446597, + "grad_norm": 3.6904397977464054, + "learning_rate": 1.8455048774057474e-07, + "loss": 2.2868, + "step": 140 + }, + { + "epoch": 0.00593248828333564, + "grad_norm": 3.602112215723258, + "learning_rate": 1.977326654363301e-07, + "loss": 2.2448, + "step": 150 + }, + { + "epoch": 0.006327987502224683, + "grad_norm": 3.2370179111059896, + "learning_rate": 2.1091484313208544e-07, + "loss": 2.2357, + "step": 160 + }, + { + "epoch": 0.006723486721113726, + "grad_norm": 3.3605614319330064, + "learning_rate": 2.2409702082784076e-07, + "loss": 2.1997, + "step": 170 + }, + { + "epoch": 0.007118985940002769, + "grad_norm": 3.0808008550864012, + "learning_rate": 2.372791985235961e-07, + "loss": 2.1638, + "step": 180 + }, + { + "epoch": 0.007514485158891812, + "grad_norm": 3.0129714869661623, + "learning_rate": 2.5046137621935146e-07, + "loss": 2.1419, + "step": 190 + }, + { + "epoch": 0.007909984377780854, + "grad_norm": 2.6405846741638626, + "learning_rate": 2.636435539151068e-07, + "loss": 2.1563, + "step": 200 + }, + { + "epoch": 0.008305483596669896, + "grad_norm": 2.643582511409625, + "learning_rate": 2.7682573161086216e-07, + "loss": 2.1003, + "step": 
210 + }, + { + "epoch": 0.00870098281555894, + "grad_norm": 2.232571959869797, + "learning_rate": 2.900079093066175e-07, + "loss": 2.0861, + "step": 220 + }, + { + "epoch": 0.009096482034447982, + "grad_norm": 2.128301735621245, + "learning_rate": 3.031900870023728e-07, + "loss": 2.0055, + "step": 230 + }, + { + "epoch": 0.009491981253337025, + "grad_norm": 2.020636723569866, + "learning_rate": 3.1637226469812813e-07, + "loss": 1.9564, + "step": 240 + }, + { + "epoch": 0.009887480472226068, + "grad_norm": 2.1434864010939, + "learning_rate": 3.295544423938835e-07, + "loss": 1.9526, + "step": 250 + }, + { + "epoch": 0.01028297969111511, + "grad_norm": 2.1828048079576456, + "learning_rate": 3.4273662008963883e-07, + "loss": 1.9264, + "step": 260 + }, + { + "epoch": 0.010678478910004154, + "grad_norm": 1.9105029100185225, + "learning_rate": 3.5591879778539415e-07, + "loss": 1.8921, + "step": 270 + }, + { + "epoch": 0.011073978128893195, + "grad_norm": 2.4926551290834817, + "learning_rate": 3.691009754811495e-07, + "loss": 1.8819, + "step": 280 + }, + { + "epoch": 0.011469477347782238, + "grad_norm": 1.8847166285822843, + "learning_rate": 3.8228315317690485e-07, + "loss": 1.8367, + "step": 290 + }, + { + "epoch": 0.01186497656667128, + "grad_norm": 1.8019359837801003, + "learning_rate": 3.954653308726602e-07, + "loss": 1.8826, + "step": 300 + }, + { + "epoch": 0.012260475785560323, + "grad_norm": 1.7209235517173358, + "learning_rate": 4.086475085684155e-07, + "loss": 1.8866, + "step": 310 + }, + { + "epoch": 0.012655975004449366, + "grad_norm": 1.6203504074860473, + "learning_rate": 4.218296862641709e-07, + "loss": 1.8438, + "step": 320 + }, + { + "epoch": 0.013051474223338409, + "grad_norm": 1.6671035396275247, + "learning_rate": 4.350118639599262e-07, + "loss": 1.835, + "step": 330 + }, + { + "epoch": 0.013446973442227452, + "grad_norm": 1.5304792384555093, + "learning_rate": 4.481940416556815e-07, + "loss": 1.809, + "step": 340 + }, + { + "epoch": 0.013842472661116495, + "grad_norm": 1.5048385764472376, + "learning_rate": 4.613762193514369e-07, + "loss": 1.7621, + "step": 350 + }, + { + "epoch": 0.014237971880005537, + "grad_norm": 1.7833195775272743, + "learning_rate": 4.745583970471922e-07, + "loss": 1.7634, + "step": 360 + }, + { + "epoch": 0.01463347109889458, + "grad_norm": 1.6045933891412605, + "learning_rate": 4.877405747429475e-07, + "loss": 1.7578, + "step": 370 + }, + { + "epoch": 0.015028970317783623, + "grad_norm": 1.6923943685841414, + "learning_rate": 5.009227524387029e-07, + "loss": 1.7499, + "step": 380 + }, + { + "epoch": 0.015424469536672664, + "grad_norm": 1.746191195367448, + "learning_rate": 5.141049301344583e-07, + "loss": 1.7321, + "step": 390 + }, + { + "epoch": 0.015819968755561707, + "grad_norm": 1.5229493549326838, + "learning_rate": 5.272871078302136e-07, + "loss": 1.7276, + "step": 400 + }, + { + "epoch": 0.01621546797445075, + "grad_norm": 1.550582846508733, + "learning_rate": 5.404692855259689e-07, + "loss": 1.7101, + "step": 410 + }, + { + "epoch": 0.016610967193339793, + "grad_norm": 1.6232357312318062, + "learning_rate": 5.536514632217243e-07, + "loss": 1.6954, + "step": 420 + }, + { + "epoch": 0.017006466412228836, + "grad_norm": 1.5664773994327206, + "learning_rate": 5.668336409174796e-07, + "loss": 1.7404, + "step": 430 + }, + { + "epoch": 0.01740196563111788, + "grad_norm": 1.6713626953240466, + "learning_rate": 5.80015818613235e-07, + "loss": 1.7557, + "step": 440 + }, + { + "epoch": 0.01779746485000692, + "grad_norm": 1.5298539994905176, + 
"learning_rate": 5.931979963089903e-07, + "loss": 1.74, + "step": 450 + }, + { + "epoch": 0.018192964068895964, + "grad_norm": 1.5719731844258467, + "learning_rate": 6.063801740047456e-07, + "loss": 1.7228, + "step": 460 + }, + { + "epoch": 0.018588463287785007, + "grad_norm": 1.696690769626351, + "learning_rate": 6.19562351700501e-07, + "loss": 1.6842, + "step": 470 + }, + { + "epoch": 0.01898396250667405, + "grad_norm": 1.6477134561193705, + "learning_rate": 6.327445293962563e-07, + "loss": 1.6852, + "step": 480 + }, + { + "epoch": 0.019379461725563093, + "grad_norm": 1.4954021115012037, + "learning_rate": 6.459267070920116e-07, + "loss": 1.6848, + "step": 490 + }, + { + "epoch": 0.019774960944452136, + "grad_norm": 1.4543001068041868, + "learning_rate": 6.59108884787767e-07, + "loss": 1.6932, + "step": 500 + }, + { + "epoch": 0.02017046016334118, + "grad_norm": 1.5572647621051774, + "learning_rate": 6.722910624835223e-07, + "loss": 1.6939, + "step": 510 + }, + { + "epoch": 0.02056595938223022, + "grad_norm": 1.5060407384324919, + "learning_rate": 6.854732401792777e-07, + "loss": 1.665, + "step": 520 + }, + { + "epoch": 0.020961458601119264, + "grad_norm": 1.5473694995949017, + "learning_rate": 6.98655417875033e-07, + "loss": 1.6816, + "step": 530 + }, + { + "epoch": 0.021356957820008307, + "grad_norm": 1.585877480822181, + "learning_rate": 7.118375955707883e-07, + "loss": 1.6502, + "step": 540 + }, + { + "epoch": 0.021752457038897346, + "grad_norm": 1.552409095115036, + "learning_rate": 7.250197732665437e-07, + "loss": 1.6307, + "step": 550 + }, + { + "epoch": 0.02214795625778639, + "grad_norm": 1.4684053904227856, + "learning_rate": 7.38201950962299e-07, + "loss": 1.6293, + "step": 560 + }, + { + "epoch": 0.022543455476675432, + "grad_norm": 1.5474705238190347, + "learning_rate": 7.513841286580543e-07, + "loss": 1.6238, + "step": 570 + }, + { + "epoch": 0.022938954695564475, + "grad_norm": 1.4126204821167332, + "learning_rate": 7.645663063538097e-07, + "loss": 1.6373, + "step": 580 + }, + { + "epoch": 0.023334453914453518, + "grad_norm": 1.566399471829619, + "learning_rate": 7.77748484049565e-07, + "loss": 1.6068, + "step": 590 + }, + { + "epoch": 0.02372995313334256, + "grad_norm": 1.4763703195584525, + "learning_rate": 7.909306617453203e-07, + "loss": 1.6294, + "step": 600 + }, + { + "epoch": 0.024125452352231604, + "grad_norm": 1.4214745635789, + "learning_rate": 8.041128394410757e-07, + "loss": 1.6015, + "step": 610 + }, + { + "epoch": 0.024520951571120646, + "grad_norm": 1.4712408124656606, + "learning_rate": 8.17295017136831e-07, + "loss": 1.6183, + "step": 620 + }, + { + "epoch": 0.02491645079000969, + "grad_norm": 1.3914568736563913, + "learning_rate": 8.304771948325864e-07, + "loss": 1.5748, + "step": 630 + }, + { + "epoch": 0.025311950008898732, + "grad_norm": 1.628242307981156, + "learning_rate": 8.436593725283417e-07, + "loss": 1.5899, + "step": 640 + }, + { + "epoch": 0.025707449227787775, + "grad_norm": 1.5522926075709969, + "learning_rate": 8.56841550224097e-07, + "loss": 1.6057, + "step": 650 + }, + { + "epoch": 0.026102948446676818, + "grad_norm": 1.3563714916369782, + "learning_rate": 8.700237279198524e-07, + "loss": 1.5756, + "step": 660 + }, + { + "epoch": 0.02649844766556586, + "grad_norm": 1.5473797209639122, + "learning_rate": 8.832059056156078e-07, + "loss": 1.5848, + "step": 670 + }, + { + "epoch": 0.026893946884454904, + "grad_norm": 1.3448232230490351, + "learning_rate": 8.96388083311363e-07, + "loss": 1.5797, + "step": 680 + }, + { + "epoch": 
0.027289446103343946, + "grad_norm": 1.4678885776783204, + "learning_rate": 9.095702610071184e-07, + "loss": 1.5598, + "step": 690 + }, + { + "epoch": 0.02768494532223299, + "grad_norm": 1.4395122891633945, + "learning_rate": 9.227524387028738e-07, + "loss": 1.5448, + "step": 700 + }, + { + "epoch": 0.028080444541122032, + "grad_norm": 1.3091642457498578, + "learning_rate": 9.359346163986291e-07, + "loss": 1.5719, + "step": 710 + }, + { + "epoch": 0.028475943760011075, + "grad_norm": 1.4465270166087172, + "learning_rate": 9.491167940943844e-07, + "loss": 1.5232, + "step": 720 + }, + { + "epoch": 0.028871442978900118, + "grad_norm": 1.413997577563463, + "learning_rate": 9.6229897179014e-07, + "loss": 1.5481, + "step": 730 + }, + { + "epoch": 0.02926694219778916, + "grad_norm": 1.4180765994412476, + "learning_rate": 9.75481149485895e-07, + "loss": 1.5304, + "step": 740 + }, + { + "epoch": 0.029662441416678204, + "grad_norm": 1.374935810420744, + "learning_rate": 9.886633271816505e-07, + "loss": 1.5182, + "step": 750 + }, + { + "epoch": 0.030057940635567246, + "grad_norm": 1.4614624968528402, + "learning_rate": 1.0018455048774058e-06, + "loss": 1.5315, + "step": 760 + }, + { + "epoch": 0.030453439854456286, + "grad_norm": 1.5654133110212314, + "learning_rate": 1.0150276825731612e-06, + "loss": 1.5604, + "step": 770 + }, + { + "epoch": 0.03084893907334533, + "grad_norm": 1.3406860820189042, + "learning_rate": 1.0282098602689166e-06, + "loss": 1.5199, + "step": 780 + }, + { + "epoch": 0.03124443829223437, + "grad_norm": 1.415493720966217, + "learning_rate": 1.0413920379646718e-06, + "loss": 1.5086, + "step": 790 + }, + { + "epoch": 0.031639937511123414, + "grad_norm": 1.3992787320863422, + "learning_rate": 1.0545742156604271e-06, + "loss": 1.5212, + "step": 800 + }, + { + "epoch": 0.03203543673001246, + "grad_norm": 1.403735804574894, + "learning_rate": 1.0677563933561825e-06, + "loss": 1.513, + "step": 810 + }, + { + "epoch": 0.0324309359489015, + "grad_norm": 1.3853535238209604, + "learning_rate": 1.0809385710519379e-06, + "loss": 1.5152, + "step": 820 + }, + { + "epoch": 0.032826435167790546, + "grad_norm": 1.3750744422194516, + "learning_rate": 1.0941207487476933e-06, + "loss": 1.5273, + "step": 830 + }, + { + "epoch": 0.033221934386679586, + "grad_norm": 1.33200650080592, + "learning_rate": 1.1073029264434486e-06, + "loss": 1.5034, + "step": 840 + }, + { + "epoch": 0.03361743360556863, + "grad_norm": 1.410768669372819, + "learning_rate": 1.1204851041392038e-06, + "loss": 1.516, + "step": 850 + }, + { + "epoch": 0.03401293282445767, + "grad_norm": 1.3774153439612187, + "learning_rate": 1.1336672818349592e-06, + "loss": 1.5088, + "step": 860 + }, + { + "epoch": 0.03440843204334672, + "grad_norm": 1.3779966259760223, + "learning_rate": 1.1468494595307146e-06, + "loss": 1.5183, + "step": 870 + }, + { + "epoch": 0.03480393126223576, + "grad_norm": 1.344217420218594, + "learning_rate": 1.16003163722647e-06, + "loss": 1.4865, + "step": 880 + }, + { + "epoch": 0.0351994304811248, + "grad_norm": 1.432525298396332, + "learning_rate": 1.1732138149222253e-06, + "loss": 1.4758, + "step": 890 + }, + { + "epoch": 0.03559492970001384, + "grad_norm": 1.3306163364600878, + "learning_rate": 1.1863959926179807e-06, + "loss": 1.4826, + "step": 900 + }, + { + "epoch": 0.03599042891890288, + "grad_norm": 1.5235420655487502, + "learning_rate": 1.1995781703137358e-06, + "loss": 1.4898, + "step": 910 + }, + { + "epoch": 0.03638592813779193, + "grad_norm": 1.346509814724087, + "learning_rate": 
1.2127603480094912e-06, + "loss": 1.505, + "step": 920 + }, + { + "epoch": 0.03678142735668097, + "grad_norm": 1.3877406779619121, + "learning_rate": 1.2259425257052466e-06, + "loss": 1.4767, + "step": 930 + }, + { + "epoch": 0.037176926575570014, + "grad_norm": 1.3846214860230308, + "learning_rate": 1.239124703401002e-06, + "loss": 1.4724, + "step": 940 + }, + { + "epoch": 0.037572425794459054, + "grad_norm": 1.2817258137618222, + "learning_rate": 1.2523068810967574e-06, + "loss": 1.4713, + "step": 950 + }, + { + "epoch": 0.0379679250133481, + "grad_norm": 1.3439675775454174, + "learning_rate": 1.2654890587925125e-06, + "loss": 1.4752, + "step": 960 + }, + { + "epoch": 0.03836342423223714, + "grad_norm": 1.3554867357811966, + "learning_rate": 1.278671236488268e-06, + "loss": 1.4562, + "step": 970 + }, + { + "epoch": 0.038758923451126186, + "grad_norm": 1.2790133713776188, + "learning_rate": 1.2918534141840233e-06, + "loss": 1.4674, + "step": 980 + }, + { + "epoch": 0.039154422670015225, + "grad_norm": 1.343329302614488, + "learning_rate": 1.3050355918797786e-06, + "loss": 1.4666, + "step": 990 + }, + { + "epoch": 0.03954992188890427, + "grad_norm": 1.2672292495398485, + "learning_rate": 1.318217769575534e-06, + "loss": 1.4556, + "step": 1000 + }, + { + "epoch": 0.03994542110779331, + "grad_norm": 1.3037813733806745, + "learning_rate": 1.3313999472712894e-06, + "loss": 1.4497, + "step": 1010 + }, + { + "epoch": 0.04034092032668236, + "grad_norm": 1.345660418942053, + "learning_rate": 1.3445821249670446e-06, + "loss": 1.4426, + "step": 1020 + }, + { + "epoch": 0.040736419545571396, + "grad_norm": 1.2609132287515321, + "learning_rate": 1.3577643026628001e-06, + "loss": 1.4565, + "step": 1030 + }, + { + "epoch": 0.04113191876446044, + "grad_norm": 1.374665502512752, + "learning_rate": 1.3709464803585553e-06, + "loss": 1.4412, + "step": 1040 + }, + { + "epoch": 0.04152741798334948, + "grad_norm": 1.3081072490504966, + "learning_rate": 1.3841286580543107e-06, + "loss": 1.4402, + "step": 1050 + }, + { + "epoch": 0.04192291720223853, + "grad_norm": 1.4330687455032025, + "learning_rate": 1.397310835750066e-06, + "loss": 1.4683, + "step": 1060 + }, + { + "epoch": 0.04231841642112757, + "grad_norm": 1.3606849988116267, + "learning_rate": 1.4104930134458214e-06, + "loss": 1.4333, + "step": 1070 + }, + { + "epoch": 0.042713915640016614, + "grad_norm": 1.3074383876921958, + "learning_rate": 1.4236751911415766e-06, + "loss": 1.4498, + "step": 1080 + }, + { + "epoch": 0.043109414858905654, + "grad_norm": 1.4042081366074939, + "learning_rate": 1.4368573688373322e-06, + "loss": 1.4528, + "step": 1090 + }, + { + "epoch": 0.04350491407779469, + "grad_norm": 1.272696004817707, + "learning_rate": 1.4500395465330874e-06, + "loss": 1.4277, + "step": 1100 + }, + { + "epoch": 0.04390041329668374, + "grad_norm": 1.3590575435554115, + "learning_rate": 1.4632217242288427e-06, + "loss": 1.4233, + "step": 1110 + }, + { + "epoch": 0.04429591251557278, + "grad_norm": 1.2769709382668435, + "learning_rate": 1.476403901924598e-06, + "loss": 1.4244, + "step": 1120 + }, + { + "epoch": 0.044691411734461825, + "grad_norm": 1.2345551348610775, + "learning_rate": 1.4895860796203535e-06, + "loss": 1.4428, + "step": 1130 + }, + { + "epoch": 0.045086910953350864, + "grad_norm": 1.4051433004667393, + "learning_rate": 1.5027682573161087e-06, + "loss": 1.4464, + "step": 1140 + }, + { + "epoch": 0.04548241017223991, + "grad_norm": 1.2673234929231048, + "learning_rate": 1.515950435011864e-06, + "loss": 1.4083, + "step": 1150 + }, + { + 
"epoch": 0.04587790939112895, + "grad_norm": 1.2619921688980378, + "learning_rate": 1.5291326127076194e-06, + "loss": 1.4198, + "step": 1160 + }, + { + "epoch": 0.046273408610017996, + "grad_norm": 1.3402764138765177, + "learning_rate": 1.5423147904033748e-06, + "loss": 1.4147, + "step": 1170 + }, + { + "epoch": 0.046668907828907036, + "grad_norm": 1.3856239712083365, + "learning_rate": 1.55549696809913e-06, + "loss": 1.4184, + "step": 1180 + }, + { + "epoch": 0.04706440704779608, + "grad_norm": 1.3189510305969174, + "learning_rate": 1.5686791457948855e-06, + "loss": 1.4032, + "step": 1190 + }, + { + "epoch": 0.04745990626668512, + "grad_norm": 1.3714820128166048, + "learning_rate": 1.5818613234906407e-06, + "loss": 1.4088, + "step": 1200 + }, + { + "epoch": 0.04785540548557417, + "grad_norm": 1.1741079276066009, + "learning_rate": 1.595043501186396e-06, + "loss": 1.4101, + "step": 1210 + }, + { + "epoch": 0.04825090470446321, + "grad_norm": 1.2891130628308234, + "learning_rate": 1.6082256788821515e-06, + "loss": 1.3912, + "step": 1220 + }, + { + "epoch": 0.048646403923352254, + "grad_norm": 1.269826134186319, + "learning_rate": 1.6214078565779068e-06, + "loss": 1.3908, + "step": 1230 + }, + { + "epoch": 0.04904190314224129, + "grad_norm": 1.2966310224708233, + "learning_rate": 1.634590034273662e-06, + "loss": 1.4013, + "step": 1240 + }, + { + "epoch": 0.04943740236113034, + "grad_norm": 1.2623601906553965, + "learning_rate": 1.6477722119694176e-06, + "loss": 1.3954, + "step": 1250 + }, + { + "epoch": 0.04983290158001938, + "grad_norm": 1.2093054401404613, + "learning_rate": 1.6609543896651727e-06, + "loss": 1.3933, + "step": 1260 + }, + { + "epoch": 0.050228400798908425, + "grad_norm": 1.312509373739013, + "learning_rate": 1.6741365673609281e-06, + "loss": 1.4016, + "step": 1270 + }, + { + "epoch": 0.050623900017797464, + "grad_norm": 1.2875671531479649, + "learning_rate": 1.6873187450566835e-06, + "loss": 1.3783, + "step": 1280 + }, + { + "epoch": 0.05101939923668651, + "grad_norm": 1.2478098054962192, + "learning_rate": 1.7005009227524389e-06, + "loss": 1.4164, + "step": 1290 + }, + { + "epoch": 0.05141489845557555, + "grad_norm": 1.2796969742474207, + "learning_rate": 1.713683100448194e-06, + "loss": 1.4055, + "step": 1300 + }, + { + "epoch": 0.051810397674464596, + "grad_norm": 1.2046753206553034, + "learning_rate": 1.7268652781439496e-06, + "loss": 1.3803, + "step": 1310 + }, + { + "epoch": 0.052205896893353636, + "grad_norm": 1.2212394785406553, + "learning_rate": 1.7400474558397048e-06, + "loss": 1.3904, + "step": 1320 + }, + { + "epoch": 0.052601396112242675, + "grad_norm": 1.2646871837332208, + "learning_rate": 1.7532296335354602e-06, + "loss": 1.3713, + "step": 1330 + }, + { + "epoch": 0.05299689533113172, + "grad_norm": 1.1867445931970122, + "learning_rate": 1.7664118112312155e-06, + "loss": 1.4019, + "step": 1340 + }, + { + "epoch": 0.05339239455002076, + "grad_norm": 1.3679672932922804, + "learning_rate": 1.779593988926971e-06, + "loss": 1.4007, + "step": 1350 + }, + { + "epoch": 0.05378789376890981, + "grad_norm": 1.2031507878416894, + "learning_rate": 1.792776166622726e-06, + "loss": 1.4041, + "step": 1360 + }, + { + "epoch": 0.05418339298779885, + "grad_norm": 1.3930446030305885, + "learning_rate": 1.8059583443184817e-06, + "loss": 1.3709, + "step": 1370 + }, + { + "epoch": 0.05457889220668789, + "grad_norm": 1.3165081578311295, + "learning_rate": 1.8191405220142368e-06, + "loss": 1.3929, + "step": 1380 + }, + { + "epoch": 0.05497439142557693, + "grad_norm": 
1.3804310114347087, + "learning_rate": 1.8323226997099922e-06, + "loss": 1.371, + "step": 1390 + }, + { + "epoch": 0.05536989064446598, + "grad_norm": 1.2659266021240543, + "learning_rate": 1.8455048774057476e-06, + "loss": 1.3599, + "step": 1400 + }, + { + "epoch": 0.05576538986335502, + "grad_norm": 1.1998894502716475, + "learning_rate": 1.858687055101503e-06, + "loss": 1.3698, + "step": 1410 + }, + { + "epoch": 0.056160889082244064, + "grad_norm": 1.1797459236151298, + "learning_rate": 1.8718692327972581e-06, + "loss": 1.3725, + "step": 1420 + }, + { + "epoch": 0.056556388301133104, + "grad_norm": 1.3850200456812771, + "learning_rate": 1.8850514104930137e-06, + "loss": 1.3904, + "step": 1430 + }, + { + "epoch": 0.05695188752002215, + "grad_norm": 1.284926362127649, + "learning_rate": 1.8982335881887689e-06, + "loss": 1.3617, + "step": 1440 + }, + { + "epoch": 0.05734738673891119, + "grad_norm": 1.3130108102859457, + "learning_rate": 1.9114157658845243e-06, + "loss": 1.387, + "step": 1450 + }, + { + "epoch": 0.057742885957800236, + "grad_norm": 1.2515641080831983, + "learning_rate": 1.92459794358028e-06, + "loss": 1.3434, + "step": 1460 + }, + { + "epoch": 0.058138385176689275, + "grad_norm": 1.2855780839226325, + "learning_rate": 1.937780121276035e-06, + "loss": 1.3818, + "step": 1470 + }, + { + "epoch": 0.05853388439557832, + "grad_norm": 1.3951421747758692, + "learning_rate": 1.95096229897179e-06, + "loss": 1.3525, + "step": 1480 + }, + { + "epoch": 0.05892938361446736, + "grad_norm": 1.319151820190548, + "learning_rate": 1.9641444766675458e-06, + "loss": 1.369, + "step": 1490 + }, + { + "epoch": 0.05932488283335641, + "grad_norm": 1.283286982030512, + "learning_rate": 1.977326654363301e-06, + "loss": 1.3727, + "step": 1500 + }, + { + "epoch": 0.059720382052245446, + "grad_norm": 1.225386210337589, + "learning_rate": 1.990508832059056e-06, + "loss": 1.3847, + "step": 1510 + }, + { + "epoch": 0.06011588127113449, + "grad_norm": 1.3323436425897397, + "learning_rate": 2.0036910097548117e-06, + "loss": 1.368, + "step": 1520 + }, + { + "epoch": 0.06051138049002353, + "grad_norm": 1.2114254618537965, + "learning_rate": 2.016873187450567e-06, + "loss": 1.3842, + "step": 1530 + }, + { + "epoch": 0.06090687970891257, + "grad_norm": 1.174176153009977, + "learning_rate": 2.0300553651463224e-06, + "loss": 1.382, + "step": 1540 + }, + { + "epoch": 0.06130237892780162, + "grad_norm": 1.2213823235440902, + "learning_rate": 2.043237542842078e-06, + "loss": 1.3775, + "step": 1550 + }, + { + "epoch": 0.06169787814669066, + "grad_norm": 1.3067532689182895, + "learning_rate": 2.056419720537833e-06, + "loss": 1.3546, + "step": 1560 + }, + { + "epoch": 0.062093377365579704, + "grad_norm": 1.2405561143563724, + "learning_rate": 2.0696018982335883e-06, + "loss": 1.3793, + "step": 1570 + }, + { + "epoch": 0.06248887658446874, + "grad_norm": 1.3444825192839442, + "learning_rate": 2.0827840759293435e-06, + "loss": 1.3568, + "step": 1580 + }, + { + "epoch": 0.06288437580335779, + "grad_norm": 1.3190960946200965, + "learning_rate": 2.095966253625099e-06, + "loss": 1.359, + "step": 1590 + }, + { + "epoch": 0.06327987502224683, + "grad_norm": 1.2444707895977416, + "learning_rate": 2.1091484313208543e-06, + "loss": 1.3519, + "step": 1600 + }, + { + "epoch": 0.06367537424113587, + "grad_norm": 1.2858248835895856, + "learning_rate": 2.1223306090166094e-06, + "loss": 1.3542, + "step": 1610 + }, + { + "epoch": 0.06407087346002492, + "grad_norm": 1.2093456894989756, + "learning_rate": 2.135512786712365e-06, + "loss": 
1.3507, + "step": 1620 + }, + { + "epoch": 0.06446637267891396, + "grad_norm": 1.2013316230182205, + "learning_rate": 2.14869496440812e-06, + "loss": 1.3818, + "step": 1630 + }, + { + "epoch": 0.064861871897803, + "grad_norm": 1.2980141671010164, + "learning_rate": 2.1618771421038758e-06, + "loss": 1.3641, + "step": 1640 + }, + { + "epoch": 0.06525737111669204, + "grad_norm": 1.2346815137921718, + "learning_rate": 2.175059319799631e-06, + "loss": 1.3461, + "step": 1650 + }, + { + "epoch": 0.06565287033558109, + "grad_norm": 1.2096389907928575, + "learning_rate": 2.1882414974953865e-06, + "loss": 1.3343, + "step": 1660 + }, + { + "epoch": 0.06604836955447013, + "grad_norm": 1.3952439518632846, + "learning_rate": 2.2014236751911417e-06, + "loss": 1.3375, + "step": 1670 + }, + { + "epoch": 0.06644386877335917, + "grad_norm": 1.174174309336657, + "learning_rate": 2.2146058528868973e-06, + "loss": 1.3411, + "step": 1680 + }, + { + "epoch": 0.06683936799224821, + "grad_norm": 1.2765076663404167, + "learning_rate": 2.2277880305826524e-06, + "loss": 1.3432, + "step": 1690 + }, + { + "epoch": 0.06723486721113726, + "grad_norm": 1.3370795855751043, + "learning_rate": 2.2409702082784076e-06, + "loss": 1.3489, + "step": 1700 + }, + { + "epoch": 0.0676303664300263, + "grad_norm": 1.2366580352436853, + "learning_rate": 2.254152385974163e-06, + "loss": 1.3262, + "step": 1710 + }, + { + "epoch": 0.06802586564891534, + "grad_norm": 1.2943364201255696, + "learning_rate": 2.2673345636699184e-06, + "loss": 1.3339, + "step": 1720 + }, + { + "epoch": 0.06842136486780438, + "grad_norm": 1.196691594127321, + "learning_rate": 2.2805167413656735e-06, + "loss": 1.3379, + "step": 1730 + }, + { + "epoch": 0.06881686408669344, + "grad_norm": 1.2142181703444799, + "learning_rate": 2.293698919061429e-06, + "loss": 1.3515, + "step": 1740 + }, + { + "epoch": 0.06921236330558247, + "grad_norm": 1.172172263971748, + "learning_rate": 2.3068810967571843e-06, + "loss": 1.3424, + "step": 1750 + }, + { + "epoch": 0.06960786252447151, + "grad_norm": 1.1981525906535253, + "learning_rate": 2.32006327445294e-06, + "loss": 1.3359, + "step": 1760 + }, + { + "epoch": 0.07000336174336055, + "grad_norm": 1.1121572319368689, + "learning_rate": 2.3332454521486954e-06, + "loss": 1.3492, + "step": 1770 + }, + { + "epoch": 0.0703988609622496, + "grad_norm": 1.3609407003058103, + "learning_rate": 2.3464276298444506e-06, + "loss": 1.3494, + "step": 1780 + }, + { + "epoch": 0.07079436018113865, + "grad_norm": 1.195358728366932, + "learning_rate": 2.3596098075402058e-06, + "loss": 1.3133, + "step": 1790 + }, + { + "epoch": 0.07118985940002769, + "grad_norm": 1.2660297564882381, + "learning_rate": 2.3727919852359614e-06, + "loss": 1.3286, + "step": 1800 + }, + { + "epoch": 0.07158535861891673, + "grad_norm": 1.1846403707730422, + "learning_rate": 2.3859741629317165e-06, + "loss": 1.3337, + "step": 1810 + }, + { + "epoch": 0.07198085783780576, + "grad_norm": 1.2419714851604897, + "learning_rate": 2.3991563406274717e-06, + "loss": 1.3444, + "step": 1820 + }, + { + "epoch": 0.07237635705669482, + "grad_norm": 1.2247587012064896, + "learning_rate": 2.4123385183232273e-06, + "loss": 1.3297, + "step": 1830 + }, + { + "epoch": 0.07277185627558386, + "grad_norm": 1.2367202399241926, + "learning_rate": 2.4255206960189824e-06, + "loss": 1.3336, + "step": 1840 + }, + { + "epoch": 0.0731673554944729, + "grad_norm": 1.15162704981469, + "learning_rate": 2.4387028737147376e-06, + "loss": 1.3304, + "step": 1850 + }, + { + "epoch": 0.07356285471336194, + 
"grad_norm": 1.2725548856923554, + "learning_rate": 2.451885051410493e-06, + "loss": 1.3135, + "step": 1860 + }, + { + "epoch": 0.07395835393225099, + "grad_norm": 1.2219938763619118, + "learning_rate": 2.4650672291062484e-06, + "loss": 1.3318, + "step": 1870 + }, + { + "epoch": 0.07435385315114003, + "grad_norm": 1.1414783727229223, + "learning_rate": 2.478249406802004e-06, + "loss": 1.3201, + "step": 1880 + }, + { + "epoch": 0.07474935237002907, + "grad_norm": 1.261774884225301, + "learning_rate": 2.4914315844977595e-06, + "loss": 1.3163, + "step": 1890 + }, + { + "epoch": 0.07514485158891811, + "grad_norm": 1.3685126623521886, + "learning_rate": 2.5046137621935147e-06, + "loss": 1.3393, + "step": 1900 + }, + { + "epoch": 0.07554035080780716, + "grad_norm": 1.2255559190916123, + "learning_rate": 2.51779593988927e-06, + "loss": 1.3089, + "step": 1910 + }, + { + "epoch": 0.0759358500266962, + "grad_norm": 1.1766220763492885, + "learning_rate": 2.530978117585025e-06, + "loss": 1.3395, + "step": 1920 + }, + { + "epoch": 0.07633134924558524, + "grad_norm": 1.2164090577880697, + "learning_rate": 2.54416029528078e-06, + "loss": 1.3402, + "step": 1930 + }, + { + "epoch": 0.07672684846447428, + "grad_norm": 1.1656840897879306, + "learning_rate": 2.557342472976536e-06, + "loss": 1.3193, + "step": 1940 + }, + { + "epoch": 0.07712234768336333, + "grad_norm": 1.2524143523318216, + "learning_rate": 2.5705246506722914e-06, + "loss": 1.3132, + "step": 1950 + }, + { + "epoch": 0.07751784690225237, + "grad_norm": 1.2740067789155567, + "learning_rate": 2.5837068283680465e-06, + "loss": 1.3409, + "step": 1960 + }, + { + "epoch": 0.07791334612114141, + "grad_norm": 1.2041116051593224, + "learning_rate": 2.5968890060638017e-06, + "loss": 1.3129, + "step": 1970 + }, + { + "epoch": 0.07830884534003045, + "grad_norm": 1.297301623510751, + "learning_rate": 2.6100711837595573e-06, + "loss": 1.3177, + "step": 1980 + }, + { + "epoch": 0.07870434455891949, + "grad_norm": 1.1837368447180556, + "learning_rate": 2.6232533614553125e-06, + "loss": 1.3111, + "step": 1990 + }, + { + "epoch": 0.07909984377780854, + "grad_norm": 1.2251227411035477, + "learning_rate": 2.636435539151068e-06, + "loss": 1.3142, + "step": 2000 + }, + { + "epoch": 0.07949534299669758, + "grad_norm": 1.225277517784227, + "learning_rate": 2.6496177168468236e-06, + "loss": 1.3097, + "step": 2010 + }, + { + "epoch": 0.07989084221558662, + "grad_norm": 1.2877850891977878, + "learning_rate": 2.662799894542579e-06, + "loss": 1.3242, + "step": 2020 + }, + { + "epoch": 0.08028634143447566, + "grad_norm": 1.1227169091442835, + "learning_rate": 2.675982072238334e-06, + "loss": 1.3134, + "step": 2030 + }, + { + "epoch": 0.08068184065336471, + "grad_norm": 1.233621741322885, + "learning_rate": 2.689164249934089e-06, + "loss": 1.3349, + "step": 2040 + }, + { + "epoch": 0.08107733987225375, + "grad_norm": 1.1777945722776684, + "learning_rate": 2.7023464276298443e-06, + "loss": 1.321, + "step": 2050 + }, + { + "epoch": 0.08147283909114279, + "grad_norm": 1.2902735762570336, + "learning_rate": 2.7155286053256003e-06, + "loss": 1.3025, + "step": 2060 + }, + { + "epoch": 0.08186833831003183, + "grad_norm": 1.214252890627205, + "learning_rate": 2.7287107830213555e-06, + "loss": 1.309, + "step": 2070 + }, + { + "epoch": 0.08226383752892089, + "grad_norm": 1.157680702032122, + "learning_rate": 2.7418929607171106e-06, + "loss": 1.295, + "step": 2080 + }, + { + "epoch": 0.08265933674780993, + "grad_norm": 1.057546069052866, + "learning_rate": 2.755075138412866e-06, + 
"loss": 1.3222, + "step": 2090 + }, + { + "epoch": 0.08305483596669896, + "grad_norm": 1.235513698983506, + "learning_rate": 2.7682573161086214e-06, + "loss": 1.3249, + "step": 2100 + }, + { + "epoch": 0.083450335185588, + "grad_norm": 1.1229256349512073, + "learning_rate": 2.7814394938043765e-06, + "loss": 1.3007, + "step": 2110 + }, + { + "epoch": 0.08384583440447706, + "grad_norm": 1.2496919968109956, + "learning_rate": 2.794621671500132e-06, + "loss": 1.3014, + "step": 2120 + }, + { + "epoch": 0.0842413336233661, + "grad_norm": 1.1054196498776563, + "learning_rate": 2.8078038491958877e-06, + "loss": 1.305, + "step": 2130 + }, + { + "epoch": 0.08463683284225514, + "grad_norm": 1.2328726354969368, + "learning_rate": 2.820986026891643e-06, + "loss": 1.2916, + "step": 2140 + }, + { + "epoch": 0.08503233206114418, + "grad_norm": 1.289009845738552, + "learning_rate": 2.834168204587398e-06, + "loss": 1.3008, + "step": 2150 + }, + { + "epoch": 0.08542783128003323, + "grad_norm": 1.2111929582849519, + "learning_rate": 2.8473503822831532e-06, + "loss": 1.3128, + "step": 2160 + }, + { + "epoch": 0.08582333049892227, + "grad_norm": 1.2033677687792834, + "learning_rate": 2.8605325599789084e-06, + "loss": 1.3147, + "step": 2170 + }, + { + "epoch": 0.08621882971781131, + "grad_norm": 1.261854934115352, + "learning_rate": 2.8737147376746644e-06, + "loss": 1.3069, + "step": 2180 + }, + { + "epoch": 0.08661432893670035, + "grad_norm": 1.3012047565607785, + "learning_rate": 2.8868969153704196e-06, + "loss": 1.2983, + "step": 2190 + }, + { + "epoch": 0.08700982815558939, + "grad_norm": 1.2549646960874847, + "learning_rate": 2.9000790930661747e-06, + "loss": 1.2962, + "step": 2200 + }, + { + "epoch": 0.08740532737447844, + "grad_norm": 1.10280154543323, + "learning_rate": 2.9132612707619303e-06, + "loss": 1.2977, + "step": 2210 + }, + { + "epoch": 0.08780082659336748, + "grad_norm": 1.1953750453391896, + "learning_rate": 2.9264434484576855e-06, + "loss": 1.3066, + "step": 2220 + }, + { + "epoch": 0.08819632581225652, + "grad_norm": 1.173115521592014, + "learning_rate": 2.9396256261534406e-06, + "loss": 1.3035, + "step": 2230 + }, + { + "epoch": 0.08859182503114556, + "grad_norm": 1.3534880527534277, + "learning_rate": 2.952807803849196e-06, + "loss": 1.307, + "step": 2240 + }, + { + "epoch": 0.08898732425003461, + "grad_norm": 1.2387759451415656, + "learning_rate": 2.965989981544952e-06, + "loss": 1.2956, + "step": 2250 + }, + { + "epoch": 0.08938282346892365, + "grad_norm": 1.2677066907321448, + "learning_rate": 2.979172159240707e-06, + "loss": 1.2928, + "step": 2260 + }, + { + "epoch": 0.08977832268781269, + "grad_norm": 1.1372495129274685, + "learning_rate": 2.992354336936462e-06, + "loss": 1.302, + "step": 2270 + }, + { + "epoch": 0.09017382190670173, + "grad_norm": 1.1894113283549337, + "learning_rate": 3.0055365146322173e-06, + "loss": 1.2806, + "step": 2280 + }, + { + "epoch": 0.09056932112559078, + "grad_norm": 1.1347442967062604, + "learning_rate": 3.0187186923279725e-06, + "loss": 1.3028, + "step": 2290 + }, + { + "epoch": 0.09096482034447982, + "grad_norm": 1.2641426738557546, + "learning_rate": 3.031900870023728e-06, + "loss": 1.3049, + "step": 2300 + }, + { + "epoch": 0.09136031956336886, + "grad_norm": 1.2912543277413953, + "learning_rate": 3.0450830477194836e-06, + "loss": 1.2942, + "step": 2310 + }, + { + "epoch": 0.0917558187822579, + "grad_norm": 1.1640861851973205, + "learning_rate": 3.058265225415239e-06, + "loss": 1.2676, + "step": 2320 + }, + { + "epoch": 0.09215131800114695, + 
"grad_norm": 1.2761397336573672, + "learning_rate": 3.0714474031109944e-06, + "loss": 1.2743, + "step": 2330 + }, + { + "epoch": 0.09254681722003599, + "grad_norm": 1.3336958425243306, + "learning_rate": 3.0846295808067496e-06, + "loss": 1.3095, + "step": 2340 + }, + { + "epoch": 0.09294231643892503, + "grad_norm": 1.2293364502627973, + "learning_rate": 3.0978117585025047e-06, + "loss": 1.2689, + "step": 2350 + }, + { + "epoch": 0.09333781565781407, + "grad_norm": 1.1964016901569907, + "learning_rate": 3.11099393619826e-06, + "loss": 1.2988, + "step": 2360 + }, + { + "epoch": 0.09373331487670312, + "grad_norm": 1.1652688763465373, + "learning_rate": 3.124176113894016e-06, + "loss": 1.2951, + "step": 2370 + }, + { + "epoch": 0.09412881409559216, + "grad_norm": 1.1857748450845993, + "learning_rate": 3.137358291589771e-06, + "loss": 1.2822, + "step": 2380 + }, + { + "epoch": 0.0945243133144812, + "grad_norm": 1.1673509054243105, + "learning_rate": 3.1505404692855262e-06, + "loss": 1.2938, + "step": 2390 + }, + { + "epoch": 0.09491981253337024, + "grad_norm": 1.2296260905483272, + "learning_rate": 3.1637226469812814e-06, + "loss": 1.2611, + "step": 2400 + }, + { + "epoch": 0.09531531175225928, + "grad_norm": 1.218062373578883, + "learning_rate": 3.1769048246770366e-06, + "loss": 1.2714, + "step": 2410 + }, + { + "epoch": 0.09571081097114834, + "grad_norm": 1.2430563615302073, + "learning_rate": 3.190087002372792e-06, + "loss": 1.2868, + "step": 2420 + }, + { + "epoch": 0.09610631019003738, + "grad_norm": 1.2681172308291901, + "learning_rate": 3.2032691800685477e-06, + "loss": 1.2853, + "step": 2430 + }, + { + "epoch": 0.09650180940892641, + "grad_norm": 1.1547689215385586, + "learning_rate": 3.216451357764303e-06, + "loss": 1.2862, + "step": 2440 + }, + { + "epoch": 0.09689730862781545, + "grad_norm": 1.1768931021697404, + "learning_rate": 3.2296335354600585e-06, + "loss": 1.2818, + "step": 2450 + }, + { + "epoch": 0.09729280784670451, + "grad_norm": 1.190143004164276, + "learning_rate": 3.2428157131558137e-06, + "loss": 1.2917, + "step": 2460 + }, + { + "epoch": 0.09768830706559355, + "grad_norm": 1.2271609371437073, + "learning_rate": 3.255997890851569e-06, + "loss": 1.2966, + "step": 2470 + }, + { + "epoch": 0.09808380628448259, + "grad_norm": 1.30224620720194, + "learning_rate": 3.269180068547324e-06, + "loss": 1.2738, + "step": 2480 + }, + { + "epoch": 0.09847930550337163, + "grad_norm": 1.2072012030104826, + "learning_rate": 3.28236224624308e-06, + "loss": 1.2857, + "step": 2490 + }, + { + "epoch": 0.09887480472226068, + "grad_norm": 1.245746983758924, + "learning_rate": 3.295544423938835e-06, + "loss": 1.2744, + "step": 2500 + }, + { + "epoch": 0.09927030394114972, + "grad_norm": 1.232464505799779, + "learning_rate": 3.3087266016345903e-06, + "loss": 1.2845, + "step": 2510 + }, + { + "epoch": 0.09966580316003876, + "grad_norm": 1.1776225450844722, + "learning_rate": 3.3219087793303455e-06, + "loss": 1.2704, + "step": 2520 + }, + { + "epoch": 0.1000613023789278, + "grad_norm": 1.2070461495279086, + "learning_rate": 3.3350909570261007e-06, + "loss": 1.2858, + "step": 2530 + }, + { + "epoch": 0.10045680159781685, + "grad_norm": 1.2873546275622396, + "learning_rate": 3.3482731347218562e-06, + "loss": 1.2686, + "step": 2540 + }, + { + "epoch": 0.10085230081670589, + "grad_norm": 1.2436617312073135, + "learning_rate": 3.361455312417612e-06, + "loss": 1.2816, + "step": 2550 + }, + { + "epoch": 0.10124780003559493, + "grad_norm": 1.2016848948693586, + "learning_rate": 3.374637490113367e-06, + 
"loss": 1.2871, + "step": 2560 + }, + { + "epoch": 0.10164329925448397, + "grad_norm": 1.0536038308096831, + "learning_rate": 3.3878196678091226e-06, + "loss": 1.2754, + "step": 2570 + }, + { + "epoch": 0.10203879847337302, + "grad_norm": 1.270637142538822, + "learning_rate": 3.4010018455048777e-06, + "loss": 1.2931, + "step": 2580 + }, + { + "epoch": 0.10243429769226206, + "grad_norm": 1.2705181067783509, + "learning_rate": 3.414184023200633e-06, + "loss": 1.2826, + "step": 2590 + }, + { + "epoch": 0.1028297969111511, + "grad_norm": 1.1622496688215964, + "learning_rate": 3.427366200896388e-06, + "loss": 1.2753, + "step": 2600 + }, + { + "epoch": 0.10322529613004014, + "grad_norm": 1.2243854649828334, + "learning_rate": 3.440548378592144e-06, + "loss": 1.2853, + "step": 2610 + }, + { + "epoch": 0.10362079534892919, + "grad_norm": 1.1921833219835056, + "learning_rate": 3.4537305562878992e-06, + "loss": 1.2947, + "step": 2620 + }, + { + "epoch": 0.10401629456781823, + "grad_norm": 1.1049571202442656, + "learning_rate": 3.4669127339836544e-06, + "loss": 1.2621, + "step": 2630 + }, + { + "epoch": 0.10441179378670727, + "grad_norm": 1.1671222554569785, + "learning_rate": 3.4800949116794096e-06, + "loss": 1.275, + "step": 2640 + }, + { + "epoch": 0.10480729300559631, + "grad_norm": 1.2064194503502172, + "learning_rate": 3.493277089375165e-06, + "loss": 1.2849, + "step": 2650 + }, + { + "epoch": 0.10520279222448535, + "grad_norm": 1.2154560517149036, + "learning_rate": 3.5064592670709203e-06, + "loss": 1.254, + "step": 2660 + }, + { + "epoch": 0.1055982914433744, + "grad_norm": 1.1269630172231502, + "learning_rate": 3.519641444766676e-06, + "loss": 1.2795, + "step": 2670 + }, + { + "epoch": 0.10599379066226344, + "grad_norm": 1.2081274932682922, + "learning_rate": 3.532823622462431e-06, + "loss": 1.2752, + "step": 2680 + }, + { + "epoch": 0.10638928988115248, + "grad_norm": 1.1368132761943728, + "learning_rate": 3.5460058001581867e-06, + "loss": 1.267, + "step": 2690 + }, + { + "epoch": 0.10678478910004152, + "grad_norm": 1.1938542758614383, + "learning_rate": 3.559187977853942e-06, + "loss": 1.2695, + "step": 2700 + }, + { + "epoch": 0.10718028831893057, + "grad_norm": 1.227569725090285, + "learning_rate": 3.572370155549697e-06, + "loss": 1.2877, + "step": 2710 + }, + { + "epoch": 0.10757578753781961, + "grad_norm": 1.0635833460383113, + "learning_rate": 3.585552333245452e-06, + "loss": 1.2722, + "step": 2720 + }, + { + "epoch": 0.10797128675670865, + "grad_norm": 1.1446922334420235, + "learning_rate": 3.5987345109412073e-06, + "loss": 1.2797, + "step": 2730 + }, + { + "epoch": 0.1083667859755977, + "grad_norm": 1.2130529429189103, + "learning_rate": 3.6119166886369633e-06, + "loss": 1.2748, + "step": 2740 + }, + { + "epoch": 0.10876228519448675, + "grad_norm": 1.1964236814556763, + "learning_rate": 3.6250988663327185e-06, + "loss": 1.2702, + "step": 2750 + }, + { + "epoch": 0.10915778441337579, + "grad_norm": 1.1949833709572786, + "learning_rate": 3.6382810440284737e-06, + "loss": 1.2882, + "step": 2760 + }, + { + "epoch": 0.10955328363226483, + "grad_norm": 1.09709150025736, + "learning_rate": 3.6514632217242293e-06, + "loss": 1.2605, + "step": 2770 + }, + { + "epoch": 0.10994878285115386, + "grad_norm": 1.1515072477043464, + "learning_rate": 3.6646453994199844e-06, + "loss": 1.2753, + "step": 2780 + }, + { + "epoch": 0.11034428207004292, + "grad_norm": 1.2458919888173228, + "learning_rate": 3.6778275771157396e-06, + "loss": 1.2696, + "step": 2790 + }, + { + "epoch": 0.11073978128893196, + 
"grad_norm": 1.1302128174527677, + "learning_rate": 3.691009754811495e-06, + "loss": 1.2605, + "step": 2800 + }, + { + "epoch": 0.111135280507821, + "grad_norm": 1.24222385009568, + "learning_rate": 3.7041919325072508e-06, + "loss": 1.2729, + "step": 2810 + }, + { + "epoch": 0.11153077972671004, + "grad_norm": 1.102787713183318, + "learning_rate": 3.717374110203006e-06, + "loss": 1.2607, + "step": 2820 + }, + { + "epoch": 0.11192627894559909, + "grad_norm": 1.2023360978677924, + "learning_rate": 3.730556287898761e-06, + "loss": 1.2615, + "step": 2830 + }, + { + "epoch": 0.11232177816448813, + "grad_norm": 1.2724137347654667, + "learning_rate": 3.7437384655945163e-06, + "loss": 1.2615, + "step": 2840 + }, + { + "epoch": 0.11271727738337717, + "grad_norm": 1.093152579494928, + "learning_rate": 3.7569206432902714e-06, + "loss": 1.2631, + "step": 2850 + }, + { + "epoch": 0.11311277660226621, + "grad_norm": 1.1704547463612633, + "learning_rate": 3.7701028209860274e-06, + "loss": 1.2644, + "step": 2860 + }, + { + "epoch": 0.11350827582115525, + "grad_norm": 1.161424150089614, + "learning_rate": 3.7832849986817826e-06, + "loss": 1.2767, + "step": 2870 + }, + { + "epoch": 0.1139037750400443, + "grad_norm": 1.1278205431526822, + "learning_rate": 3.7964671763775378e-06, + "loss": 1.2665, + "step": 2880 + }, + { + "epoch": 0.11429927425893334, + "grad_norm": 1.1528269166892364, + "learning_rate": 3.8096493540732933e-06, + "loss": 1.2755, + "step": 2890 + }, + { + "epoch": 0.11469477347782238, + "grad_norm": 1.1067252929827938, + "learning_rate": 3.8228315317690485e-06, + "loss": 1.2871, + "step": 2900 + }, + { + "epoch": 0.11509027269671142, + "grad_norm": 1.1120209246517228, + "learning_rate": 3.836013709464803e-06, + "loss": 1.2626, + "step": 2910 + }, + { + "epoch": 0.11548577191560047, + "grad_norm": 1.122877948452319, + "learning_rate": 3.84919588716056e-06, + "loss": 1.263, + "step": 2920 + }, + { + "epoch": 0.11588127113448951, + "grad_norm": 1.124640519597798, + "learning_rate": 3.8623780648563144e-06, + "loss": 1.2731, + "step": 2930 + }, + { + "epoch": 0.11627677035337855, + "grad_norm": 1.125024599583401, + "learning_rate": 3.87556024255207e-06, + "loss": 1.2691, + "step": 2940 + }, + { + "epoch": 0.11667226957226759, + "grad_norm": 1.2126790331792352, + "learning_rate": 3.888742420247826e-06, + "loss": 1.2426, + "step": 2950 + }, + { + "epoch": 0.11706776879115664, + "grad_norm": 1.0611422258130283, + "learning_rate": 3.90192459794358e-06, + "loss": 1.2551, + "step": 2960 + }, + { + "epoch": 0.11746326801004568, + "grad_norm": 1.1931586393046836, + "learning_rate": 3.915106775639336e-06, + "loss": 1.262, + "step": 2970 + }, + { + "epoch": 0.11785876722893472, + "grad_norm": 1.1209628020676263, + "learning_rate": 3.9282889533350915e-06, + "loss": 1.2688, + "step": 2980 + }, + { + "epoch": 0.11825426644782376, + "grad_norm": 1.397330863918019, + "learning_rate": 3.941471131030847e-06, + "loss": 1.2564, + "step": 2990 + }, + { + "epoch": 0.11864976566671281, + "grad_norm": 1.0906431181507632, + "learning_rate": 3.954653308726602e-06, + "loss": 1.2706, + "step": 3000 + }, + { + "epoch": 0.11904526488560185, + "grad_norm": 1.1586328470171683, + "learning_rate": 3.9678354864223574e-06, + "loss": 1.2626, + "step": 3010 + }, + { + "epoch": 0.11944076410449089, + "grad_norm": 1.0967133685495674, + "learning_rate": 3.981017664118112e-06, + "loss": 1.2548, + "step": 3020 + }, + { + "epoch": 0.11983626332337993, + "grad_norm": 1.1269755343665413, + "learning_rate": 3.994199841813868e-06, + "loss": 
1.2643, + "step": 3030 + }, + { + "epoch": 0.12023176254226899, + "grad_norm": 1.091934409490822, + "learning_rate": 4.007382019509623e-06, + "loss": 1.2662, + "step": 3040 + }, + { + "epoch": 0.12062726176115803, + "grad_norm": 1.1440701382940623, + "learning_rate": 4.020564197205379e-06, + "loss": 1.2468, + "step": 3050 + }, + { + "epoch": 0.12102276098004706, + "grad_norm": 1.1751888661402632, + "learning_rate": 4.033746374901134e-06, + "loss": 1.2512, + "step": 3060 + }, + { + "epoch": 0.1214182601989361, + "grad_norm": 1.1501843407322958, + "learning_rate": 4.046928552596889e-06, + "loss": 1.2601, + "step": 3070 + }, + { + "epoch": 0.12181375941782514, + "grad_norm": 1.1762906184019515, + "learning_rate": 4.060110730292645e-06, + "loss": 1.2637, + "step": 3080 + }, + { + "epoch": 0.1222092586367142, + "grad_norm": 1.2032163341674906, + "learning_rate": 4.0732929079884e-06, + "loss": 1.2741, + "step": 3090 + }, + { + "epoch": 0.12260475785560324, + "grad_norm": 1.290646553414388, + "learning_rate": 4.086475085684156e-06, + "loss": 1.2365, + "step": 3100 + }, + { + "epoch": 0.12300025707449228, + "grad_norm": 1.1601572556541933, + "learning_rate": 4.099657263379911e-06, + "loss": 1.2392, + "step": 3110 + }, + { + "epoch": 0.12339575629338131, + "grad_norm": 1.0964699034764527, + "learning_rate": 4.112839441075666e-06, + "loss": 1.2577, + "step": 3120 + }, + { + "epoch": 0.12379125551227037, + "grad_norm": 1.1539770797614612, + "learning_rate": 4.126021618771421e-06, + "loss": 1.2573, + "step": 3130 + }, + { + "epoch": 0.12418675473115941, + "grad_norm": 1.0985466764198102, + "learning_rate": 4.139203796467177e-06, + "loss": 1.244, + "step": 3140 + }, + { + "epoch": 0.12458225395004845, + "grad_norm": 1.2184945405951828, + "learning_rate": 4.152385974162932e-06, + "loss": 1.2554, + "step": 3150 + }, + { + "epoch": 0.12497775316893749, + "grad_norm": 1.187418233396473, + "learning_rate": 4.165568151858687e-06, + "loss": 1.263, + "step": 3160 + }, + { + "epoch": 0.12537325238782654, + "grad_norm": 1.1833230618056227, + "learning_rate": 4.178750329554443e-06, + "loss": 1.2533, + "step": 3170 + }, + { + "epoch": 0.12576875160671558, + "grad_norm": 1.1355097475879699, + "learning_rate": 4.191932507250198e-06, + "loss": 1.256, + "step": 3180 + }, + { + "epoch": 0.12616425082560462, + "grad_norm": 1.253125308018465, + "learning_rate": 4.205114684945954e-06, + "loss": 1.2464, + "step": 3190 + }, + { + "epoch": 0.12655975004449366, + "grad_norm": 1.2331745099422928, + "learning_rate": 4.2182968626417085e-06, + "loss": 1.2574, + "step": 3200 + }, + { + "epoch": 0.1269552492633827, + "grad_norm": 1.247226535246477, + "learning_rate": 4.231479040337464e-06, + "loss": 1.26, + "step": 3210 + }, + { + "epoch": 0.12735074848227174, + "grad_norm": 1.1038916765400215, + "learning_rate": 4.244661218033219e-06, + "loss": 1.2382, + "step": 3220 + }, + { + "epoch": 0.1277462477011608, + "grad_norm": 1.092556264313893, + "learning_rate": 4.257843395728975e-06, + "loss": 1.2496, + "step": 3230 + }, + { + "epoch": 0.12814174692004984, + "grad_norm": 1.1264358388022202, + "learning_rate": 4.27102557342473e-06, + "loss": 1.2501, + "step": 3240 + }, + { + "epoch": 0.12853724613893888, + "grad_norm": 1.1184361409466503, + "learning_rate": 4.284207751120486e-06, + "loss": 1.2366, + "step": 3250 + }, + { + "epoch": 0.12893274535782792, + "grad_norm": 1.2406981039602194, + "learning_rate": 4.29738992881624e-06, + "loss": 1.2231, + "step": 3260 + }, + { + "epoch": 0.12932824457671696, + "grad_norm": 1.2567060350031107, 
+ "learning_rate": 4.310572106511996e-06, + "loss": 1.2443, + "step": 3270 + }, + { + "epoch": 0.129723743795606, + "grad_norm": 1.23262189454401, + "learning_rate": 4.3237542842077515e-06, + "loss": 1.2428, + "step": 3280 + }, + { + "epoch": 0.13011924301449504, + "grad_norm": 1.1496110749546276, + "learning_rate": 4.336936461903507e-06, + "loss": 1.2519, + "step": 3290 + }, + { + "epoch": 0.13051474223338408, + "grad_norm": 1.1173408155113762, + "learning_rate": 4.350118639599262e-06, + "loss": 1.2586, + "step": 3300 + }, + { + "epoch": 0.13091024145227312, + "grad_norm": 1.1907311153321274, + "learning_rate": 4.3633008172950175e-06, + "loss": 1.2376, + "step": 3310 + }, + { + "epoch": 0.13130574067116219, + "grad_norm": 1.1642841053506983, + "learning_rate": 4.376482994990773e-06, + "loss": 1.2278, + "step": 3320 + }, + { + "epoch": 0.13170123989005122, + "grad_norm": 1.217803181818625, + "learning_rate": 4.389665172686528e-06, + "loss": 1.2473, + "step": 3330 + }, + { + "epoch": 0.13209673910894026, + "grad_norm": 1.1601491220146136, + "learning_rate": 4.402847350382283e-06, + "loss": 1.2615, + "step": 3340 + }, + { + "epoch": 0.1324922383278293, + "grad_norm": 1.2383436434314843, + "learning_rate": 4.416029528078039e-06, + "loss": 1.24, + "step": 3350 + }, + { + "epoch": 0.13288773754671834, + "grad_norm": 1.1692938600694844, + "learning_rate": 4.4292117057737945e-06, + "loss": 1.2541, + "step": 3360 + }, + { + "epoch": 0.13328323676560738, + "grad_norm": 1.1778183496481327, + "learning_rate": 4.442393883469549e-06, + "loss": 1.258, + "step": 3370 + }, + { + "epoch": 0.13367873598449642, + "grad_norm": 1.1351108705524025, + "learning_rate": 4.455576061165305e-06, + "loss": 1.2586, + "step": 3380 + }, + { + "epoch": 0.13407423520338546, + "grad_norm": 1.1603484288394463, + "learning_rate": 4.4687582388610605e-06, + "loss": 1.2253, + "step": 3390 + }, + { + "epoch": 0.13446973442227453, + "grad_norm": 1.144547572424092, + "learning_rate": 4.481940416556815e-06, + "loss": 1.2353, + "step": 3400 + }, + { + "epoch": 0.13486523364116357, + "grad_norm": 1.180522473535395, + "learning_rate": 4.495122594252571e-06, + "loss": 1.2623, + "step": 3410 + }, + { + "epoch": 0.1352607328600526, + "grad_norm": 1.1237416464133951, + "learning_rate": 4.508304771948326e-06, + "loss": 1.2661, + "step": 3420 + }, + { + "epoch": 0.13565623207894165, + "grad_norm": 1.0952310861427315, + "learning_rate": 4.521486949644082e-06, + "loss": 1.2602, + "step": 3430 + }, + { + "epoch": 0.13605173129783069, + "grad_norm": 1.2519540981327684, + "learning_rate": 4.534669127339837e-06, + "loss": 1.2486, + "step": 3440 + }, + { + "epoch": 0.13644723051671973, + "grad_norm": 1.1453176445124462, + "learning_rate": 4.547851305035592e-06, + "loss": 1.2511, + "step": 3450 + }, + { + "epoch": 0.13684272973560876, + "grad_norm": 1.2492970933494332, + "learning_rate": 4.561033482731347e-06, + "loss": 1.2512, + "step": 3460 + }, + { + "epoch": 0.1372382289544978, + "grad_norm": 1.1183370717472112, + "learning_rate": 4.5742156604271035e-06, + "loss": 1.2683, + "step": 3470 + }, + { + "epoch": 0.13763372817338687, + "grad_norm": 1.3102018394220485, + "learning_rate": 4.587397838122858e-06, + "loss": 1.2332, + "step": 3480 + }, + { + "epoch": 0.1380292273922759, + "grad_norm": 1.1125088730002428, + "learning_rate": 4.600580015818614e-06, + "loss": 1.2393, + "step": 3490 + }, + { + "epoch": 0.13842472661116495, + "grad_norm": 1.0788463422691676, + "learning_rate": 4.6137621935143685e-06, + "loss": 1.2572, + "step": 3500 + }, + { + 
"epoch": 0.138820225830054, + "grad_norm": 1.1148975227432092, + "learning_rate": 4.626944371210124e-06, + "loss": 1.25, + "step": 3510 + }, + { + "epoch": 0.13921572504894303, + "grad_norm": 1.1501918101905848, + "learning_rate": 4.64012654890588e-06, + "loss": 1.2405, + "step": 3520 + }, + { + "epoch": 0.13961122426783207, + "grad_norm": 1.1965273666353122, + "learning_rate": 4.653308726601635e-06, + "loss": 1.2366, + "step": 3530 + }, + { + "epoch": 0.1400067234867211, + "grad_norm": 1.1368756204903232, + "learning_rate": 4.666490904297391e-06, + "loss": 1.2338, + "step": 3540 + }, + { + "epoch": 0.14040222270561015, + "grad_norm": 1.1082768252138382, + "learning_rate": 4.679673081993146e-06, + "loss": 1.2136, + "step": 3550 + }, + { + "epoch": 0.1407977219244992, + "grad_norm": 1.2668650584687497, + "learning_rate": 4.692855259688901e-06, + "loss": 1.2224, + "step": 3560 + }, + { + "epoch": 0.14119322114338825, + "grad_norm": 1.2404601026210604, + "learning_rate": 4.706037437384656e-06, + "loss": 1.2406, + "step": 3570 + }, + { + "epoch": 0.1415887203622773, + "grad_norm": 1.0846643375311202, + "learning_rate": 4.7192196150804116e-06, + "loss": 1.2368, + "step": 3580 + }, + { + "epoch": 0.14198421958116633, + "grad_norm": 1.0060315583009074, + "learning_rate": 4.732401792776167e-06, + "loss": 1.2437, + "step": 3590 + }, + { + "epoch": 0.14237971880005537, + "grad_norm": 1.2564332345588882, + "learning_rate": 4.745583970471923e-06, + "loss": 1.2504, + "step": 3600 + }, + { + "epoch": 0.1427752180189444, + "grad_norm": 1.036375529178967, + "learning_rate": 4.7587661481676775e-06, + "loss": 1.2411, + "step": 3610 + }, + { + "epoch": 0.14317071723783345, + "grad_norm": 1.196949606969891, + "learning_rate": 4.771948325863433e-06, + "loss": 1.2264, + "step": 3620 + }, + { + "epoch": 0.1435662164567225, + "grad_norm": 1.1286077565195762, + "learning_rate": 4.785130503559189e-06, + "loss": 1.2403, + "step": 3630 + }, + { + "epoch": 0.14396171567561153, + "grad_norm": 1.0885858043496695, + "learning_rate": 4.798312681254943e-06, + "loss": 1.2409, + "step": 3640 + }, + { + "epoch": 0.1443572148945006, + "grad_norm": 1.1046744042088779, + "learning_rate": 4.811494858950699e-06, + "loss": 1.2259, + "step": 3650 + }, + { + "epoch": 0.14475271411338964, + "grad_norm": 1.073761934784075, + "learning_rate": 4.8246770366464546e-06, + "loss": 1.2302, + "step": 3660 + }, + { + "epoch": 0.14514821333227867, + "grad_norm": 1.0442273647850029, + "learning_rate": 4.83785921434221e-06, + "loss": 1.2201, + "step": 3670 + }, + { + "epoch": 0.14554371255116771, + "grad_norm": 1.14133687841234, + "learning_rate": 4.851041392037965e-06, + "loss": 1.2402, + "step": 3680 + }, + { + "epoch": 0.14593921177005675, + "grad_norm": 1.0424041206172552, + "learning_rate": 4.8642235697337205e-06, + "loss": 1.2392, + "step": 3690 + }, + { + "epoch": 0.1463347109889458, + "grad_norm": 1.1722935358179387, + "learning_rate": 4.877405747429475e-06, + "loss": 1.2292, + "step": 3700 + }, + { + "epoch": 0.14673021020783483, + "grad_norm": 1.2177101061453444, + "learning_rate": 4.890587925125231e-06, + "loss": 1.2374, + "step": 3710 + }, + { + "epoch": 0.14712570942672387, + "grad_norm": 1.2342369323304698, + "learning_rate": 4.903770102820986e-06, + "loss": 1.2426, + "step": 3720 + }, + { + "epoch": 0.1475212086456129, + "grad_norm": 1.1921504558647251, + "learning_rate": 4.916952280516742e-06, + "loss": 1.2274, + "step": 3730 + }, + { + "epoch": 0.14791670786450198, + "grad_norm": 1.233202677746452, + "learning_rate": 
4.930134458212497e-06, + "loss": 1.2214, + "step": 3740 + }, + { + "epoch": 0.14831220708339102, + "grad_norm": 1.1818383564644324, + "learning_rate": 4.943316635908252e-06, + "loss": 1.2134, + "step": 3750 + }, + { + "epoch": 0.14870770630228006, + "grad_norm": 1.1654870018883667, + "learning_rate": 4.956498813604008e-06, + "loss": 1.2258, + "step": 3760 + }, + { + "epoch": 0.1491032055211691, + "grad_norm": 1.2636489076159825, + "learning_rate": 4.969680991299763e-06, + "loss": 1.2333, + "step": 3770 + }, + { + "epoch": 0.14949870474005814, + "grad_norm": 1.1597012593172427, + "learning_rate": 4.982863168995519e-06, + "loss": 1.2265, + "step": 3780 + }, + { + "epoch": 0.14989420395894718, + "grad_norm": 1.1245543623413068, + "learning_rate": 4.996045346691274e-06, + "loss": 1.2336, + "step": 3790 + }, + { + "epoch": 0.15028970317783621, + "grad_norm": 1.1486476968811328, + "learning_rate": 5.009227524387029e-06, + "loss": 1.2262, + "step": 3800 + }, + { + "epoch": 0.15068520239672525, + "grad_norm": 1.0923697401437276, + "learning_rate": 5.022409702082784e-06, + "loss": 1.2258, + "step": 3810 + }, + { + "epoch": 0.15108070161561432, + "grad_norm": 1.26754825813355, + "learning_rate": 5.03559187977854e-06, + "loss": 1.241, + "step": 3820 + }, + { + "epoch": 0.15147620083450336, + "grad_norm": 1.168185364886432, + "learning_rate": 5.048774057474295e-06, + "loss": 1.2071, + "step": 3830 + }, + { + "epoch": 0.1518717000533924, + "grad_norm": 1.0582271434621335, + "learning_rate": 5.06195623517005e-06, + "loss": 1.2364, + "step": 3840 + }, + { + "epoch": 0.15226719927228144, + "grad_norm": 1.1643566454453338, + "learning_rate": 5.075138412865806e-06, + "loss": 1.2491, + "step": 3850 + }, + { + "epoch": 0.15266269849117048, + "grad_norm": 1.220011945064443, + "learning_rate": 5.08832059056156e-06, + "loss": 1.2099, + "step": 3860 + }, + { + "epoch": 0.15305819771005952, + "grad_norm": 1.2105087817617946, + "learning_rate": 5.101502768257317e-06, + "loss": 1.2241, + "step": 3870 + }, + { + "epoch": 0.15345369692894856, + "grad_norm": 1.1908599952093613, + "learning_rate": 5.114684945953072e-06, + "loss": 1.2354, + "step": 3880 + }, + { + "epoch": 0.1538491961478376, + "grad_norm": 1.200200710315934, + "learning_rate": 5.127867123648827e-06, + "loss": 1.225, + "step": 3890 + }, + { + "epoch": 0.15424469536672666, + "grad_norm": 1.2302439849161493, + "learning_rate": 5.141049301344583e-06, + "loss": 1.2262, + "step": 3900 + }, + { + "epoch": 0.1546401945856157, + "grad_norm": 1.1004820662407016, + "learning_rate": 5.154231479040338e-06, + "loss": 1.2088, + "step": 3910 + }, + { + "epoch": 0.15503569380450474, + "grad_norm": 1.102235460054769, + "learning_rate": 5.167413656736093e-06, + "loss": 1.2433, + "step": 3920 + }, + { + "epoch": 0.15543119302339378, + "grad_norm": 1.23789374465247, + "learning_rate": 5.180595834431849e-06, + "loss": 1.2357, + "step": 3930 + }, + { + "epoch": 0.15582669224228282, + "grad_norm": 1.161533174136612, + "learning_rate": 5.193778012127603e-06, + "loss": 1.2317, + "step": 3940 + }, + { + "epoch": 0.15622219146117186, + "grad_norm": 1.0642322136109936, + "learning_rate": 5.206960189823359e-06, + "loss": 1.2224, + "step": 3950 + }, + { + "epoch": 0.1566176906800609, + "grad_norm": 1.2087215287034732, + "learning_rate": 5.220142367519115e-06, + "loss": 1.2448, + "step": 3960 + }, + { + "epoch": 0.15701318989894994, + "grad_norm": 1.212885228606204, + "learning_rate": 5.233324545214869e-06, + "loss": 1.2208, + "step": 3970 + }, + { + "epoch": 0.15740868911783898, + 
"grad_norm": 1.1495875886075297, + "learning_rate": 5.246506722910625e-06, + "loss": 1.2342, + "step": 3980 + }, + { + "epoch": 0.15780418833672805, + "grad_norm": 1.2816638398485318, + "learning_rate": 5.259688900606381e-06, + "loss": 1.2223, + "step": 3990 + }, + { + "epoch": 0.15819968755561709, + "grad_norm": 1.1259679640570786, + "learning_rate": 5.272871078302136e-06, + "loss": 1.2163, + "step": 4000 + }, + { + "epoch": 0.15859518677450613, + "grad_norm": 1.1365047394495493, + "learning_rate": 5.286053255997892e-06, + "loss": 1.2343, + "step": 4010 + }, + { + "epoch": 0.15899068599339516, + "grad_norm": 1.1549244055048389, + "learning_rate": 5.299235433693647e-06, + "loss": 1.2231, + "step": 4020 + }, + { + "epoch": 0.1593861852122842, + "grad_norm": 1.1536226134710523, + "learning_rate": 5.312417611389402e-06, + "loss": 1.2235, + "step": 4030 + }, + { + "epoch": 0.15978168443117324, + "grad_norm": 1.221526078235549, + "learning_rate": 5.325599789085158e-06, + "loss": 1.2211, + "step": 4040 + }, + { + "epoch": 0.16017718365006228, + "grad_norm": 1.0183863925295793, + "learning_rate": 5.338781966780912e-06, + "loss": 1.2279, + "step": 4050 + }, + { + "epoch": 0.16057268286895132, + "grad_norm": 1.189482207312474, + "learning_rate": 5.351964144476668e-06, + "loss": 1.2224, + "step": 4060 + }, + { + "epoch": 0.1609681820878404, + "grad_norm": 1.147959951272284, + "learning_rate": 5.3651463221724235e-06, + "loss": 1.2194, + "step": 4070 + }, + { + "epoch": 0.16136368130672943, + "grad_norm": 1.216570820756994, + "learning_rate": 5.378328499868178e-06, + "loss": 1.2115, + "step": 4080 + }, + { + "epoch": 0.16175918052561847, + "grad_norm": 1.1023582438938562, + "learning_rate": 5.391510677563934e-06, + "loss": 1.2118, + "step": 4090 + }, + { + "epoch": 0.1621546797445075, + "grad_norm": 1.1847962397819922, + "learning_rate": 5.4046928552596886e-06, + "loss": 1.2191, + "step": 4100 + }, + { + "epoch": 0.16255017896339655, + "grad_norm": 1.1004471914767329, + "learning_rate": 5.417875032955444e-06, + "loss": 1.2318, + "step": 4110 + }, + { + "epoch": 0.16294567818228559, + "grad_norm": 1.1649211722871002, + "learning_rate": 5.431057210651201e-06, + "loss": 1.2229, + "step": 4120 + }, + { + "epoch": 0.16334117740117463, + "grad_norm": 1.215868544802769, + "learning_rate": 5.444239388346955e-06, + "loss": 1.2097, + "step": 4130 + }, + { + "epoch": 0.16373667662006366, + "grad_norm": 1.1889771147762935, + "learning_rate": 5.457421566042711e-06, + "loss": 1.2382, + "step": 4140 + }, + { + "epoch": 0.16413217583895273, + "grad_norm": 1.2546811487400502, + "learning_rate": 5.4706037437384665e-06, + "loss": 1.1969, + "step": 4150 + }, + { + "epoch": 0.16452767505784177, + "grad_norm": 1.0644584064909797, + "learning_rate": 5.483785921434221e-06, + "loss": 1.2298, + "step": 4160 + }, + { + "epoch": 0.1649231742767308, + "grad_norm": 1.1063298464696862, + "learning_rate": 5.496968099129977e-06, + "loss": 1.2201, + "step": 4170 + }, + { + "epoch": 0.16531867349561985, + "grad_norm": 1.0658384818742714, + "learning_rate": 5.510150276825732e-06, + "loss": 1.2275, + "step": 4180 + }, + { + "epoch": 0.1657141727145089, + "grad_norm": 1.0683881098515082, + "learning_rate": 5.523332454521487e-06, + "loss": 1.2182, + "step": 4190 + }, + { + "epoch": 0.16610967193339793, + "grad_norm": 1.2241782663996508, + "learning_rate": 5.536514632217243e-06, + "loss": 1.2104, + "step": 4200 + }, + { + "epoch": 0.16650517115228697, + "grad_norm": 1.101634265395167, + "learning_rate": 5.5496968099129975e-06, + "loss": 
1.2083, + "step": 4210 + }, + { + "epoch": 0.166900670371176, + "grad_norm": 1.1178755199397847, + "learning_rate": 5.562878987608753e-06, + "loss": 1.2241, + "step": 4220 + }, + { + "epoch": 0.16729616959006505, + "grad_norm": 1.1098609400973727, + "learning_rate": 5.576061165304508e-06, + "loss": 1.2124, + "step": 4230 + }, + { + "epoch": 0.16769166880895411, + "grad_norm": 1.1189088715468434, + "learning_rate": 5.589243343000264e-06, + "loss": 1.2234, + "step": 4240 + }, + { + "epoch": 0.16808716802784315, + "grad_norm": 1.1016650861823671, + "learning_rate": 5.60242552069602e-06, + "loss": 1.2152, + "step": 4250 + }, + { + "epoch": 0.1684826672467322, + "grad_norm": 1.0871078425460847, + "learning_rate": 5.6156076983917754e-06, + "loss": 1.2048, + "step": 4260 + }, + { + "epoch": 0.16887816646562123, + "grad_norm": 1.2015510695753984, + "learning_rate": 5.62878987608753e-06, + "loss": 1.2052, + "step": 4270 + }, + { + "epoch": 0.16927366568451027, + "grad_norm": 1.1087395721866484, + "learning_rate": 5.641972053783286e-06, + "loss": 1.2116, + "step": 4280 + }, + { + "epoch": 0.1696691649033993, + "grad_norm": 1.1957481980714524, + "learning_rate": 5.6551542314790405e-06, + "loss": 1.204, + "step": 4290 + }, + { + "epoch": 0.17006466412228835, + "grad_norm": 1.220248337474719, + "learning_rate": 5.668336409174796e-06, + "loss": 1.234, + "step": 4300 + }, + { + "epoch": 0.1704601633411774, + "grad_norm": 1.1288978870452016, + "learning_rate": 5.681518586870552e-06, + "loss": 1.2226, + "step": 4310 + }, + { + "epoch": 0.17085566256006646, + "grad_norm": 1.1583623676766133, + "learning_rate": 5.6947007645663064e-06, + "loss": 1.2078, + "step": 4320 + }, + { + "epoch": 0.1712511617789555, + "grad_norm": 1.0820854303411411, + "learning_rate": 5.707882942262062e-06, + "loss": 1.2275, + "step": 4330 + }, + { + "epoch": 0.17164666099784454, + "grad_norm": 1.0436724693021422, + "learning_rate": 5.721065119957817e-06, + "loss": 1.2208, + "step": 4340 + }, + { + "epoch": 0.17204216021673358, + "grad_norm": 1.0826207101520977, + "learning_rate": 5.734247297653572e-06, + "loss": 1.2259, + "step": 4350 + }, + { + "epoch": 0.17243765943562261, + "grad_norm": 1.0062420188231134, + "learning_rate": 5.747429475349329e-06, + "loss": 1.2152, + "step": 4360 + }, + { + "epoch": 0.17283315865451165, + "grad_norm": 1.1625476486327295, + "learning_rate": 5.760611653045084e-06, + "loss": 1.2126, + "step": 4370 + }, + { + "epoch": 0.1732286578734007, + "grad_norm": 1.0850920716468246, + "learning_rate": 5.773793830740839e-06, + "loss": 1.2063, + "step": 4380 + }, + { + "epoch": 0.17362415709228973, + "grad_norm": 1.1230610081802372, + "learning_rate": 5.786976008436595e-06, + "loss": 1.1971, + "step": 4390 + }, + { + "epoch": 0.17401965631117877, + "grad_norm": 1.1151659191207979, + "learning_rate": 5.8001581861323494e-06, + "loss": 1.2067, + "step": 4400 + }, + { + "epoch": 0.17441515553006784, + "grad_norm": 1.230022261272906, + "learning_rate": 5.813340363828105e-06, + "loss": 1.2075, + "step": 4410 + }, + { + "epoch": 0.17481065474895688, + "grad_norm": 1.1273517804475939, + "learning_rate": 5.826522541523861e-06, + "loss": 1.2277, + "step": 4420 + }, + { + "epoch": 0.17520615396784592, + "grad_norm": 1.033521339125641, + "learning_rate": 5.839704719219615e-06, + "loss": 1.2031, + "step": 4430 + }, + { + "epoch": 0.17560165318673496, + "grad_norm": 1.184836766845481, + "learning_rate": 5.852886896915371e-06, + "loss": 1.2166, + "step": 4440 + }, + { + "epoch": 0.175997152405624, + "grad_norm": 
1.0961490896095614, + "learning_rate": 5.866069074611126e-06, + "loss": 1.2072, + "step": 4450 + }, + { + "epoch": 0.17639265162451304, + "grad_norm": 1.0861106050149547, + "learning_rate": 5.879251252306881e-06, + "loss": 1.2063, + "step": 4460 + }, + { + "epoch": 0.17678815084340208, + "grad_norm": 1.1315583725036726, + "learning_rate": 5.892433430002637e-06, + "loss": 1.209, + "step": 4470 + }, + { + "epoch": 0.17718365006229111, + "grad_norm": 1.2140546103918646, + "learning_rate": 5.905615607698392e-06, + "loss": 1.2076, + "step": 4480 + }, + { + "epoch": 0.17757914928118018, + "grad_norm": 1.0819422364079176, + "learning_rate": 5.918797785394148e-06, + "loss": 1.2201, + "step": 4490 + }, + { + "epoch": 0.17797464850006922, + "grad_norm": 1.0987791821906259, + "learning_rate": 5.931979963089904e-06, + "loss": 1.2229, + "step": 4500 + }, + { + "epoch": 0.17837014771895826, + "grad_norm": 1.069938546339024, + "learning_rate": 5.945162140785658e-06, + "loss": 1.213, + "step": 4510 + }, + { + "epoch": 0.1787656469378473, + "grad_norm": 1.0815335186123936, + "learning_rate": 5.958344318481414e-06, + "loss": 1.2106, + "step": 4520 + }, + { + "epoch": 0.17916114615673634, + "grad_norm": 1.1135705456815699, + "learning_rate": 5.971526496177169e-06, + "loss": 1.2102, + "step": 4530 + }, + { + "epoch": 0.17955664537562538, + "grad_norm": 1.16647050216788, + "learning_rate": 5.984708673872924e-06, + "loss": 1.2037, + "step": 4540 + }, + { + "epoch": 0.17995214459451442, + "grad_norm": 1.1292284579138845, + "learning_rate": 5.99789085156868e-06, + "loss": 1.1978, + "step": 4550 + }, + { + "epoch": 0.18034764381340346, + "grad_norm": 1.14664276573325, + "learning_rate": 6.011073029264435e-06, + "loss": 1.2139, + "step": 4560 + }, + { + "epoch": 0.18074314303229252, + "grad_norm": 1.1601126947258054, + "learning_rate": 6.02425520696019e-06, + "loss": 1.2129, + "step": 4570 + }, + { + "epoch": 0.18113864225118156, + "grad_norm": 1.1305462606121826, + "learning_rate": 6.037437384655945e-06, + "loss": 1.2017, + "step": 4580 + }, + { + "epoch": 0.1815341414700706, + "grad_norm": 1.2025591824974453, + "learning_rate": 6.0506195623517005e-06, + "loss": 1.1961, + "step": 4590 + }, + { + "epoch": 0.18192964068895964, + "grad_norm": 1.107927753760213, + "learning_rate": 6.063801740047456e-06, + "loss": 1.2043, + "step": 4600 + }, + { + "epoch": 0.18232513990784868, + "grad_norm": 1.059282958384036, + "learning_rate": 6.0769839177432125e-06, + "loss": 1.2237, + "step": 4610 + }, + { + "epoch": 0.18272063912673772, + "grad_norm": 1.1293359737449702, + "learning_rate": 6.090166095438967e-06, + "loss": 1.2178, + "step": 4620 + }, + { + "epoch": 0.18311613834562676, + "grad_norm": 1.1003580895342506, + "learning_rate": 6.103348273134723e-06, + "loss": 1.2172, + "step": 4630 + }, + { + "epoch": 0.1835116375645158, + "grad_norm": 1.1129213322032758, + "learning_rate": 6.116530450830478e-06, + "loss": 1.2067, + "step": 4640 + }, + { + "epoch": 0.18390713678340484, + "grad_norm": 1.1370152734015335, + "learning_rate": 6.129712628526233e-06, + "loss": 1.2212, + "step": 4650 + }, + { + "epoch": 0.1843026360022939, + "grad_norm": 0.9871263897368611, + "learning_rate": 6.142894806221989e-06, + "loss": 1.1978, + "step": 4660 + }, + { + "epoch": 0.18469813522118295, + "grad_norm": 1.1226188070149417, + "learning_rate": 6.1560769839177435e-06, + "loss": 1.2064, + "step": 4670 + }, + { + "epoch": 0.18509363444007199, + "grad_norm": 1.005155023841371, + "learning_rate": 6.169259161613499e-06, + "loss": 1.215, + "step": 4680 
+ }, + { + "epoch": 0.18548913365896103, + "grad_norm": 1.1970691027193183, + "learning_rate": 6.182441339309254e-06, + "loss": 1.2262, + "step": 4690 + }, + { + "epoch": 0.18588463287785006, + "grad_norm": 1.1480500843386499, + "learning_rate": 6.1956235170050095e-06, + "loss": 1.2021, + "step": 4700 + }, + { + "epoch": 0.1862801320967391, + "grad_norm": 1.1316159070674807, + "learning_rate": 6.208805694700765e-06, + "loss": 1.1915, + "step": 4710 + }, + { + "epoch": 0.18667563131562814, + "grad_norm": 1.0970783022183737, + "learning_rate": 6.22198787239652e-06, + "loss": 1.1908, + "step": 4720 + }, + { + "epoch": 0.18707113053451718, + "grad_norm": 1.2322937340277362, + "learning_rate": 6.235170050092276e-06, + "loss": 1.1937, + "step": 4730 + }, + { + "epoch": 0.18746662975340625, + "grad_norm": 1.1562788085977769, + "learning_rate": 6.248352227788032e-06, + "loss": 1.1975, + "step": 4740 + }, + { + "epoch": 0.1878621289722953, + "grad_norm": 1.0617214350153394, + "learning_rate": 6.2615344054837865e-06, + "loss": 1.2073, + "step": 4750 + }, + { + "epoch": 0.18825762819118433, + "grad_norm": 1.1267659232258125, + "learning_rate": 6.274716583179542e-06, + "loss": 1.2082, + "step": 4760 + }, + { + "epoch": 0.18865312741007337, + "grad_norm": 1.0953367195241055, + "learning_rate": 6.287898760875297e-06, + "loss": 1.218, + "step": 4770 + }, + { + "epoch": 0.1890486266289624, + "grad_norm": 1.233493824395055, + "learning_rate": 6.3010809385710525e-06, + "loss": 1.1995, + "step": 4780 + }, + { + "epoch": 0.18944412584785145, + "grad_norm": 1.1672051427671783, + "learning_rate": 6.314263116266808e-06, + "loss": 1.2104, + "step": 4790 + }, + { + "epoch": 0.18983962506674049, + "grad_norm": 1.130357352957549, + "learning_rate": 6.327445293962563e-06, + "loss": 1.1854, + "step": 4800 + }, + { + "epoch": 0.19023512428562953, + "grad_norm": 1.1876610829152676, + "learning_rate": 6.340627471658318e-06, + "loss": 1.2025, + "step": 4810 + }, + { + "epoch": 0.19063062350451856, + "grad_norm": 1.1432838733532447, + "learning_rate": 6.353809649354073e-06, + "loss": 1.2002, + "step": 4820 + }, + { + "epoch": 0.19102612272340763, + "grad_norm": 1.1064655435369426, + "learning_rate": 6.366991827049829e-06, + "loss": 1.2054, + "step": 4830 + }, + { + "epoch": 0.19142162194229667, + "grad_norm": 1.0369588688987386, + "learning_rate": 6.380174004745584e-06, + "loss": 1.2098, + "step": 4840 + }, + { + "epoch": 0.1918171211611857, + "grad_norm": 1.1406236057045094, + "learning_rate": 6.393356182441341e-06, + "loss": 1.1847, + "step": 4850 + }, + { + "epoch": 0.19221262038007475, + "grad_norm": 1.1058165241374485, + "learning_rate": 6.4065383601370955e-06, + "loss": 1.1937, + "step": 4860 + }, + { + "epoch": 0.1926081195989638, + "grad_norm": 1.0992793605595896, + "learning_rate": 6.419720537832851e-06, + "loss": 1.1932, + "step": 4870 + }, + { + "epoch": 0.19300361881785283, + "grad_norm": 1.1127877999481255, + "learning_rate": 6.432902715528606e-06, + "loss": 1.2048, + "step": 4880 + }, + { + "epoch": 0.19339911803674187, + "grad_norm": 1.125752676847193, + "learning_rate": 6.446084893224361e-06, + "loss": 1.2097, + "step": 4890 + }, + { + "epoch": 0.1937946172556309, + "grad_norm": 1.1381813091957724, + "learning_rate": 6.459267070920117e-06, + "loss": 1.2113, + "step": 4900 + }, + { + "epoch": 0.19419011647451997, + "grad_norm": 1.1318617892323126, + "learning_rate": 6.472449248615872e-06, + "loss": 1.2025, + "step": 4910 + }, + { + "epoch": 0.19458561569340901, + "grad_norm": 1.1590990307599618, + 
"learning_rate": 6.485631426311627e-06, + "loss": 1.2255, + "step": 4920 + }, + { + "epoch": 0.19498111491229805, + "grad_norm": 1.0728884274758368, + "learning_rate": 6.498813604007382e-06, + "loss": 1.2192, + "step": 4930 + }, + { + "epoch": 0.1953766141311871, + "grad_norm": 1.1590073488967672, + "learning_rate": 6.511995781703138e-06, + "loss": 1.1997, + "step": 4940 + }, + { + "epoch": 0.19577211335007613, + "grad_norm": 1.0720531676701768, + "learning_rate": 6.525177959398893e-06, + "loss": 1.2053, + "step": 4950 + }, + { + "epoch": 0.19616761256896517, + "grad_norm": 1.102380456447985, + "learning_rate": 6.538360137094648e-06, + "loss": 1.1884, + "step": 4960 + }, + { + "epoch": 0.1965631117878542, + "grad_norm": 1.2196827142160622, + "learning_rate": 6.5515423147904036e-06, + "loss": 1.1909, + "step": 4970 + }, + { + "epoch": 0.19695861100674325, + "grad_norm": 1.2097402651174276, + "learning_rate": 6.56472449248616e-06, + "loss": 1.1928, + "step": 4980 + }, + { + "epoch": 0.19735411022563232, + "grad_norm": 1.196417983790882, + "learning_rate": 6.577906670181915e-06, + "loss": 1.212, + "step": 4990 + }, + { + "epoch": 0.19774960944452136, + "grad_norm": 1.1301826828351647, + "learning_rate": 6.59108884787767e-06, + "loss": 1.2082, + "step": 5000 + }, + { + "epoch": 0.1981451086634104, + "grad_norm": 1.205324899611935, + "learning_rate": 6.604271025573425e-06, + "loss": 1.2016, + "step": 5010 + }, + { + "epoch": 0.19854060788229944, + "grad_norm": 1.0418298360414089, + "learning_rate": 6.617453203269181e-06, + "loss": 1.1904, + "step": 5020 + }, + { + "epoch": 0.19893610710118848, + "grad_norm": 1.1056732498997823, + "learning_rate": 6.630635380964936e-06, + "loss": 1.1847, + "step": 5030 + }, + { + "epoch": 0.19933160632007751, + "grad_norm": 1.1723411776579258, + "learning_rate": 6.643817558660691e-06, + "loss": 1.2013, + "step": 5040 + }, + { + "epoch": 0.19972710553896655, + "grad_norm": 1.0725587178293903, + "learning_rate": 6.6569997363564466e-06, + "loss": 1.1906, + "step": 5050 + }, + { + "epoch": 0.2001226047578556, + "grad_norm": 1.182205126075706, + "learning_rate": 6.670181914052201e-06, + "loss": 1.2293, + "step": 5060 + }, + { + "epoch": 0.20051810397674463, + "grad_norm": 1.1706906166684679, + "learning_rate": 6.683364091747957e-06, + "loss": 1.1839, + "step": 5070 + }, + { + "epoch": 0.2009136031956337, + "grad_norm": 1.0606264241886505, + "learning_rate": 6.6965462694437125e-06, + "loss": 1.1898, + "step": 5080 + }, + { + "epoch": 0.20130910241452274, + "grad_norm": 1.102577300984322, + "learning_rate": 6.709728447139467e-06, + "loss": 1.1931, + "step": 5090 + }, + { + "epoch": 0.20170460163341178, + "grad_norm": 1.0740894576699729, + "learning_rate": 6.722910624835224e-06, + "loss": 1.19, + "step": 5100 + }, + { + "epoch": 0.20210010085230082, + "grad_norm": 1.031551191036934, + "learning_rate": 6.736092802530979e-06, + "loss": 1.1873, + "step": 5110 + }, + { + "epoch": 0.20249560007118986, + "grad_norm": 1.1076310763618626, + "learning_rate": 6.749274980226734e-06, + "loss": 1.1972, + "step": 5120 + }, + { + "epoch": 0.2028910992900789, + "grad_norm": 1.166132111346991, + "learning_rate": 6.7624571579224896e-06, + "loss": 1.1901, + "step": 5130 + }, + { + "epoch": 0.20328659850896794, + "grad_norm": 1.132853692988871, + "learning_rate": 6.775639335618245e-06, + "loss": 1.2034, + "step": 5140 + }, + { + "epoch": 0.20368209772785698, + "grad_norm": 0.9725479759881043, + "learning_rate": 6.788821513314e-06, + "loss": 1.1833, + "step": 5150 + }, + { + "epoch": 
0.20407759694674604, + "grad_norm": 1.0332235948730206, + "learning_rate": 6.8020036910097555e-06, + "loss": 1.1982, + "step": 5160 + }, + { + "epoch": 0.20447309616563508, + "grad_norm": 1.0583171205475557, + "learning_rate": 6.81518586870551e-06, + "loss": 1.2041, + "step": 5170 + }, + { + "epoch": 0.20486859538452412, + "grad_norm": 1.109138409419143, + "learning_rate": 6.828368046401266e-06, + "loss": 1.187, + "step": 5180 + }, + { + "epoch": 0.20526409460341316, + "grad_norm": 1.0705760872339762, + "learning_rate": 6.841550224097021e-06, + "loss": 1.1962, + "step": 5190 + }, + { + "epoch": 0.2056595938223022, + "grad_norm": 1.1911369148569064, + "learning_rate": 6.854732401792776e-06, + "loss": 1.1849, + "step": 5200 + }, + { + "epoch": 0.20605509304119124, + "grad_norm": 1.230097996524188, + "learning_rate": 6.867914579488532e-06, + "loss": 1.1891, + "step": 5210 + }, + { + "epoch": 0.20645059226008028, + "grad_norm": 1.0580823525273977, + "learning_rate": 6.881096757184288e-06, + "loss": 1.1863, + "step": 5220 + }, + { + "epoch": 0.20684609147896932, + "grad_norm": 1.0052600867365868, + "learning_rate": 6.894278934880043e-06, + "loss": 1.2044, + "step": 5230 + }, + { + "epoch": 0.20724159069785839, + "grad_norm": 1.1017939365565552, + "learning_rate": 6.9074611125757985e-06, + "loss": 1.1684, + "step": 5240 + }, + { + "epoch": 0.20763708991674742, + "grad_norm": 1.2056077201045035, + "learning_rate": 6.920643290271554e-06, + "loss": 1.2266, + "step": 5250 + }, + { + "epoch": 0.20803258913563646, + "grad_norm": 1.181079240370204, + "learning_rate": 6.933825467967309e-06, + "loss": 1.186, + "step": 5260 + }, + { + "epoch": 0.2084280883545255, + "grad_norm": 1.2195669073367532, + "learning_rate": 6.947007645663064e-06, + "loss": 1.1817, + "step": 5270 + }, + { + "epoch": 0.20882358757341454, + "grad_norm": 1.1235748020775331, + "learning_rate": 6.960189823358819e-06, + "loss": 1.2001, + "step": 5280 + }, + { + "epoch": 0.20921908679230358, + "grad_norm": 1.1521378141517005, + "learning_rate": 6.973372001054575e-06, + "loss": 1.1924, + "step": 5290 + }, + { + "epoch": 0.20961458601119262, + "grad_norm": 1.0166028358003851, + "learning_rate": 6.98655417875033e-06, + "loss": 1.1897, + "step": 5300 + }, + { + "epoch": 0.21001008523008166, + "grad_norm": 1.0869474511568988, + "learning_rate": 6.999736356446085e-06, + "loss": 1.1823, + "step": 5310 + }, + { + "epoch": 0.2104055844489707, + "grad_norm": 1.1130187118253254, + "learning_rate": 7.012918534141841e-06, + "loss": 1.2134, + "step": 5320 + }, + { + "epoch": 0.21080108366785977, + "grad_norm": 1.1469650571130527, + "learning_rate": 7.026100711837595e-06, + "loss": 1.188, + "step": 5330 + }, + { + "epoch": 0.2111965828867488, + "grad_norm": 1.1300566228826372, + "learning_rate": 7.039282889533352e-06, + "loss": 1.1725, + "step": 5340 + }, + { + "epoch": 0.21159208210563785, + "grad_norm": 1.274414479671289, + "learning_rate": 7.052465067229107e-06, + "loss": 1.1988, + "step": 5350 + }, + { + "epoch": 0.21198758132452689, + "grad_norm": 1.1850161637671603, + "learning_rate": 7.065647244924862e-06, + "loss": 1.1975, + "step": 5360 + }, + { + "epoch": 0.21238308054341593, + "grad_norm": 1.0612621577904042, + "learning_rate": 7.078829422620618e-06, + "loss": 1.1728, + "step": 5370 + }, + { + "epoch": 0.21277857976230496, + "grad_norm": 1.1124533314306806, + "learning_rate": 7.092011600316373e-06, + "loss": 1.1843, + "step": 5380 + }, + { + "epoch": 0.213174078981194, + "grad_norm": 1.0477804215375393, + "learning_rate": 
7.105193778012128e-06, + "loss": 1.1811, + "step": 5390 + }, + { + "epoch": 0.21356957820008304, + "grad_norm": 1.0372335636733403, + "learning_rate": 7.118375955707884e-06, + "loss": 1.1951, + "step": 5400 + }, + { + "epoch": 0.2139650774189721, + "grad_norm": 1.1481534276974998, + "learning_rate": 7.131558133403638e-06, + "loss": 1.1947, + "step": 5410 + }, + { + "epoch": 0.21436057663786115, + "grad_norm": 1.1697544411860625, + "learning_rate": 7.144740311099394e-06, + "loss": 1.1953, + "step": 5420 + }, + { + "epoch": 0.2147560758567502, + "grad_norm": 1.1311784869070132, + "learning_rate": 7.15792248879515e-06, + "loss": 1.1787, + "step": 5430 + }, + { + "epoch": 0.21515157507563923, + "grad_norm": 1.2282071696512284, + "learning_rate": 7.171104666490904e-06, + "loss": 1.1759, + "step": 5440 + }, + { + "epoch": 0.21554707429452827, + "grad_norm": 1.177619393772581, + "learning_rate": 7.18428684418666e-06, + "loss": 1.1885, + "step": 5450 + }, + { + "epoch": 0.2159425735134173, + "grad_norm": 1.090265552981861, + "learning_rate": 7.197469021882415e-06, + "loss": 1.1939, + "step": 5460 + }, + { + "epoch": 0.21633807273230635, + "grad_norm": 1.142632763613256, + "learning_rate": 7.210651199578171e-06, + "loss": 1.1822, + "step": 5470 + }, + { + "epoch": 0.2167335719511954, + "grad_norm": 1.0718320903292262, + "learning_rate": 7.223833377273927e-06, + "loss": 1.1939, + "step": 5480 + }, + { + "epoch": 0.21712907117008443, + "grad_norm": 1.176301180636614, + "learning_rate": 7.237015554969682e-06, + "loss": 1.1907, + "step": 5490 + }, + { + "epoch": 0.2175245703889735, + "grad_norm": 1.1886012961434531, + "learning_rate": 7.250197732665437e-06, + "loss": 1.1826, + "step": 5500 + }, + { + "epoch": 0.21792006960786253, + "grad_norm": 1.0984046476010765, + "learning_rate": 7.263379910361193e-06, + "loss": 1.1889, + "step": 5510 + }, + { + "epoch": 0.21831556882675157, + "grad_norm": 1.1404654332642885, + "learning_rate": 7.276562088056947e-06, + "loss": 1.1849, + "step": 5520 + }, + { + "epoch": 0.2187110680456406, + "grad_norm": 1.4914170937444111, + "learning_rate": 7.289744265752703e-06, + "loss": 1.2077, + "step": 5530 + }, + { + "epoch": 0.21910656726452965, + "grad_norm": 1.0116134814623248, + "learning_rate": 7.3029264434484585e-06, + "loss": 1.1896, + "step": 5540 + }, + { + "epoch": 0.2195020664834187, + "grad_norm": 1.0576683251568944, + "learning_rate": 7.316108621144213e-06, + "loss": 1.1669, + "step": 5550 + }, + { + "epoch": 0.21989756570230773, + "grad_norm": 1.176212516636309, + "learning_rate": 7.329290798839969e-06, + "loss": 1.1856, + "step": 5560 + }, + { + "epoch": 0.22029306492119677, + "grad_norm": 1.0528687783871176, + "learning_rate": 7.342472976535724e-06, + "loss": 1.1827, + "step": 5570 + }, + { + "epoch": 0.22068856414008584, + "grad_norm": 1.0778721372671816, + "learning_rate": 7.355655154231479e-06, + "loss": 1.1782, + "step": 5580 + }, + { + "epoch": 0.22108406335897487, + "grad_norm": 1.1596046837857599, + "learning_rate": 7.368837331927236e-06, + "loss": 1.1887, + "step": 5590 + }, + { + "epoch": 0.22147956257786391, + "grad_norm": 1.1386802424246674, + "learning_rate": 7.38201950962299e-06, + "loss": 1.183, + "step": 5600 + }, + { + "epoch": 0.22187506179675295, + "grad_norm": 1.1391482737850553, + "learning_rate": 7.395201687318746e-06, + "loss": 1.1697, + "step": 5610 + }, + { + "epoch": 0.222270561015642, + "grad_norm": 1.1660626439813442, + "learning_rate": 7.4083838650145015e-06, + "loss": 1.1932, + "step": 5620 + }, + { + "epoch": 0.22266606023453103, 
+ "grad_norm": 1.1555188872700974, + "learning_rate": 7.421566042710256e-06, + "loss": 1.1763, + "step": 5630 + }, + { + "epoch": 0.22306155945342007, + "grad_norm": 1.0991845739762487, + "learning_rate": 7.434748220406012e-06, + "loss": 1.1853, + "step": 5640 + }, + { + "epoch": 0.2234570586723091, + "grad_norm": 1.0182843488423188, + "learning_rate": 7.447930398101767e-06, + "loss": 1.1988, + "step": 5650 + }, + { + "epoch": 0.22385255789119818, + "grad_norm": 1.1130398385272844, + "learning_rate": 7.461112575797522e-06, + "loss": 1.1913, + "step": 5660 + }, + { + "epoch": 0.22424805711008722, + "grad_norm": 0.9908546857530702, + "learning_rate": 7.474294753493278e-06, + "loss": 1.1704, + "step": 5670 + }, + { + "epoch": 0.22464355632897626, + "grad_norm": 1.067589359758871, + "learning_rate": 7.4874769311890325e-06, + "loss": 1.1913, + "step": 5680 + }, + { + "epoch": 0.2250390555478653, + "grad_norm": 1.1105660597118736, + "learning_rate": 7.500659108884788e-06, + "loss": 1.1759, + "step": 5690 + }, + { + "epoch": 0.22543455476675434, + "grad_norm": 1.041106831265699, + "learning_rate": 7.513841286580543e-06, + "loss": 1.1701, + "step": 5700 + }, + { + "epoch": 0.22583005398564338, + "grad_norm": 1.2642404375824563, + "learning_rate": 7.527023464276299e-06, + "loss": 1.1876, + "step": 5710 + }, + { + "epoch": 0.22622555320453241, + "grad_norm": 1.0686131672242405, + "learning_rate": 7.540205641972055e-06, + "loss": 1.1987, + "step": 5720 + }, + { + "epoch": 0.22662105242342145, + "grad_norm": 1.0928903190558983, + "learning_rate": 7.5533878196678104e-06, + "loss": 1.1769, + "step": 5730 + }, + { + "epoch": 0.2270165516423105, + "grad_norm": 1.1249045038505108, + "learning_rate": 7.566569997363565e-06, + "loss": 1.1777, + "step": 5740 + }, + { + "epoch": 0.22741205086119956, + "grad_norm": 1.053302381764884, + "learning_rate": 7.579752175059321e-06, + "loss": 1.1815, + "step": 5750 + }, + { + "epoch": 0.2278075500800886, + "grad_norm": 1.06009376731742, + "learning_rate": 7.5929343527550755e-06, + "loss": 1.1806, + "step": 5760 + }, + { + "epoch": 0.22820304929897764, + "grad_norm": 1.1668357715197877, + "learning_rate": 7.606116530450831e-06, + "loss": 1.1827, + "step": 5770 + }, + { + "epoch": 0.22859854851786668, + "grad_norm": 1.1374322414862652, + "learning_rate": 7.619298708146587e-06, + "loss": 1.1772, + "step": 5780 + }, + { + "epoch": 0.22899404773675572, + "grad_norm": 1.1515430768494321, + "learning_rate": 7.632480885842342e-06, + "loss": 1.1741, + "step": 5790 + }, + { + "epoch": 0.22938954695564476, + "grad_norm": 1.0747868397069782, + "learning_rate": 7.645663063538097e-06, + "loss": 1.1721, + "step": 5800 + }, + { + "epoch": 0.2297850461745338, + "grad_norm": 1.0802652471530219, + "learning_rate": 7.658845241233852e-06, + "loss": 1.1897, + "step": 5810 + }, + { + "epoch": 0.23018054539342284, + "grad_norm": 1.0518838605645595, + "learning_rate": 7.672027418929607e-06, + "loss": 1.1908, + "step": 5820 + }, + { + "epoch": 0.2305760446123119, + "grad_norm": 1.2135760341685462, + "learning_rate": 7.685209596625363e-06, + "loss": 1.1836, + "step": 5830 + }, + { + "epoch": 0.23097154383120094, + "grad_norm": 0.9999715659492281, + "learning_rate": 7.69839177432112e-06, + "loss": 1.1811, + "step": 5840 + }, + { + "epoch": 0.23136704305008998, + "grad_norm": 1.1261489154359952, + "learning_rate": 7.711573952016874e-06, + "loss": 1.1908, + "step": 5850 + }, + { + "epoch": 0.23176254226897902, + "grad_norm": 1.126828046943543, + "learning_rate": 7.724756129712629e-06, + "loss": 
1.1793, + "step": 5860 + }, + { + "epoch": 0.23215804148786806, + "grad_norm": 1.0651831460117613, + "learning_rate": 7.737938307408385e-06, + "loss": 1.1766, + "step": 5870 + }, + { + "epoch": 0.2325535407067571, + "grad_norm": 1.106690753227979, + "learning_rate": 7.75112048510414e-06, + "loss": 1.1867, + "step": 5880 + }, + { + "epoch": 0.23294903992564614, + "grad_norm": 1.101915469968922, + "learning_rate": 7.764302662799895e-06, + "loss": 1.1716, + "step": 5890 + }, + { + "epoch": 0.23334453914453518, + "grad_norm": 1.0570199268648146, + "learning_rate": 7.777484840495651e-06, + "loss": 1.1711, + "step": 5900 + }, + { + "epoch": 0.23374003836342422, + "grad_norm": 1.1084856382867536, + "learning_rate": 7.790667018191406e-06, + "loss": 1.1812, + "step": 5910 + }, + { + "epoch": 0.23413553758231329, + "grad_norm": 1.0572418455319958, + "learning_rate": 7.80384919588716e-06, + "loss": 1.1689, + "step": 5920 + }, + { + "epoch": 0.23453103680120232, + "grad_norm": 1.2009413694561577, + "learning_rate": 7.817031373582915e-06, + "loss": 1.1739, + "step": 5930 + }, + { + "epoch": 0.23492653602009136, + "grad_norm": 1.1712303243934858, + "learning_rate": 7.830213551278672e-06, + "loss": 1.1737, + "step": 5940 + }, + { + "epoch": 0.2353220352389804, + "grad_norm": 1.1495272290453256, + "learning_rate": 7.843395728974427e-06, + "loss": 1.1746, + "step": 5950 + }, + { + "epoch": 0.23571753445786944, + "grad_norm": 1.0896631247253787, + "learning_rate": 7.856577906670183e-06, + "loss": 1.16, + "step": 5960 + }, + { + "epoch": 0.23611303367675848, + "grad_norm": 1.1222862469161627, + "learning_rate": 7.869760084365938e-06, + "loss": 1.1838, + "step": 5970 + }, + { + "epoch": 0.23650853289564752, + "grad_norm": 1.1028633684686153, + "learning_rate": 7.882942262061694e-06, + "loss": 1.1867, + "step": 5980 + }, + { + "epoch": 0.23690403211453656, + "grad_norm": 1.081945129117165, + "learning_rate": 7.896124439757449e-06, + "loss": 1.1637, + "step": 5990 + }, + { + "epoch": 0.23729953133342563, + "grad_norm": 1.0328786633600346, + "learning_rate": 7.909306617453204e-06, + "loss": 1.1871, + "step": 6000 + }, + { + "epoch": 0.23769503055231467, + "grad_norm": 1.1079988408281052, + "learning_rate": 7.92248879514896e-06, + "loss": 1.158, + "step": 6010 + }, + { + "epoch": 0.2380905297712037, + "grad_norm": 1.1122837017014315, + "learning_rate": 7.935670972844715e-06, + "loss": 1.1892, + "step": 6020 + }, + { + "epoch": 0.23848602899009275, + "grad_norm": 1.085944017501584, + "learning_rate": 7.94885315054047e-06, + "loss": 1.1792, + "step": 6030 + }, + { + "epoch": 0.23888152820898179, + "grad_norm": 1.1582566764845728, + "learning_rate": 7.962035328236224e-06, + "loss": 1.1914, + "step": 6040 + }, + { + "epoch": 0.23927702742787083, + "grad_norm": 0.9941134424761493, + "learning_rate": 7.97521750593198e-06, + "loss": 1.1674, + "step": 6050 + }, + { + "epoch": 0.23967252664675986, + "grad_norm": 1.1210103484952265, + "learning_rate": 7.988399683627736e-06, + "loss": 1.1527, + "step": 6060 + }, + { + "epoch": 0.2400680258656489, + "grad_norm": 0.9956069141554965, + "learning_rate": 8.00158186132349e-06, + "loss": 1.1825, + "step": 6070 + }, + { + "epoch": 0.24046352508453797, + "grad_norm": 1.0546877411905147, + "learning_rate": 8.014764039019247e-06, + "loss": 1.1588, + "step": 6080 + }, + { + "epoch": 0.240859024303427, + "grad_norm": 1.0720512777738658, + "learning_rate": 8.027946216715003e-06, + "loss": 1.1608, + "step": 6090 + }, + { + "epoch": 0.24125452352231605, + "grad_norm": 1.1327424990841461, + 
"learning_rate": 8.041128394410758e-06, + "loss": 1.18, + "step": 6100 + }, + { + "epoch": 0.2416500227412051, + "grad_norm": 1.0246150412127613, + "learning_rate": 8.054310572106513e-06, + "loss": 1.1727, + "step": 6110 + }, + { + "epoch": 0.24204552196009413, + "grad_norm": 1.0547704761882348, + "learning_rate": 8.067492749802267e-06, + "loss": 1.2026, + "step": 6120 + }, + { + "epoch": 0.24244102117898317, + "grad_norm": 1.1540055379752396, + "learning_rate": 8.080674927498024e-06, + "loss": 1.1724, + "step": 6130 + }, + { + "epoch": 0.2428365203978722, + "grad_norm": 1.2501221411947008, + "learning_rate": 8.093857105193779e-06, + "loss": 1.1785, + "step": 6140 + }, + { + "epoch": 0.24323201961676125, + "grad_norm": 1.0614325320732845, + "learning_rate": 8.107039282889533e-06, + "loss": 1.1658, + "step": 6150 + }, + { + "epoch": 0.2436275188356503, + "grad_norm": 1.015507074925763, + "learning_rate": 8.12022146058529e-06, + "loss": 1.1764, + "step": 6160 + }, + { + "epoch": 0.24402301805453935, + "grad_norm": 1.0628681347777855, + "learning_rate": 8.133403638281044e-06, + "loss": 1.1675, + "step": 6170 + }, + { + "epoch": 0.2444185172734284, + "grad_norm": 1.0932486532840522, + "learning_rate": 8.1465858159768e-06, + "loss": 1.1709, + "step": 6180 + }, + { + "epoch": 0.24481401649231743, + "grad_norm": 1.1263862596026384, + "learning_rate": 8.159767993672556e-06, + "loss": 1.1597, + "step": 6190 + }, + { + "epoch": 0.24520951571120647, + "grad_norm": 1.0770805734265192, + "learning_rate": 8.172950171368312e-06, + "loss": 1.174, + "step": 6200 + }, + { + "epoch": 0.2456050149300955, + "grad_norm": 1.0603848160715135, + "learning_rate": 8.186132349064067e-06, + "loss": 1.1768, + "step": 6210 + }, + { + "epoch": 0.24600051414898455, + "grad_norm": 1.253670238853359, + "learning_rate": 8.199314526759822e-06, + "loss": 1.1777, + "step": 6220 + }, + { + "epoch": 0.2463960133678736, + "grad_norm": 1.0934642808612727, + "learning_rate": 8.212496704455576e-06, + "loss": 1.1915, + "step": 6230 + }, + { + "epoch": 0.24679151258676263, + "grad_norm": 0.9847850117555382, + "learning_rate": 8.225678882151333e-06, + "loss": 1.1767, + "step": 6240 + }, + { + "epoch": 0.2471870118056517, + "grad_norm": 1.211193544162175, + "learning_rate": 8.238861059847087e-06, + "loss": 1.1665, + "step": 6250 + }, + { + "epoch": 0.24758251102454074, + "grad_norm": 1.0847733181321655, + "learning_rate": 8.252043237542842e-06, + "loss": 1.1619, + "step": 6260 + }, + { + "epoch": 0.24797801024342978, + "grad_norm": 1.1025174074927875, + "learning_rate": 8.265225415238599e-06, + "loss": 1.1701, + "step": 6270 + }, + { + "epoch": 0.24837350946231881, + "grad_norm": 1.1692208486659628, + "learning_rate": 8.278407592934353e-06, + "loss": 1.1697, + "step": 6280 + }, + { + "epoch": 0.24876900868120785, + "grad_norm": 1.0073475785556216, + "learning_rate": 8.291589770630108e-06, + "loss": 1.1734, + "step": 6290 + }, + { + "epoch": 0.2491645079000969, + "grad_norm": 1.0234624670140824, + "learning_rate": 8.304771948325865e-06, + "loss": 1.1854, + "step": 6300 + }, + { + "epoch": 0.24956000711898593, + "grad_norm": 1.0951585384234093, + "learning_rate": 8.31795412602162e-06, + "loss": 1.1762, + "step": 6310 + }, + { + "epoch": 0.24995550633787497, + "grad_norm": 1.0873306151039657, + "learning_rate": 8.331136303717374e-06, + "loss": 1.1552, + "step": 6320 + }, + { + "epoch": 0.25035100555676404, + "grad_norm": 1.0240120393841852, + "learning_rate": 8.34431848141313e-06, + "loss": 1.1802, + "step": 6330 + }, + { + "epoch": 
0.2507465047756531, + "grad_norm": 1.034552069138358, + "learning_rate": 8.357500659108885e-06, + "loss": 1.1668, + "step": 6340 + }, + { + "epoch": 0.2511420039945421, + "grad_norm": 1.1197005220763687, + "learning_rate": 8.370682836804642e-06, + "loss": 1.1738, + "step": 6350 + }, + { + "epoch": 0.25153750321343116, + "grad_norm": 1.119668682368083, + "learning_rate": 8.383865014500396e-06, + "loss": 1.1583, + "step": 6360 + }, + { + "epoch": 0.2519330024323202, + "grad_norm": 1.2075231131828583, + "learning_rate": 8.397047192196151e-06, + "loss": 1.1708, + "step": 6370 + }, + { + "epoch": 0.25232850165120924, + "grad_norm": 1.1377469506231173, + "learning_rate": 8.410229369891908e-06, + "loss": 1.1728, + "step": 6380 + }, + { + "epoch": 0.2527240008700983, + "grad_norm": 1.1178120669656715, + "learning_rate": 8.423411547587662e-06, + "loss": 1.1694, + "step": 6390 + }, + { + "epoch": 0.2531195000889873, + "grad_norm": 1.0779353890928898, + "learning_rate": 8.436593725283417e-06, + "loss": 1.1471, + "step": 6400 + }, + { + "epoch": 0.25351499930787635, + "grad_norm": 0.9953082219668707, + "learning_rate": 8.449775902979172e-06, + "loss": 1.197, + "step": 6410 + }, + { + "epoch": 0.2539104985267654, + "grad_norm": 1.0101990986198501, + "learning_rate": 8.462958080674928e-06, + "loss": 1.1553, + "step": 6420 + }, + { + "epoch": 0.25430599774565443, + "grad_norm": 1.157969204894414, + "learning_rate": 8.476140258370683e-06, + "loss": 1.1623, + "step": 6430 + }, + { + "epoch": 0.2547014969645435, + "grad_norm": 1.0488965315858676, + "learning_rate": 8.489322436066438e-06, + "loss": 1.1451, + "step": 6440 + }, + { + "epoch": 0.2550969961834325, + "grad_norm": 1.1041395386253063, + "learning_rate": 8.502504613762194e-06, + "loss": 1.1659, + "step": 6450 + }, + { + "epoch": 0.2554924954023216, + "grad_norm": 1.1581646432867871, + "learning_rate": 8.51568679145795e-06, + "loss": 1.1766, + "step": 6460 + }, + { + "epoch": 0.25588799462121065, + "grad_norm": 1.0980356702568528, + "learning_rate": 8.528868969153705e-06, + "loss": 1.1785, + "step": 6470 + }, + { + "epoch": 0.2562834938400997, + "grad_norm": 1.1861087679667697, + "learning_rate": 8.54205114684946e-06, + "loss": 1.1585, + "step": 6480 + }, + { + "epoch": 0.2566789930589887, + "grad_norm": 1.0219639518352461, + "learning_rate": 8.555233324545216e-06, + "loss": 1.1772, + "step": 6490 + }, + { + "epoch": 0.25707449227787776, + "grad_norm": 1.056878966123612, + "learning_rate": 8.568415502240971e-06, + "loss": 1.1671, + "step": 6500 + }, + { + "epoch": 0.2574699914967668, + "grad_norm": 1.0940660037017722, + "learning_rate": 8.581597679936726e-06, + "loss": 1.148, + "step": 6510 + }, + { + "epoch": 0.25786549071565584, + "grad_norm": 1.0674300690750151, + "learning_rate": 8.59477985763248e-06, + "loss": 1.174, + "step": 6520 + }, + { + "epoch": 0.2582609899345449, + "grad_norm": 1.168884414847323, + "learning_rate": 8.607962035328237e-06, + "loss": 1.1738, + "step": 6530 + }, + { + "epoch": 0.2586564891534339, + "grad_norm": 1.0901074571606302, + "learning_rate": 8.621144213023992e-06, + "loss": 1.1698, + "step": 6540 + }, + { + "epoch": 0.25905198837232296, + "grad_norm": 1.0081933008955009, + "learning_rate": 8.634326390719747e-06, + "loss": 1.1732, + "step": 6550 + }, + { + "epoch": 0.259447487591212, + "grad_norm": 1.0472523218672491, + "learning_rate": 8.647508568415503e-06, + "loss": 1.1944, + "step": 6560 + }, + { + "epoch": 0.25984298681010104, + "grad_norm": 1.083002614378503, + "learning_rate": 8.66069074611126e-06, + "loss": 
1.1759, + "step": 6570 + }, + { + "epoch": 0.2602384860289901, + "grad_norm": 1.0337867350936691, + "learning_rate": 8.673872923807014e-06, + "loss": 1.1786, + "step": 6580 + }, + { + "epoch": 0.2606339852478791, + "grad_norm": 1.1750093553727017, + "learning_rate": 8.687055101502769e-06, + "loss": 1.152, + "step": 6590 + }, + { + "epoch": 0.26102948446676816, + "grad_norm": 1.0065370977739418, + "learning_rate": 8.700237279198524e-06, + "loss": 1.1975, + "step": 6600 + }, + { + "epoch": 0.2614249836856572, + "grad_norm": 1.1200115924116618, + "learning_rate": 8.71341945689428e-06, + "loss": 1.1589, + "step": 6610 + }, + { + "epoch": 0.26182048290454624, + "grad_norm": 1.103576799969859, + "learning_rate": 8.726601634590035e-06, + "loss": 1.1619, + "step": 6620 + }, + { + "epoch": 0.26221598212343533, + "grad_norm": 1.106156567910775, + "learning_rate": 8.73978381228579e-06, + "loss": 1.1391, + "step": 6630 + }, + { + "epoch": 0.26261148134232437, + "grad_norm": 1.1459364589766694, + "learning_rate": 8.752965989981546e-06, + "loss": 1.1438, + "step": 6640 + }, + { + "epoch": 0.2630069805612134, + "grad_norm": 1.0214619379847538, + "learning_rate": 8.7661481676773e-06, + "loss": 1.1629, + "step": 6650 + }, + { + "epoch": 0.26340247978010245, + "grad_norm": 1.0815641824393551, + "learning_rate": 8.779330345373056e-06, + "loss": 1.1714, + "step": 6660 + }, + { + "epoch": 0.2637979789989915, + "grad_norm": 1.0716313595933342, + "learning_rate": 8.792512523068812e-06, + "loss": 1.1749, + "step": 6670 + }, + { + "epoch": 0.26419347821788053, + "grad_norm": 0.9878242959846413, + "learning_rate": 8.805694700764567e-06, + "loss": 1.1618, + "step": 6680 + }, + { + "epoch": 0.26458897743676957, + "grad_norm": 1.1009137554289798, + "learning_rate": 8.818876878460323e-06, + "loss": 1.1669, + "step": 6690 + }, + { + "epoch": 0.2649844766556586, + "grad_norm": 1.1762070685344304, + "learning_rate": 8.832059056156078e-06, + "loss": 1.1741, + "step": 6700 + }, + { + "epoch": 0.26537997587454765, + "grad_norm": 1.1079271551948326, + "learning_rate": 8.845241233851833e-06, + "loss": 1.1627, + "step": 6710 + }, + { + "epoch": 0.2657754750934367, + "grad_norm": 1.1747891181397616, + "learning_rate": 8.858423411547589e-06, + "loss": 1.1501, + "step": 6720 + }, + { + "epoch": 0.2661709743123257, + "grad_norm": 1.037936510673359, + "learning_rate": 8.871605589243344e-06, + "loss": 1.1569, + "step": 6730 + }, + { + "epoch": 0.26656647353121476, + "grad_norm": 1.0870972961027363, + "learning_rate": 8.884787766939099e-06, + "loss": 1.1597, + "step": 6740 + }, + { + "epoch": 0.2669619727501038, + "grad_norm": 1.02864829135046, + "learning_rate": 8.897969944634855e-06, + "loss": 1.1566, + "step": 6750 + }, + { + "epoch": 0.26735747196899284, + "grad_norm": 1.0176116623294185, + "learning_rate": 8.91115212233061e-06, + "loss": 1.151, + "step": 6760 + }, + { + "epoch": 0.2677529711878819, + "grad_norm": 1.0600270797042877, + "learning_rate": 8.924334300026364e-06, + "loss": 1.1648, + "step": 6770 + }, + { + "epoch": 0.2681484704067709, + "grad_norm": 1.0647938834849626, + "learning_rate": 8.937516477722121e-06, + "loss": 1.1482, + "step": 6780 + }, + { + "epoch": 0.26854396962565996, + "grad_norm": 0.9961032924522476, + "learning_rate": 8.950698655417876e-06, + "loss": 1.1587, + "step": 6790 + }, + { + "epoch": 0.26893946884454906, + "grad_norm": 1.188830341978806, + "learning_rate": 8.96388083311363e-06, + "loss": 1.1593, + "step": 6800 + }, + { + "epoch": 0.2693349680634381, + "grad_norm": 1.1656441459820128, + 
"learning_rate": 8.977063010809385e-06, + "loss": 1.163, + "step": 6810 + }, + { + "epoch": 0.26973046728232714, + "grad_norm": 1.002892323701781, + "learning_rate": 8.990245188505142e-06, + "loss": 1.1418, + "step": 6820 + }, + { + "epoch": 0.2701259665012162, + "grad_norm": 1.0445255581844506, + "learning_rate": 9.003427366200898e-06, + "loss": 1.1692, + "step": 6830 + }, + { + "epoch": 0.2705214657201052, + "grad_norm": 1.1601855371753287, + "learning_rate": 9.016609543896653e-06, + "loss": 1.1587, + "step": 6840 + }, + { + "epoch": 0.27091696493899425, + "grad_norm": 1.0550325955738908, + "learning_rate": 9.029791721592408e-06, + "loss": 1.159, + "step": 6850 + }, + { + "epoch": 0.2713124641578833, + "grad_norm": 1.0481380516758225, + "learning_rate": 9.042973899288164e-06, + "loss": 1.1727, + "step": 6860 + }, + { + "epoch": 0.27170796337677233, + "grad_norm": 1.0508674424021534, + "learning_rate": 9.056156076983919e-06, + "loss": 1.1539, + "step": 6870 + }, + { + "epoch": 0.27210346259566137, + "grad_norm": 0.9900021740965856, + "learning_rate": 9.069338254679673e-06, + "loss": 1.1498, + "step": 6880 + }, + { + "epoch": 0.2724989618145504, + "grad_norm": 1.1802460494125215, + "learning_rate": 9.08252043237543e-06, + "loss": 1.153, + "step": 6890 + }, + { + "epoch": 0.27289446103343945, + "grad_norm": 1.0700598518295985, + "learning_rate": 9.095702610071185e-06, + "loss": 1.171, + "step": 6900 + }, + { + "epoch": 0.2732899602523285, + "grad_norm": 1.067586376504746, + "learning_rate": 9.10888478776694e-06, + "loss": 1.1599, + "step": 6910 + }, + { + "epoch": 0.27368545947121753, + "grad_norm": 1.0839229351108237, + "learning_rate": 9.122066965462694e-06, + "loss": 1.1475, + "step": 6920 + }, + { + "epoch": 0.27408095869010657, + "grad_norm": 1.1326930276551868, + "learning_rate": 9.13524914315845e-06, + "loss": 1.169, + "step": 6930 + }, + { + "epoch": 0.2744764579089956, + "grad_norm": 1.0496366375123172, + "learning_rate": 9.148431320854207e-06, + "loss": 1.1527, + "step": 6940 + }, + { + "epoch": 0.27487195712788465, + "grad_norm": 1.0757628236657761, + "learning_rate": 9.161613498549962e-06, + "loss": 1.1547, + "step": 6950 + }, + { + "epoch": 0.27526745634677374, + "grad_norm": 1.1642124923842847, + "learning_rate": 9.174795676245716e-06, + "loss": 1.1698, + "step": 6960 + }, + { + "epoch": 0.2756629555656628, + "grad_norm": 1.0200901316389057, + "learning_rate": 9.187977853941473e-06, + "loss": 1.1428, + "step": 6970 + }, + { + "epoch": 0.2760584547845518, + "grad_norm": 1.1191047918723218, + "learning_rate": 9.201160031637228e-06, + "loss": 1.1588, + "step": 6980 + }, + { + "epoch": 0.27645395400344086, + "grad_norm": 1.0471658185669384, + "learning_rate": 9.214342209332982e-06, + "loss": 1.1408, + "step": 6990 + }, + { + "epoch": 0.2768494532223299, + "grad_norm": 0.9956419350846413, + "learning_rate": 9.227524387028737e-06, + "loss": 1.1491, + "step": 7000 + }, + { + "epoch": 0.27724495244121894, + "grad_norm": 1.0399861975290758, + "learning_rate": 9.240706564724494e-06, + "loss": 1.1634, + "step": 7010 + }, + { + "epoch": 0.277640451660108, + "grad_norm": 1.0338356619905011, + "learning_rate": 9.253888742420248e-06, + "loss": 1.1497, + "step": 7020 + }, + { + "epoch": 0.278035950878997, + "grad_norm": 1.1210126903669186, + "learning_rate": 9.267070920116003e-06, + "loss": 1.1569, + "step": 7030 + }, + { + "epoch": 0.27843145009788606, + "grad_norm": 1.0370097724964364, + "learning_rate": 9.28025309781176e-06, + "loss": 1.1504, + "step": 7040 + }, + { + "epoch": 
0.2788269493167751, + "grad_norm": 1.1003889473115938, + "learning_rate": 9.293435275507514e-06, + "loss": 1.1358, + "step": 7050 + }, + { + "epoch": 0.27922244853566414, + "grad_norm": 1.0337426460445318, + "learning_rate": 9.30661745320327e-06, + "loss": 1.1531, + "step": 7060 + }, + { + "epoch": 0.2796179477545532, + "grad_norm": 1.087073093069295, + "learning_rate": 9.319799630899025e-06, + "loss": 1.1559, + "step": 7070 + }, + { + "epoch": 0.2800134469734422, + "grad_norm": 1.074257522163238, + "learning_rate": 9.332981808594782e-06, + "loss": 1.1444, + "step": 7080 + }, + { + "epoch": 0.28040894619233125, + "grad_norm": 0.9866871639209648, + "learning_rate": 9.346163986290537e-06, + "loss": 1.1495, + "step": 7090 + }, + { + "epoch": 0.2808044454112203, + "grad_norm": 1.1689281642663236, + "learning_rate": 9.359346163986291e-06, + "loss": 1.1625, + "step": 7100 + }, + { + "epoch": 0.28119994463010933, + "grad_norm": 1.1050639124194632, + "learning_rate": 9.372528341682046e-06, + "loss": 1.1667, + "step": 7110 + }, + { + "epoch": 0.2815954438489984, + "grad_norm": 0.9995140101531379, + "learning_rate": 9.385710519377802e-06, + "loss": 1.1497, + "step": 7120 + }, + { + "epoch": 0.28199094306788747, + "grad_norm": 1.0431243905150764, + "learning_rate": 9.398892697073557e-06, + "loss": 1.1503, + "step": 7130 + }, + { + "epoch": 0.2823864422867765, + "grad_norm": 1.046178458001966, + "learning_rate": 9.412074874769312e-06, + "loss": 1.1525, + "step": 7140 + }, + { + "epoch": 0.28278194150566555, + "grad_norm": 1.0699089833697082, + "learning_rate": 9.425257052465068e-06, + "loss": 1.1579, + "step": 7150 + }, + { + "epoch": 0.2831774407245546, + "grad_norm": 1.0047040437453831, + "learning_rate": 9.438439230160823e-06, + "loss": 1.1515, + "step": 7160 + }, + { + "epoch": 0.2835729399434436, + "grad_norm": 1.0708900004693, + "learning_rate": 9.451621407856578e-06, + "loss": 1.1586, + "step": 7170 + }, + { + "epoch": 0.28396843916233266, + "grad_norm": 1.0544833393424204, + "learning_rate": 9.464803585552334e-06, + "loss": 1.1338, + "step": 7180 + }, + { + "epoch": 0.2843639383812217, + "grad_norm": 1.0159631829306173, + "learning_rate": 9.477985763248089e-06, + "loss": 1.1386, + "step": 7190 + }, + { + "epoch": 0.28475943760011074, + "grad_norm": 1.102776336382458, + "learning_rate": 9.491167940943845e-06, + "loss": 1.1336, + "step": 7200 + }, + { + "epoch": 0.2851549368189998, + "grad_norm": 1.1372067535003214, + "learning_rate": 9.5043501186396e-06, + "loss": 1.1579, + "step": 7210 + }, + { + "epoch": 0.2855504360378888, + "grad_norm": 0.9649271222588772, + "learning_rate": 9.517532296335355e-06, + "loss": 1.1524, + "step": 7220 + }, + { + "epoch": 0.28594593525677786, + "grad_norm": 1.0182131376804553, + "learning_rate": 9.530714474031111e-06, + "loss": 1.1529, + "step": 7230 + }, + { + "epoch": 0.2863414344756669, + "grad_norm": 1.1246317122292944, + "learning_rate": 9.543896651726866e-06, + "loss": 1.1656, + "step": 7240 + }, + { + "epoch": 0.28673693369455594, + "grad_norm": 1.0689858875408487, + "learning_rate": 9.557078829422621e-06, + "loss": 1.1454, + "step": 7250 + }, + { + "epoch": 0.287132432913445, + "grad_norm": 1.0257278038771898, + "learning_rate": 9.570261007118377e-06, + "loss": 1.1582, + "step": 7260 + }, + { + "epoch": 0.287527932132334, + "grad_norm": 1.1166064339430195, + "learning_rate": 9.583443184814132e-06, + "loss": 1.155, + "step": 7270 + }, + { + "epoch": 0.28792343135122306, + "grad_norm": 1.1006012516646528, + "learning_rate": 9.596625362509887e-06, + "loss": 
1.1435, + "step": 7280 + }, + { + "epoch": 0.2883189305701121, + "grad_norm": 1.0630534117150428, + "learning_rate": 9.609807540205642e-06, + "loss": 1.1535, + "step": 7290 + }, + { + "epoch": 0.2887144297890012, + "grad_norm": 0.9792269528417479, + "learning_rate": 9.622989717901398e-06, + "loss": 1.1738, + "step": 7300 + }, + { + "epoch": 0.28910992900789023, + "grad_norm": 1.0055040104078166, + "learning_rate": 9.636171895597154e-06, + "loss": 1.1681, + "step": 7310 + }, + { + "epoch": 0.28950542822677927, + "grad_norm": 1.035042302985885, + "learning_rate": 9.649354073292909e-06, + "loss": 1.1428, + "step": 7320 + }, + { + "epoch": 0.2899009274456683, + "grad_norm": 1.0450520011045796, + "learning_rate": 9.662536250988664e-06, + "loss": 1.1443, + "step": 7330 + }, + { + "epoch": 0.29029642666455735, + "grad_norm": 1.096449465283461, + "learning_rate": 9.67571842868442e-06, + "loss": 1.1716, + "step": 7340 + }, + { + "epoch": 0.2906919258834464, + "grad_norm": 1.0544634958059502, + "learning_rate": 9.688900606380175e-06, + "loss": 1.1576, + "step": 7350 + }, + { + "epoch": 0.29108742510233543, + "grad_norm": 0.9878027761108865, + "learning_rate": 9.70208278407593e-06, + "loss": 1.1575, + "step": 7360 + }, + { + "epoch": 0.29148292432122447, + "grad_norm": 1.0749500885717254, + "learning_rate": 9.715264961771686e-06, + "loss": 1.1507, + "step": 7370 + }, + { + "epoch": 0.2918784235401135, + "grad_norm": 1.1787827624737508, + "learning_rate": 9.728447139467441e-06, + "loss": 1.1478, + "step": 7380 + }, + { + "epoch": 0.29227392275900255, + "grad_norm": 1.1486963767973906, + "learning_rate": 9.741629317163196e-06, + "loss": 1.1243, + "step": 7390 + }, + { + "epoch": 0.2926694219778916, + "grad_norm": 1.0523146289593874, + "learning_rate": 9.75481149485895e-06, + "loss": 1.1423, + "step": 7400 + }, + { + "epoch": 0.2930649211967806, + "grad_norm": 1.0554276018279876, + "learning_rate": 9.767993672554707e-06, + "loss": 1.1471, + "step": 7410 + }, + { + "epoch": 0.29346042041566966, + "grad_norm": 1.10403398801149, + "learning_rate": 9.781175850250462e-06, + "loss": 1.1618, + "step": 7420 + }, + { + "epoch": 0.2938559196345587, + "grad_norm": 1.0766020677378985, + "learning_rate": 9.794358027946218e-06, + "loss": 1.1586, + "step": 7430 + }, + { + "epoch": 0.29425141885344774, + "grad_norm": 1.0753900326857253, + "learning_rate": 9.807540205641973e-06, + "loss": 1.1567, + "step": 7440 + }, + { + "epoch": 0.2946469180723368, + "grad_norm": 1.0239699626505532, + "learning_rate": 9.82072238333773e-06, + "loss": 1.1715, + "step": 7450 + }, + { + "epoch": 0.2950424172912258, + "grad_norm": 1.0402348596386024, + "learning_rate": 9.833904561033484e-06, + "loss": 1.1471, + "step": 7460 + }, + { + "epoch": 0.2954379165101149, + "grad_norm": 1.0817951163292203, + "learning_rate": 9.847086738729239e-06, + "loss": 1.1608, + "step": 7470 + }, + { + "epoch": 0.29583341572900396, + "grad_norm": 1.0308457244603786, + "learning_rate": 9.860268916424993e-06, + "loss": 1.1497, + "step": 7480 + }, + { + "epoch": 0.296228914947893, + "grad_norm": 1.0679981517404136, + "learning_rate": 9.87345109412075e-06, + "loss": 1.1527, + "step": 7490 + }, + { + "epoch": 0.29662441416678204, + "grad_norm": 1.13970432943047, + "learning_rate": 9.886633271816505e-06, + "loss": 1.1589, + "step": 7500 + }, + { + "epoch": 0.2970199133856711, + "grad_norm": 1.039400451031927, + "learning_rate": 9.89981544951226e-06, + "loss": 1.1567, + "step": 7510 + }, + { + "epoch": 0.2974154126045601, + "grad_norm": 1.0206511911027096, + 
"learning_rate": 9.912997627208016e-06, + "loss": 1.1498, + "step": 7520 + }, + { + "epoch": 0.29781091182344915, + "grad_norm": 1.0835664350756524, + "learning_rate": 9.92617980490377e-06, + "loss": 1.1374, + "step": 7530 + }, + { + "epoch": 0.2982064110423382, + "grad_norm": 1.0142405882753354, + "learning_rate": 9.939361982599525e-06, + "loss": 1.1528, + "step": 7540 + }, + { + "epoch": 0.29860191026122723, + "grad_norm": 1.0302749761246293, + "learning_rate": 9.952544160295282e-06, + "loss": 1.1481, + "step": 7550 + }, + { + "epoch": 0.29899740948011627, + "grad_norm": 1.1212356445302314, + "learning_rate": 9.965726337991038e-06, + "loss": 1.1401, + "step": 7560 + }, + { + "epoch": 0.2993929086990053, + "grad_norm": 1.105685089454397, + "learning_rate": 9.978908515686793e-06, + "loss": 1.1362, + "step": 7570 + }, + { + "epoch": 0.29978840791789435, + "grad_norm": 1.0348685039411056, + "learning_rate": 9.992090693382548e-06, + "loss": 1.1518, + "step": 7580 + }, + { + "epoch": 0.3001839071367834, + "grad_norm": 1.0772774004761567, + "learning_rate": 9.999999915286853e-06, + "loss": 1.1454, + "step": 7590 + }, + { + "epoch": 0.30057940635567243, + "grad_norm": 1.0868277976509395, + "learning_rate": 9.999998962263965e-06, + "loss": 1.1459, + "step": 7600 + }, + { + "epoch": 0.30097490557456147, + "grad_norm": 1.0396137891404242, + "learning_rate": 9.99999695032696e-06, + "loss": 1.1415, + "step": 7610 + }, + { + "epoch": 0.3013704047934505, + "grad_norm": 1.0467776564054676, + "learning_rate": 9.999993879476262e-06, + "loss": 1.1264, + "step": 7620 + }, + { + "epoch": 0.3017659040123396, + "grad_norm": 1.0919389356037232, + "learning_rate": 9.99998974971252e-06, + "loss": 1.1527, + "step": 7630 + }, + { + "epoch": 0.30216140323122864, + "grad_norm": 1.0133468523450062, + "learning_rate": 9.999984561036611e-06, + "loss": 1.1346, + "step": 7640 + }, + { + "epoch": 0.3025569024501177, + "grad_norm": 1.0173614276530554, + "learning_rate": 9.999978313449632e-06, + "loss": 1.1513, + "step": 7650 + }, + { + "epoch": 0.3029524016690067, + "grad_norm": 1.0073447720518742, + "learning_rate": 9.999971006952907e-06, + "loss": 1.1495, + "step": 7660 + }, + { + "epoch": 0.30334790088789576, + "grad_norm": 1.1021491774529075, + "learning_rate": 9.999962641547982e-06, + "loss": 1.1464, + "step": 7670 + }, + { + "epoch": 0.3037434001067848, + "grad_norm": 1.1547605405148325, + "learning_rate": 9.999953217236631e-06, + "loss": 1.1328, + "step": 7680 + }, + { + "epoch": 0.30413889932567384, + "grad_norm": 1.0611314072445008, + "learning_rate": 9.999942734020848e-06, + "loss": 1.1576, + "step": 7690 + }, + { + "epoch": 0.3045343985445629, + "grad_norm": 1.1532099248265681, + "learning_rate": 9.999931191902855e-06, + "loss": 1.1518, + "step": 7700 + }, + { + "epoch": 0.3049298977634519, + "grad_norm": 1.0540416158600223, + "learning_rate": 9.999918590885093e-06, + "loss": 1.1315, + "step": 7710 + }, + { + "epoch": 0.30532539698234096, + "grad_norm": 1.0225290209213587, + "learning_rate": 9.999904930970234e-06, + "loss": 1.1513, + "step": 7720 + }, + { + "epoch": 0.30572089620123, + "grad_norm": 1.0187755272406196, + "learning_rate": 9.999890212161172e-06, + "loss": 1.1581, + "step": 7730 + }, + { + "epoch": 0.30611639542011904, + "grad_norm": 1.0854726225349205, + "learning_rate": 9.999874434461021e-06, + "loss": 1.1554, + "step": 7740 + }, + { + "epoch": 0.3065118946390081, + "grad_norm": 1.0921745850081388, + "learning_rate": 9.999857597873123e-06, + "loss": 1.1368, + "step": 7750 + }, + { + "epoch": 
0.3069073938578971, + "grad_norm": 1.078034111228919, + "learning_rate": 9.999839702401044e-06, + "loss": 1.1668, + "step": 7760 + }, + { + "epoch": 0.30730289307678615, + "grad_norm": 1.169657677509384, + "learning_rate": 9.999820748048574e-06, + "loss": 1.1439, + "step": 7770 + }, + { + "epoch": 0.3076983922956752, + "grad_norm": 1.0794079857575394, + "learning_rate": 9.999800734819729e-06, + "loss": 1.1483, + "step": 7780 + }, + { + "epoch": 0.30809389151456423, + "grad_norm": 1.0151254316682448, + "learning_rate": 9.999779662718745e-06, + "loss": 1.1407, + "step": 7790 + }, + { + "epoch": 0.30848939073345333, + "grad_norm": 1.0435703566894419, + "learning_rate": 9.999757531750086e-06, + "loss": 1.1224, + "step": 7800 + }, + { + "epoch": 0.30888488995234237, + "grad_norm": 1.1294248679309034, + "learning_rate": 9.999734341918437e-06, + "loss": 1.1351, + "step": 7810 + }, + { + "epoch": 0.3092803891712314, + "grad_norm": 1.0285488300699832, + "learning_rate": 9.999710093228713e-06, + "loss": 1.1524, + "step": 7820 + }, + { + "epoch": 0.30967588839012045, + "grad_norm": 1.1499788419716344, + "learning_rate": 9.999684785686045e-06, + "loss": 1.1392, + "step": 7830 + }, + { + "epoch": 0.3100713876090095, + "grad_norm": 1.0681045851482305, + "learning_rate": 9.999658419295797e-06, + "loss": 1.1432, + "step": 7840 + }, + { + "epoch": 0.3104668868278985, + "grad_norm": 1.0833857535609424, + "learning_rate": 9.99963099406355e-06, + "loss": 1.1348, + "step": 7850 + }, + { + "epoch": 0.31086238604678756, + "grad_norm": 1.0396442755395066, + "learning_rate": 9.999602509995114e-06, + "loss": 1.1368, + "step": 7860 + }, + { + "epoch": 0.3112578852656766, + "grad_norm": 1.1101757808227828, + "learning_rate": 9.99957296709652e-06, + "loss": 1.1544, + "step": 7870 + }, + { + "epoch": 0.31165338448456564, + "grad_norm": 1.058708898641461, + "learning_rate": 9.999542365374024e-06, + "loss": 1.1513, + "step": 7880 + }, + { + "epoch": 0.3120488837034547, + "grad_norm": 1.1142481473073882, + "learning_rate": 9.99951070483411e-06, + "loss": 1.1184, + "step": 7890 + }, + { + "epoch": 0.3124443829223437, + "grad_norm": 1.0971805791686748, + "learning_rate": 9.99947798548348e-06, + "loss": 1.1523, + "step": 7900 + }, + { + "epoch": 0.31283988214123276, + "grad_norm": 1.0301905359960508, + "learning_rate": 9.999444207329066e-06, + "loss": 1.1438, + "step": 7910 + }, + { + "epoch": 0.3132353813601218, + "grad_norm": 1.0542089135117807, + "learning_rate": 9.999409370378018e-06, + "loss": 1.1531, + "step": 7920 + }, + { + "epoch": 0.31363088057901084, + "grad_norm": 1.011441178475839, + "learning_rate": 9.999373474637716e-06, + "loss": 1.1269, + "step": 7930 + }, + { + "epoch": 0.3140263797978999, + "grad_norm": 0.9702712175004335, + "learning_rate": 9.999336520115766e-06, + "loss": 1.119, + "step": 7940 + }, + { + "epoch": 0.3144218790167889, + "grad_norm": 1.0501379630933108, + "learning_rate": 9.999298506819988e-06, + "loss": 1.1334, + "step": 7950 + }, + { + "epoch": 0.31481737823567796, + "grad_norm": 1.0196367582239143, + "learning_rate": 9.999259434758434e-06, + "loss": 1.1471, + "step": 7960 + }, + { + "epoch": 0.31521287745456705, + "grad_norm": 0.9874760325436281, + "learning_rate": 9.999219303939382e-06, + "loss": 1.1425, + "step": 7970 + }, + { + "epoch": 0.3156083766734561, + "grad_norm": 1.1043265488578693, + "learning_rate": 9.999178114371329e-06, + "loss": 1.1427, + "step": 7980 + }, + { + "epoch": 0.31600387589234513, + "grad_norm": 1.0763472120369255, + "learning_rate": 9.999135866062997e-06, + 
"loss": 1.1484, + "step": 7990 + }, + { + "epoch": 0.31639937511123417, + "grad_norm": 1.171183073904711, + "learning_rate": 9.999092559023336e-06, + "loss": 1.1341, + "step": 8000 + }, + { + "epoch": 0.3167948743301232, + "grad_norm": 0.9672680634862828, + "learning_rate": 9.999048193261516e-06, + "loss": 1.1447, + "step": 8010 + }, + { + "epoch": 0.31719037354901225, + "grad_norm": 0.9610529414344143, + "learning_rate": 9.999002768786934e-06, + "loss": 1.1487, + "step": 8020 + }, + { + "epoch": 0.3175858727679013, + "grad_norm": 1.097602664923663, + "learning_rate": 9.998956285609208e-06, + "loss": 1.1369, + "step": 8030 + }, + { + "epoch": 0.31798137198679033, + "grad_norm": 1.0709544024146305, + "learning_rate": 9.998908743738184e-06, + "loss": 1.147, + "step": 8040 + }, + { + "epoch": 0.31837687120567937, + "grad_norm": 1.0310070238319275, + "learning_rate": 9.998860143183932e-06, + "loss": 1.1244, + "step": 8050 + }, + { + "epoch": 0.3187723704245684, + "grad_norm": 1.0136016687133358, + "learning_rate": 9.99881048395674e-06, + "loss": 1.1614, + "step": 8060 + }, + { + "epoch": 0.31916786964345745, + "grad_norm": 1.0670458566600354, + "learning_rate": 9.99875976606713e-06, + "loss": 1.1466, + "step": 8070 + }, + { + "epoch": 0.3195633688623465, + "grad_norm": 1.0068615908917462, + "learning_rate": 9.998707989525843e-06, + "loss": 1.1265, + "step": 8080 + }, + { + "epoch": 0.3199588680812355, + "grad_norm": 1.0486530760868977, + "learning_rate": 9.99865515434384e-06, + "loss": 1.1256, + "step": 8090 + }, + { + "epoch": 0.32035436730012457, + "grad_norm": 1.0650042476163608, + "learning_rate": 9.998601260532314e-06, + "loss": 1.135, + "step": 8100 + }, + { + "epoch": 0.3207498665190136, + "grad_norm": 1.0213542768808401, + "learning_rate": 9.998546308102678e-06, + "loss": 1.1319, + "step": 8110 + }, + { + "epoch": 0.32114536573790264, + "grad_norm": 1.0696396928211043, + "learning_rate": 9.998490297066569e-06, + "loss": 1.1336, + "step": 8120 + }, + { + "epoch": 0.3215408649567917, + "grad_norm": 1.0680316900358602, + "learning_rate": 9.998433227435852e-06, + "loss": 1.1304, + "step": 8130 + }, + { + "epoch": 0.3219363641756808, + "grad_norm": 0.9966427438402035, + "learning_rate": 9.99837509922261e-06, + "loss": 1.1381, + "step": 8140 + }, + { + "epoch": 0.3223318633945698, + "grad_norm": 1.0016410299457212, + "learning_rate": 9.998315912439156e-06, + "loss": 1.1589, + "step": 8150 + }, + { + "epoch": 0.32272736261345886, + "grad_norm": 1.122299645061776, + "learning_rate": 9.998255667098025e-06, + "loss": 1.1318, + "step": 8160 + }, + { + "epoch": 0.3231228618323479, + "grad_norm": 1.049021357182291, + "learning_rate": 9.998194363211972e-06, + "loss": 1.1326, + "step": 8170 + }, + { + "epoch": 0.32351836105123694, + "grad_norm": 1.0976292616812775, + "learning_rate": 9.998132000793986e-06, + "loss": 1.1203, + "step": 8180 + }, + { + "epoch": 0.323913860270126, + "grad_norm": 1.0319956044056868, + "learning_rate": 9.998068579857269e-06, + "loss": 1.159, + "step": 8190 + }, + { + "epoch": 0.324309359489015, + "grad_norm": 1.0894378415867148, + "learning_rate": 9.998004100415255e-06, + "loss": 1.1515, + "step": 8200 + }, + { + "epoch": 0.32470485870790405, + "grad_norm": 1.049393195654963, + "learning_rate": 9.997938562481599e-06, + "loss": 1.1386, + "step": 8210 + }, + { + "epoch": 0.3251003579267931, + "grad_norm": 1.0516067365416315, + "learning_rate": 9.99787196607018e-06, + "loss": 1.1388, + "step": 8220 + }, + { + "epoch": 0.32549585714568213, + "grad_norm": 1.003688864228743, + 
"learning_rate": 9.997804311195106e-06, + "loss": 1.1303, + "step": 8230 + }, + { + "epoch": 0.32589135636457117, + "grad_norm": 1.0103497056664168, + "learning_rate": 9.997735597870701e-06, + "loss": 1.1371, + "step": 8240 + }, + { + "epoch": 0.3262868555834602, + "grad_norm": 1.05287598804644, + "learning_rate": 9.997665826111518e-06, + "loss": 1.1368, + "step": 8250 + }, + { + "epoch": 0.32668235480234925, + "grad_norm": 1.175642449912088, + "learning_rate": 9.997594995932333e-06, + "loss": 1.1427, + "step": 8260 + }, + { + "epoch": 0.3270778540212383, + "grad_norm": 1.0735047383599023, + "learning_rate": 9.99752310734815e-06, + "loss": 1.1267, + "step": 8270 + }, + { + "epoch": 0.32747335324012733, + "grad_norm": 0.9774478877035402, + "learning_rate": 9.99745016037419e-06, + "loss": 1.1474, + "step": 8280 + }, + { + "epoch": 0.32786885245901637, + "grad_norm": 1.0373222243376572, + "learning_rate": 9.997376155025904e-06, + "loss": 1.1299, + "step": 8290 + }, + { + "epoch": 0.32826435167790546, + "grad_norm": 1.0347723390073413, + "learning_rate": 9.997301091318964e-06, + "loss": 1.13, + "step": 8300 + }, + { + "epoch": 0.3286598508967945, + "grad_norm": 1.0572562809127977, + "learning_rate": 9.997224969269268e-06, + "loss": 1.1376, + "step": 8310 + }, + { + "epoch": 0.32905535011568354, + "grad_norm": 1.0487817572388178, + "learning_rate": 9.997147788892936e-06, + "loss": 1.1183, + "step": 8320 + }, + { + "epoch": 0.3294508493345726, + "grad_norm": 0.9624167447177824, + "learning_rate": 9.997069550206315e-06, + "loss": 1.1303, + "step": 8330 + }, + { + "epoch": 0.3298463485534616, + "grad_norm": 1.117192972497449, + "learning_rate": 9.996990253225973e-06, + "loss": 1.141, + "step": 8340 + }, + { + "epoch": 0.33024184777235066, + "grad_norm": 1.0512934689987978, + "learning_rate": 9.996909897968705e-06, + "loss": 1.1164, + "step": 8350 + }, + { + "epoch": 0.3306373469912397, + "grad_norm": 1.0639151487875418, + "learning_rate": 9.996828484451531e-06, + "loss": 1.1281, + "step": 8360 + }, + { + "epoch": 0.33103284621012874, + "grad_norm": 1.0482950706258876, + "learning_rate": 9.996746012691687e-06, + "loss": 1.1245, + "step": 8370 + }, + { + "epoch": 0.3314283454290178, + "grad_norm": 0.993302669544618, + "learning_rate": 9.996662482706644e-06, + "loss": 1.1254, + "step": 8380 + }, + { + "epoch": 0.3318238446479068, + "grad_norm": 1.0470210374550084, + "learning_rate": 9.996577894514093e-06, + "loss": 1.1174, + "step": 8390 + }, + { + "epoch": 0.33221934386679586, + "grad_norm": 1.0306278610152682, + "learning_rate": 9.996492248131944e-06, + "loss": 1.1455, + "step": 8400 + }, + { + "epoch": 0.3326148430856849, + "grad_norm": 1.062753211878572, + "learning_rate": 9.996405543578339e-06, + "loss": 1.1423, + "step": 8410 + }, + { + "epoch": 0.33301034230457394, + "grad_norm": 1.0500947550400117, + "learning_rate": 9.996317780871638e-06, + "loss": 1.1468, + "step": 8420 + }, + { + "epoch": 0.333405841523463, + "grad_norm": 1.0349615376872283, + "learning_rate": 9.99622896003043e-06, + "loss": 1.124, + "step": 8430 + }, + { + "epoch": 0.333801340742352, + "grad_norm": 1.0565068248985459, + "learning_rate": 9.996139081073524e-06, + "loss": 1.1457, + "step": 8440 + }, + { + "epoch": 0.33419683996124105, + "grad_norm": 1.0076749331975892, + "learning_rate": 9.996048144019957e-06, + "loss": 1.1264, + "step": 8450 + }, + { + "epoch": 0.3345923391801301, + "grad_norm": 1.016972936264471, + "learning_rate": 9.995956148888983e-06, + "loss": 1.1515, + "step": 8460 + }, + { + "epoch": 
0.3349878383990192, + "grad_norm": 0.9848720500399132, + "learning_rate": 9.99586309570009e-06, + "loss": 1.1346, + "step": 8470 + }, + { + "epoch": 0.33538333761790823, + "grad_norm": 1.0678641620210505, + "learning_rate": 9.995768984472985e-06, + "loss": 1.121, + "step": 8480 + }, + { + "epoch": 0.33577883683679727, + "grad_norm": 1.0260256688449667, + "learning_rate": 9.995673815227596e-06, + "loss": 1.1432, + "step": 8490 + }, + { + "epoch": 0.3361743360556863, + "grad_norm": 1.0527333136612878, + "learning_rate": 9.99557758798408e-06, + "loss": 1.1533, + "step": 8500 + }, + { + "epoch": 0.33656983527457535, + "grad_norm": 1.0512450400570732, + "learning_rate": 9.995480302762816e-06, + "loss": 1.1211, + "step": 8510 + }, + { + "epoch": 0.3369653344934644, + "grad_norm": 0.9824509401570435, + "learning_rate": 9.995381959584407e-06, + "loss": 1.1213, + "step": 8520 + }, + { + "epoch": 0.3373608337123534, + "grad_norm": 1.0059149965826761, + "learning_rate": 9.995282558469681e-06, + "loss": 1.1261, + "step": 8530 + }, + { + "epoch": 0.33775633293124246, + "grad_norm": 1.024361140946889, + "learning_rate": 9.995182099439689e-06, + "loss": 1.1297, + "step": 8540 + }, + { + "epoch": 0.3381518321501315, + "grad_norm": 1.0198188759211742, + "learning_rate": 9.995080582515707e-06, + "loss": 1.1395, + "step": 8550 + }, + { + "epoch": 0.33854733136902054, + "grad_norm": 1.0720153277284645, + "learning_rate": 9.994978007719235e-06, + "loss": 1.1357, + "step": 8560 + }, + { + "epoch": 0.3389428305879096, + "grad_norm": 1.095374327537891, + "learning_rate": 9.994874375071995e-06, + "loss": 1.1204, + "step": 8570 + }, + { + "epoch": 0.3393383298067986, + "grad_norm": 1.0500355357486757, + "learning_rate": 9.994769684595934e-06, + "loss": 1.1193, + "step": 8580 + }, + { + "epoch": 0.33973382902568766, + "grad_norm": 1.0550273474361438, + "learning_rate": 9.994663936313228e-06, + "loss": 1.1327, + "step": 8590 + }, + { + "epoch": 0.3401293282445767, + "grad_norm": 1.0372600544669366, + "learning_rate": 9.994557130246269e-06, + "loss": 1.1109, + "step": 8600 + }, + { + "epoch": 0.34052482746346574, + "grad_norm": 1.0063154344165048, + "learning_rate": 9.994449266417679e-06, + "loss": 1.1425, + "step": 8610 + }, + { + "epoch": 0.3409203266823548, + "grad_norm": 1.003439581413564, + "learning_rate": 9.994340344850297e-06, + "loss": 1.1263, + "step": 8620 + }, + { + "epoch": 0.3413158259012438, + "grad_norm": 1.020846517256961, + "learning_rate": 9.994230365567196e-06, + "loss": 1.1342, + "step": 8630 + }, + { + "epoch": 0.3417113251201329, + "grad_norm": 0.9552756004207231, + "learning_rate": 9.994119328591665e-06, + "loss": 1.1352, + "step": 8640 + }, + { + "epoch": 0.34210682433902195, + "grad_norm": 1.029396470406454, + "learning_rate": 9.99400723394722e-06, + "loss": 1.1294, + "step": 8650 + }, + { + "epoch": 0.342502323557911, + "grad_norm": 0.9694119140311189, + "learning_rate": 9.993894081657603e-06, + "loss": 1.134, + "step": 8660 + }, + { + "epoch": 0.34289782277680003, + "grad_norm": 0.953088343990665, + "learning_rate": 9.993779871746773e-06, + "loss": 1.1296, + "step": 8670 + }, + { + "epoch": 0.34329332199568907, + "grad_norm": 1.0865767299958913, + "learning_rate": 9.993664604238922e-06, + "loss": 1.1428, + "step": 8680 + }, + { + "epoch": 0.3436888212145781, + "grad_norm": 1.0647804472961102, + "learning_rate": 9.99354827915846e-06, + "loss": 1.129, + "step": 8690 + }, + { + "epoch": 0.34408432043346715, + "grad_norm": 1.0618258705063175, + "learning_rate": 9.993430896530023e-06, + "loss": 
1.1537, + "step": 8700 + }, + { + "epoch": 0.3444798196523562, + "grad_norm": 1.1019951904190435, + "learning_rate": 9.993312456378473e-06, + "loss": 1.1248, + "step": 8710 + }, + { + "epoch": 0.34487531887124523, + "grad_norm": 0.982679017120889, + "learning_rate": 9.993192958728887e-06, + "loss": 1.1227, + "step": 8720 + }, + { + "epoch": 0.34527081809013427, + "grad_norm": 1.0440148335700177, + "learning_rate": 9.993072403606579e-06, + "loss": 1.1243, + "step": 8730 + }, + { + "epoch": 0.3456663173090233, + "grad_norm": 1.0077523853357433, + "learning_rate": 9.992950791037077e-06, + "loss": 1.1447, + "step": 8740 + }, + { + "epoch": 0.34606181652791235, + "grad_norm": 1.0414658166393587, + "learning_rate": 9.992828121046138e-06, + "loss": 1.1117, + "step": 8750 + }, + { + "epoch": 0.3464573157468014, + "grad_norm": 1.1905362292596, + "learning_rate": 9.992704393659742e-06, + "loss": 1.1233, + "step": 8760 + }, + { + "epoch": 0.3468528149656904, + "grad_norm": 1.0416872679906661, + "learning_rate": 9.99257960890409e-06, + "loss": 1.1383, + "step": 8770 + }, + { + "epoch": 0.34724831418457947, + "grad_norm": 1.1049680371902348, + "learning_rate": 9.992453766805613e-06, + "loss": 1.1306, + "step": 8780 + }, + { + "epoch": 0.3476438134034685, + "grad_norm": 1.0051653624864054, + "learning_rate": 9.992326867390958e-06, + "loss": 1.133, + "step": 8790 + }, + { + "epoch": 0.34803931262235754, + "grad_norm": 0.9983549946079809, + "learning_rate": 9.992198910687e-06, + "loss": 1.1205, + "step": 8800 + }, + { + "epoch": 0.34843481184124664, + "grad_norm": 1.010146381281008, + "learning_rate": 9.992069896720844e-06, + "loss": 1.1259, + "step": 8810 + }, + { + "epoch": 0.3488303110601357, + "grad_norm": 0.9587943420000765, + "learning_rate": 9.991939825519808e-06, + "loss": 1.1407, + "step": 8820 + }, + { + "epoch": 0.3492258102790247, + "grad_norm": 1.02595893601764, + "learning_rate": 9.991808697111438e-06, + "loss": 1.1134, + "step": 8830 + }, + { + "epoch": 0.34962130949791376, + "grad_norm": 1.134712823025914, + "learning_rate": 9.991676511523507e-06, + "loss": 1.1299, + "step": 8840 + }, + { + "epoch": 0.3500168087168028, + "grad_norm": 0.9259903359373105, + "learning_rate": 9.99154326878401e-06, + "loss": 1.1278, + "step": 8850 + }, + { + "epoch": 0.35041230793569184, + "grad_norm": 0.999047646858415, + "learning_rate": 9.991408968921164e-06, + "loss": 1.1308, + "step": 8860 + }, + { + "epoch": 0.3508078071545809, + "grad_norm": 1.0416490559257132, + "learning_rate": 9.991273611963413e-06, + "loss": 1.1095, + "step": 8870 + }, + { + "epoch": 0.3512033063734699, + "grad_norm": 1.0280117276909009, + "learning_rate": 9.991137197939422e-06, + "loss": 1.1319, + "step": 8880 + }, + { + "epoch": 0.35159880559235895, + "grad_norm": 0.9683079811229817, + "learning_rate": 9.990999726878082e-06, + "loss": 1.1366, + "step": 8890 + }, + { + "epoch": 0.351994304811248, + "grad_norm": 1.084251411653226, + "learning_rate": 9.990861198808505e-06, + "loss": 1.1248, + "step": 8900 + }, + { + "epoch": 0.35238980403013703, + "grad_norm": 1.1429364337151708, + "learning_rate": 9.990721613760033e-06, + "loss": 1.1426, + "step": 8910 + }, + { + "epoch": 0.35278530324902607, + "grad_norm": 1.0592192729220058, + "learning_rate": 9.990580971762222e-06, + "loss": 1.1104, + "step": 8920 + }, + { + "epoch": 0.3531808024679151, + "grad_norm": 1.0719208180664814, + "learning_rate": 9.990439272844864e-06, + "loss": 1.1251, + "step": 8930 + }, + { + "epoch": 0.35357630168680415, + "grad_norm": 0.9872484475798681, + 
"learning_rate": 9.990296517037965e-06, + "loss": 1.1137, + "step": 8940 + }, + { + "epoch": 0.3539718009056932, + "grad_norm": 1.059548749398577, + "learning_rate": 9.990152704371757e-06, + "loss": 1.1403, + "step": 8950 + }, + { + "epoch": 0.35436730012458223, + "grad_norm": 1.0636950555553508, + "learning_rate": 9.990007834876698e-06, + "loss": 1.1236, + "step": 8960 + }, + { + "epoch": 0.35476279934347127, + "grad_norm": 0.9765624645329233, + "learning_rate": 9.98986190858347e-06, + "loss": 1.1381, + "step": 8970 + }, + { + "epoch": 0.35515829856236036, + "grad_norm": 1.0347913337210208, + "learning_rate": 9.989714925522978e-06, + "loss": 1.1273, + "step": 8980 + }, + { + "epoch": 0.3555537977812494, + "grad_norm": 0.9929768089068478, + "learning_rate": 9.989566885726348e-06, + "loss": 1.1327, + "step": 8990 + }, + { + "epoch": 0.35594929700013844, + "grad_norm": 1.0786578398767044, + "learning_rate": 9.989417789224933e-06, + "loss": 1.1317, + "step": 9000 + }, + { + "epoch": 0.3563447962190275, + "grad_norm": 1.0329841930485364, + "learning_rate": 9.989267636050312e-06, + "loss": 1.1167, + "step": 9010 + }, + { + "epoch": 0.3567402954379165, + "grad_norm": 1.089220080518237, + "learning_rate": 9.989116426234282e-06, + "loss": 1.1352, + "step": 9020 + }, + { + "epoch": 0.35713579465680556, + "grad_norm": 0.9763428588628974, + "learning_rate": 9.988964159808868e-06, + "loss": 1.1391, + "step": 9030 + }, + { + "epoch": 0.3575312938756946, + "grad_norm": 1.0717155765345276, + "learning_rate": 9.988810836806316e-06, + "loss": 1.1282, + "step": 9040 + }, + { + "epoch": 0.35792679309458364, + "grad_norm": 0.9923632888709168, + "learning_rate": 9.988656457259098e-06, + "loss": 1.1256, + "step": 9050 + }, + { + "epoch": 0.3583222923134727, + "grad_norm": 1.1279892149995216, + "learning_rate": 9.988501021199909e-06, + "loss": 1.1184, + "step": 9060 + }, + { + "epoch": 0.3587177915323617, + "grad_norm": 1.1052382595200194, + "learning_rate": 9.98834452866167e-06, + "loss": 1.0957, + "step": 9070 + }, + { + "epoch": 0.35911329075125076, + "grad_norm": 1.051273066699656, + "learning_rate": 9.988186979677516e-06, + "loss": 1.1223, + "step": 9080 + }, + { + "epoch": 0.3595087899701398, + "grad_norm": 1.1603777999114528, + "learning_rate": 9.988028374280823e-06, + "loss": 1.1247, + "step": 9090 + }, + { + "epoch": 0.35990428918902884, + "grad_norm": 1.0289379139218968, + "learning_rate": 9.987868712505173e-06, + "loss": 1.1153, + "step": 9100 + }, + { + "epoch": 0.3602997884079179, + "grad_norm": 1.01444739938161, + "learning_rate": 9.987707994384384e-06, + "loss": 1.1209, + "step": 9110 + }, + { + "epoch": 0.3606952876268069, + "grad_norm": 1.0022852699404514, + "learning_rate": 9.987546219952493e-06, + "loss": 1.1272, + "step": 9120 + }, + { + "epoch": 0.36109078684569595, + "grad_norm": 1.0095822304412834, + "learning_rate": 9.98738338924376e-06, + "loss": 1.1376, + "step": 9130 + }, + { + "epoch": 0.36148628606458505, + "grad_norm": 1.0494025047808524, + "learning_rate": 9.987219502292669e-06, + "loss": 1.1121, + "step": 9140 + }, + { + "epoch": 0.3618817852834741, + "grad_norm": 1.1322077048475407, + "learning_rate": 9.98705455913393e-06, + "loss": 1.126, + "step": 9150 + }, + { + "epoch": 0.36227728450236313, + "grad_norm": 1.1236124912176868, + "learning_rate": 9.986888559802475e-06, + "loss": 1.1224, + "step": 9160 + }, + { + "epoch": 0.36267278372125217, + "grad_norm": 0.9997482962234848, + "learning_rate": 9.986721504333459e-06, + "loss": 1.1196, + "step": 9170 + }, + { + "epoch": 
0.3630682829401412, + "grad_norm": 0.9925651425548624, + "learning_rate": 9.98655339276226e-06, + "loss": 1.127, + "step": 9180 + }, + { + "epoch": 0.36346378215903025, + "grad_norm": 1.077678112281237, + "learning_rate": 9.986384225124486e-06, + "loss": 1.1072, + "step": 9190 + }, + { + "epoch": 0.3638592813779193, + "grad_norm": 1.073854043618709, + "learning_rate": 9.98621400145596e-06, + "loss": 1.1167, + "step": 9200 + }, + { + "epoch": 0.3642547805968083, + "grad_norm": 1.0539842850855157, + "learning_rate": 9.986042721792733e-06, + "loss": 1.1262, + "step": 9210 + }, + { + "epoch": 0.36465027981569736, + "grad_norm": 1.1017431606474788, + "learning_rate": 9.985870386171079e-06, + "loss": 1.134, + "step": 9220 + }, + { + "epoch": 0.3650457790345864, + "grad_norm": 1.0682422190223901, + "learning_rate": 9.985696994627495e-06, + "loss": 1.1041, + "step": 9230 + }, + { + "epoch": 0.36544127825347544, + "grad_norm": 1.0477678757749243, + "learning_rate": 9.985522547198705e-06, + "loss": 1.1007, + "step": 9240 + }, + { + "epoch": 0.3658367774723645, + "grad_norm": 1.1042800116769727, + "learning_rate": 9.985347043921651e-06, + "loss": 1.1043, + "step": 9250 + }, + { + "epoch": 0.3662322766912535, + "grad_norm": 0.997968398430382, + "learning_rate": 9.985170484833504e-06, + "loss": 1.1076, + "step": 9260 + }, + { + "epoch": 0.36662777591014256, + "grad_norm": 0.977491959015956, + "learning_rate": 9.984992869971656e-06, + "loss": 1.1237, + "step": 9270 + }, + { + "epoch": 0.3670232751290316, + "grad_norm": 1.1095048264133744, + "learning_rate": 9.98481419937372e-06, + "loss": 1.1371, + "step": 9280 + }, + { + "epoch": 0.36741877434792064, + "grad_norm": 1.037416916737659, + "learning_rate": 9.98463447307754e-06, + "loss": 1.1153, + "step": 9290 + }, + { + "epoch": 0.3678142735668097, + "grad_norm": 1.0086105496331816, + "learning_rate": 9.984453691121174e-06, + "loss": 1.1204, + "step": 9300 + }, + { + "epoch": 0.3682097727856988, + "grad_norm": 1.0816176705148972, + "learning_rate": 9.984271853542913e-06, + "loss": 1.1, + "step": 9310 + }, + { + "epoch": 0.3686052720045878, + "grad_norm": 0.9572557457076023, + "learning_rate": 9.984088960381262e-06, + "loss": 1.1277, + "step": 9320 + }, + { + "epoch": 0.36900077122347685, + "grad_norm": 1.0295004448675464, + "learning_rate": 9.98390501167496e-06, + "loss": 1.1171, + "step": 9330 + }, + { + "epoch": 0.3693962704423659, + "grad_norm": 1.0427358996392067, + "learning_rate": 9.98372000746296e-06, + "loss": 1.1357, + "step": 9340 + }, + { + "epoch": 0.36979176966125493, + "grad_norm": 1.116715702244539, + "learning_rate": 9.983533947784445e-06, + "loss": 1.1302, + "step": 9350 + }, + { + "epoch": 0.37018726888014397, + "grad_norm": 1.0522704178594473, + "learning_rate": 9.98334683267882e-06, + "loss": 1.1122, + "step": 9360 + }, + { + "epoch": 0.370582768099033, + "grad_norm": 1.0634456329625646, + "learning_rate": 9.983158662185711e-06, + "loss": 1.1228, + "step": 9370 + }, + { + "epoch": 0.37097826731792205, + "grad_norm": 1.0710293850768031, + "learning_rate": 9.98296943634497e-06, + "loss": 1.136, + "step": 9380 + }, + { + "epoch": 0.3713737665368111, + "grad_norm": 1.0353189911976883, + "learning_rate": 9.98277915519667e-06, + "loss": 1.1223, + "step": 9390 + }, + { + "epoch": 0.37176926575570013, + "grad_norm": 1.0050316874217902, + "learning_rate": 9.982587818781111e-06, + "loss": 1.127, + "step": 9400 + }, + { + "epoch": 0.37216476497458917, + "grad_norm": 1.0339994239160502, + "learning_rate": 9.982395427138816e-06, + "loss": 1.1095, + 
"step": 9410 + }, + { + "epoch": 0.3725602641934782, + "grad_norm": 1.104042746902873, + "learning_rate": 9.982201980310529e-06, + "loss": 1.1227, + "step": 9420 + }, + { + "epoch": 0.37295576341236725, + "grad_norm": 0.9584769582046361, + "learning_rate": 9.982007478337216e-06, + "loss": 1.1271, + "step": 9430 + }, + { + "epoch": 0.3733512626312563, + "grad_norm": 0.9976877638030883, + "learning_rate": 9.981811921260074e-06, + "loss": 1.1175, + "step": 9440 + }, + { + "epoch": 0.3737467618501453, + "grad_norm": 0.9805635456300361, + "learning_rate": 9.981615309120516e-06, + "loss": 1.1071, + "step": 9450 + }, + { + "epoch": 0.37414226106903437, + "grad_norm": 1.0048154974103545, + "learning_rate": 9.981417641960181e-06, + "loss": 1.1029, + "step": 9460 + }, + { + "epoch": 0.3745377602879234, + "grad_norm": 1.0439486224751233, + "learning_rate": 9.981218919820932e-06, + "loss": 1.1237, + "step": 9470 + }, + { + "epoch": 0.3749332595068125, + "grad_norm": 1.0164029900498224, + "learning_rate": 9.981019142744857e-06, + "loss": 1.1158, + "step": 9480 + }, + { + "epoch": 0.37532875872570154, + "grad_norm": 1.0254852052252992, + "learning_rate": 9.980818310774261e-06, + "loss": 1.1131, + "step": 9490 + }, + { + "epoch": 0.3757242579445906, + "grad_norm": 1.1757298652288735, + "learning_rate": 9.98061642395168e-06, + "loss": 1.1077, + "step": 9500 + }, + { + "epoch": 0.3761197571634796, + "grad_norm": 1.106176068094394, + "learning_rate": 9.98041348231987e-06, + "loss": 1.0995, + "step": 9510 + }, + { + "epoch": 0.37651525638236866, + "grad_norm": 1.0229181142716006, + "learning_rate": 9.980209485921808e-06, + "loss": 1.0947, + "step": 9520 + }, + { + "epoch": 0.3769107556012577, + "grad_norm": 1.05081095195992, + "learning_rate": 9.980004434800701e-06, + "loss": 1.1137, + "step": 9530 + }, + { + "epoch": 0.37730625482014674, + "grad_norm": 0.9923798410443914, + "learning_rate": 9.979798328999972e-06, + "loss": 1.1011, + "step": 9540 + }, + { + "epoch": 0.3777017540390358, + "grad_norm": 0.9660535259951161, + "learning_rate": 9.97959116856327e-06, + "loss": 1.1131, + "step": 9550 + }, + { + "epoch": 0.3780972532579248, + "grad_norm": 1.0503255304124983, + "learning_rate": 9.979382953534473e-06, + "loss": 1.1121, + "step": 9560 + }, + { + "epoch": 0.37849275247681385, + "grad_norm": 1.0367437186998356, + "learning_rate": 9.979173683957672e-06, + "loss": 1.1252, + "step": 9570 + }, + { + "epoch": 0.3788882516957029, + "grad_norm": 1.0852408275647987, + "learning_rate": 9.97896335987719e-06, + "loss": 1.0986, + "step": 9580 + }, + { + "epoch": 0.37928375091459193, + "grad_norm": 1.102797553310787, + "learning_rate": 9.978751981337567e-06, + "loss": 1.1327, + "step": 9590 + }, + { + "epoch": 0.37967925013348097, + "grad_norm": 1.0368463735572258, + "learning_rate": 9.978539548383573e-06, + "loss": 1.1194, + "step": 9600 + }, + { + "epoch": 0.38007474935237, + "grad_norm": 1.014238982571862, + "learning_rate": 9.978326061060195e-06, + "loss": 1.1146, + "step": 9610 + }, + { + "epoch": 0.38047024857125905, + "grad_norm": 1.08945631058018, + "learning_rate": 9.978111519412648e-06, + "loss": 1.1038, + "step": 9620 + }, + { + "epoch": 0.3808657477901481, + "grad_norm": 1.0525206968371366, + "learning_rate": 9.977895923486368e-06, + "loss": 1.1058, + "step": 9630 + }, + { + "epoch": 0.38126124700903713, + "grad_norm": 1.0878230503104729, + "learning_rate": 9.97767927332701e-06, + "loss": 1.1123, + "step": 9640 + }, + { + "epoch": 0.3816567462279262, + "grad_norm": 1.0400449299260532, + "learning_rate": 
9.977461568980464e-06, + "loss": 1.1126, + "step": 9650 + }, + { + "epoch": 0.38205224544681526, + "grad_norm": 1.0395133989568093, + "learning_rate": 9.977242810492832e-06, + "loss": 1.1055, + "step": 9660 + }, + { + "epoch": 0.3824477446657043, + "grad_norm": 0.9849476878072775, + "learning_rate": 9.977022997910443e-06, + "loss": 1.1194, + "step": 9670 + }, + { + "epoch": 0.38284324388459334, + "grad_norm": 1.0628990205483286, + "learning_rate": 9.97680213127985e-06, + "loss": 1.1225, + "step": 9680 + }, + { + "epoch": 0.3832387431034824, + "grad_norm": 1.07299013404265, + "learning_rate": 9.97658021064783e-06, + "loss": 1.1099, + "step": 9690 + }, + { + "epoch": 0.3836342423223714, + "grad_norm": 1.0547605927236203, + "learning_rate": 9.97635723606138e-06, + "loss": 1.1022, + "step": 9700 + }, + { + "epoch": 0.38402974154126046, + "grad_norm": 1.0043065922738772, + "learning_rate": 9.976133207567724e-06, + "loss": 1.1013, + "step": 9710 + }, + { + "epoch": 0.3844252407601495, + "grad_norm": 1.0295427867283096, + "learning_rate": 9.975908125214306e-06, + "loss": 1.1064, + "step": 9720 + }, + { + "epoch": 0.38482073997903854, + "grad_norm": 1.0029088222979348, + "learning_rate": 9.975681989048797e-06, + "loss": 1.1157, + "step": 9730 + }, + { + "epoch": 0.3852162391979276, + "grad_norm": 1.0216732831230682, + "learning_rate": 9.975454799119086e-06, + "loss": 1.1314, + "step": 9740 + }, + { + "epoch": 0.3856117384168166, + "grad_norm": 0.9637745317609469, + "learning_rate": 9.975226555473289e-06, + "loss": 1.1358, + "step": 9750 + }, + { + "epoch": 0.38600723763570566, + "grad_norm": 1.0179867247304883, + "learning_rate": 9.974997258159744e-06, + "loss": 1.1123, + "step": 9760 + }, + { + "epoch": 0.3864027368545947, + "grad_norm": 1.0121486615203783, + "learning_rate": 9.974766907227012e-06, + "loss": 1.1238, + "step": 9770 + }, + { + "epoch": 0.38679823607348374, + "grad_norm": 0.9918105729938603, + "learning_rate": 9.974535502723878e-06, + "loss": 1.1036, + "step": 9780 + }, + { + "epoch": 0.3871937352923728, + "grad_norm": 1.0458694498923289, + "learning_rate": 9.97430304469935e-06, + "loss": 1.1226, + "step": 9790 + }, + { + "epoch": 0.3875892345112618, + "grad_norm": 0.9793050895150185, + "learning_rate": 9.974069533202656e-06, + "loss": 1.1171, + "step": 9800 + }, + { + "epoch": 0.3879847337301509, + "grad_norm": 0.9793473334784043, + "learning_rate": 9.973834968283253e-06, + "loss": 1.1346, + "step": 9810 + }, + { + "epoch": 0.38838023294903995, + "grad_norm": 1.001509588770705, + "learning_rate": 9.973599349990815e-06, + "loss": 1.1242, + "step": 9820 + }, + { + "epoch": 0.388775732167929, + "grad_norm": 0.9710508476739187, + "learning_rate": 9.973362678375245e-06, + "loss": 1.1104, + "step": 9830 + }, + { + "epoch": 0.38917123138681803, + "grad_norm": 1.0417830548414544, + "learning_rate": 9.973124953486664e-06, + "loss": 1.1068, + "step": 9840 + }, + { + "epoch": 0.38956673060570707, + "grad_norm": 0.9162198755636622, + "learning_rate": 9.972886175375418e-06, + "loss": 1.0954, + "step": 9850 + }, + { + "epoch": 0.3899622298245961, + "grad_norm": 1.0406694536486392, + "learning_rate": 9.972646344092075e-06, + "loss": 1.1072, + "step": 9860 + }, + { + "epoch": 0.39035772904348515, + "grad_norm": 1.0711449808285272, + "learning_rate": 9.972405459687432e-06, + "loss": 1.1126, + "step": 9870 + }, + { + "epoch": 0.3907532282623742, + "grad_norm": 0.9852329077584552, + "learning_rate": 9.9721635222125e-06, + "loss": 1.0983, + "step": 9880 + }, + { + "epoch": 0.3911487274812632, + 
"grad_norm": 1.0693895343040385, + "learning_rate": 9.971920531718515e-06, + "loss": 1.0997, + "step": 9890 + }, + { + "epoch": 0.39154422670015226, + "grad_norm": 0.9214177095460434, + "learning_rate": 9.971676488256944e-06, + "loss": 1.1064, + "step": 9900 + }, + { + "epoch": 0.3919397259190413, + "grad_norm": 1.0337024299120587, + "learning_rate": 9.971431391879467e-06, + "loss": 1.1204, + "step": 9910 + }, + { + "epoch": 0.39233522513793034, + "grad_norm": 0.9560437759606772, + "learning_rate": 9.971185242637994e-06, + "loss": 1.1122, + "step": 9920 + }, + { + "epoch": 0.3927307243568194, + "grad_norm": 1.0551011377239838, + "learning_rate": 9.970938040584654e-06, + "loss": 1.1075, + "step": 9930 + }, + { + "epoch": 0.3931262235757084, + "grad_norm": 1.151113115049536, + "learning_rate": 9.970689785771798e-06, + "loss": 1.0967, + "step": 9940 + }, + { + "epoch": 0.39352172279459746, + "grad_norm": 1.0201224138961769, + "learning_rate": 9.970440478252007e-06, + "loss": 1.1, + "step": 9950 + }, + { + "epoch": 0.3939172220134865, + "grad_norm": 1.0795605295628778, + "learning_rate": 9.970190118078076e-06, + "loss": 1.1205, + "step": 9960 + }, + { + "epoch": 0.39431272123237554, + "grad_norm": 1.0083725236350867, + "learning_rate": 9.969938705303027e-06, + "loss": 1.1073, + "step": 9970 + }, + { + "epoch": 0.39470822045126464, + "grad_norm": 1.121994911796126, + "learning_rate": 9.969686239980108e-06, + "loss": 1.1101, + "step": 9980 + }, + { + "epoch": 0.3951037196701537, + "grad_norm": 1.063411115963048, + "learning_rate": 9.969432722162783e-06, + "loss": 1.1193, + "step": 9990 + }, + { + "epoch": 0.3954992188890427, + "grad_norm": 0.9878103434786119, + "learning_rate": 9.969178151904747e-06, + "loss": 1.1162, + "step": 10000 + }, + { + "epoch": 0.39589471810793175, + "grad_norm": 1.0938036325499716, + "learning_rate": 9.96892252925991e-06, + "loss": 1.1101, + "step": 10010 + }, + { + "epoch": 0.3962902173268208, + "grad_norm": 1.0270892809102057, + "learning_rate": 9.96866585428241e-06, + "loss": 1.1021, + "step": 10020 + }, + { + "epoch": 0.39668571654570983, + "grad_norm": 1.0218555121076212, + "learning_rate": 9.968408127026607e-06, + "loss": 1.1063, + "step": 10030 + }, + { + "epoch": 0.39708121576459887, + "grad_norm": 1.0649632847639012, + "learning_rate": 9.96814934754708e-06, + "loss": 1.0916, + "step": 10040 + }, + { + "epoch": 0.3974767149834879, + "grad_norm": 0.9610939878087368, + "learning_rate": 9.967889515898639e-06, + "loss": 1.1209, + "step": 10050 + }, + { + "epoch": 0.39787221420237695, + "grad_norm": 0.9618619533903922, + "learning_rate": 9.967628632136309e-06, + "loss": 1.0864, + "step": 10060 + }, + { + "epoch": 0.398267713421266, + "grad_norm": 1.0807077777484355, + "learning_rate": 9.967366696315341e-06, + "loss": 1.1199, + "step": 10070 + }, + { + "epoch": 0.39866321264015503, + "grad_norm": 1.0834602594062668, + "learning_rate": 9.967103708491208e-06, + "loss": 1.1092, + "step": 10080 + }, + { + "epoch": 0.39905871185904407, + "grad_norm": 1.1191855887555566, + "learning_rate": 9.966839668719606e-06, + "loss": 1.1088, + "step": 10090 + }, + { + "epoch": 0.3994542110779331, + "grad_norm": 0.9072934458458796, + "learning_rate": 9.966574577056456e-06, + "loss": 1.0996, + "step": 10100 + }, + { + "epoch": 0.39984971029682215, + "grad_norm": 1.0565222435402193, + "learning_rate": 9.966308433557898e-06, + "loss": 1.087, + "step": 10110 + }, + { + "epoch": 0.4002452095157112, + "grad_norm": 0.9801589633119278, + "learning_rate": 9.9660412382803e-06, + "loss": 1.0973, 
+ "step": 10120 + }, + { + "epoch": 0.4006407087346002, + "grad_norm": 1.0435532438776338, + "learning_rate": 9.965772991280245e-06, + "loss": 1.1139, + "step": 10130 + }, + { + "epoch": 0.40103620795348927, + "grad_norm": 0.9504987792409991, + "learning_rate": 9.965503692614546e-06, + "loss": 1.105, + "step": 10140 + }, + { + "epoch": 0.40143170717237836, + "grad_norm": 0.9888649298002512, + "learning_rate": 9.965233342340234e-06, + "loss": 1.1218, + "step": 10150 + }, + { + "epoch": 0.4018272063912674, + "grad_norm": 1.0699139847993098, + "learning_rate": 9.964961940514566e-06, + "loss": 1.1051, + "step": 10160 + }, + { + "epoch": 0.40222270561015644, + "grad_norm": 1.0575019427475523, + "learning_rate": 9.964689487195018e-06, + "loss": 1.0961, + "step": 10170 + }, + { + "epoch": 0.4026182048290455, + "grad_norm": 1.0329371927847701, + "learning_rate": 9.964415982439295e-06, + "loss": 1.1151, + "step": 10180 + }, + { + "epoch": 0.4030137040479345, + "grad_norm": 1.071340976149401, + "learning_rate": 9.964141426305317e-06, + "loss": 1.0811, + "step": 10190 + }, + { + "epoch": 0.40340920326682356, + "grad_norm": 1.004672370789076, + "learning_rate": 9.96386581885123e-06, + "loss": 1.1283, + "step": 10200 + }, + { + "epoch": 0.4038047024857126, + "grad_norm": 1.0359033718828674, + "learning_rate": 9.963589160135408e-06, + "loss": 1.1197, + "step": 10210 + }, + { + "epoch": 0.40420020170460164, + "grad_norm": 1.0139309760656154, + "learning_rate": 9.963311450216436e-06, + "loss": 1.1142, + "step": 10220 + }, + { + "epoch": 0.4045957009234907, + "grad_norm": 0.980819296100321, + "learning_rate": 9.963032689153133e-06, + "loss": 1.1149, + "step": 10230 + }, + { + "epoch": 0.4049912001423797, + "grad_norm": 1.0669016594371714, + "learning_rate": 9.962752877004533e-06, + "loss": 1.1158, + "step": 10240 + }, + { + "epoch": 0.40538669936126875, + "grad_norm": 1.009605538851146, + "learning_rate": 9.962472013829897e-06, + "loss": 1.1037, + "step": 10250 + }, + { + "epoch": 0.4057821985801578, + "grad_norm": 1.0624790508946451, + "learning_rate": 9.962190099688707e-06, + "loss": 1.0977, + "step": 10260 + }, + { + "epoch": 0.40617769779904683, + "grad_norm": 1.027668060928655, + "learning_rate": 9.961907134640665e-06, + "loss": 1.1112, + "step": 10270 + }, + { + "epoch": 0.4065731970179359, + "grad_norm": 0.9587393153295167, + "learning_rate": 9.961623118745702e-06, + "loss": 1.0933, + "step": 10280 + }, + { + "epoch": 0.4069686962368249, + "grad_norm": 0.9798009593140417, + "learning_rate": 9.961338052063966e-06, + "loss": 1.0934, + "step": 10290 + }, + { + "epoch": 0.40736419545571395, + "grad_norm": 1.0802394118511471, + "learning_rate": 9.961051934655829e-06, + "loss": 1.0963, + "step": 10300 + }, + { + "epoch": 0.407759694674603, + "grad_norm": 1.0214608892621604, + "learning_rate": 9.960764766581884e-06, + "loss": 1.112, + "step": 10310 + }, + { + "epoch": 0.4081551938934921, + "grad_norm": 1.0436194949255686, + "learning_rate": 9.960476547902954e-06, + "loss": 1.0966, + "step": 10320 + }, + { + "epoch": 0.4085506931123811, + "grad_norm": 0.9598798853908683, + "learning_rate": 9.960187278680071e-06, + "loss": 1.1026, + "step": 10330 + }, + { + "epoch": 0.40894619233127016, + "grad_norm": 0.9534817754362086, + "learning_rate": 9.959896958974504e-06, + "loss": 1.1222, + "step": 10340 + }, + { + "epoch": 0.4093416915501592, + "grad_norm": 1.0059454448103347, + "learning_rate": 9.959605588847734e-06, + "loss": 1.11, + "step": 10350 + }, + { + "epoch": 0.40973719076904824, + "grad_norm": 
1.0241136722645863, + "learning_rate": 9.95931316836147e-06, + "loss": 1.1033, + "step": 10360 + }, + { + "epoch": 0.4101326899879373, + "grad_norm": 1.0233542649657095, + "learning_rate": 9.959019697577639e-06, + "loss": 1.0957, + "step": 10370 + }, + { + "epoch": 0.4105281892068263, + "grad_norm": 1.004489393605286, + "learning_rate": 9.958725176558397e-06, + "loss": 1.1182, + "step": 10380 + }, + { + "epoch": 0.41092368842571536, + "grad_norm": 1.066089084963295, + "learning_rate": 9.958429605366116e-06, + "loss": 1.1046, + "step": 10390 + }, + { + "epoch": 0.4113191876446044, + "grad_norm": 1.0556130220433249, + "learning_rate": 9.958132984063391e-06, + "loss": 1.1104, + "step": 10400 + }, + { + "epoch": 0.41171468686349344, + "grad_norm": 1.0577388426627141, + "learning_rate": 9.957835312713047e-06, + "loss": 1.1078, + "step": 10410 + }, + { + "epoch": 0.4121101860823825, + "grad_norm": 1.0253595957999095, + "learning_rate": 9.95753659137812e-06, + "loss": 1.0947, + "step": 10420 + }, + { + "epoch": 0.4125056853012715, + "grad_norm": 1.0002491533261828, + "learning_rate": 9.957236820121877e-06, + "loss": 1.092, + "step": 10430 + }, + { + "epoch": 0.41290118452016056, + "grad_norm": 0.9888996364654783, + "learning_rate": 9.956935999007804e-06, + "loss": 1.1111, + "step": 10440 + }, + { + "epoch": 0.4132966837390496, + "grad_norm": 0.9740027516031293, + "learning_rate": 9.95663412809961e-06, + "loss": 1.1146, + "step": 10450 + }, + { + "epoch": 0.41369218295793864, + "grad_norm": 1.009827198550138, + "learning_rate": 9.956331207461225e-06, + "loss": 1.0992, + "step": 10460 + }, + { + "epoch": 0.4140876821768277, + "grad_norm": 1.0188489363577975, + "learning_rate": 9.956027237156802e-06, + "loss": 1.1024, + "step": 10470 + }, + { + "epoch": 0.41448318139571677, + "grad_norm": 1.0173021251829502, + "learning_rate": 9.95572221725072e-06, + "loss": 1.1033, + "step": 10480 + }, + { + "epoch": 0.4148786806146058, + "grad_norm": 1.0002097050284078, + "learning_rate": 9.955416147807575e-06, + "loss": 1.0835, + "step": 10490 + }, + { + "epoch": 0.41527417983349485, + "grad_norm": 1.0642686991454433, + "learning_rate": 9.955109028892184e-06, + "loss": 1.1141, + "step": 10500 + }, + { + "epoch": 0.4156696790523839, + "grad_norm": 1.1188278694377543, + "learning_rate": 9.954800860569596e-06, + "loss": 1.1086, + "step": 10510 + }, + { + "epoch": 0.41606517827127293, + "grad_norm": 1.0944385863207664, + "learning_rate": 9.95449164290507e-06, + "loss": 1.0843, + "step": 10520 + }, + { + "epoch": 0.41646067749016197, + "grad_norm": 1.0768852933614141, + "learning_rate": 9.954181375964097e-06, + "loss": 1.098, + "step": 10530 + }, + { + "epoch": 0.416856176709051, + "grad_norm": 0.9951046848585466, + "learning_rate": 9.953870059812382e-06, + "loss": 1.1012, + "step": 10540 + }, + { + "epoch": 0.41725167592794005, + "grad_norm": 0.9933143998683207, + "learning_rate": 9.95355769451586e-06, + "loss": 1.0885, + "step": 10550 + }, + { + "epoch": 0.4176471751468291, + "grad_norm": 1.0131764592022374, + "learning_rate": 9.953244280140684e-06, + "loss": 1.1033, + "step": 10560 + }, + { + "epoch": 0.4180426743657181, + "grad_norm": 1.055615563609746, + "learning_rate": 9.952929816753229e-06, + "loss": 1.098, + "step": 10570 + }, + { + "epoch": 0.41843817358460716, + "grad_norm": 1.07471858065964, + "learning_rate": 9.952614304420096e-06, + "loss": 1.1043, + "step": 10580 + }, + { + "epoch": 0.4188336728034962, + "grad_norm": 1.1261206466260603, + "learning_rate": 9.952297743208099e-06, + "loss": 1.0996, + 
"step": 10590 + }, + { + "epoch": 0.41922917202238524, + "grad_norm": 1.0580983550302636, + "learning_rate": 9.951980133184285e-06, + "loss": 1.0838, + "step": 10600 + }, + { + "epoch": 0.4196246712412743, + "grad_norm": 1.0264943411677727, + "learning_rate": 9.951661474415917e-06, + "loss": 1.0974, + "step": 10610 + }, + { + "epoch": 0.4200201704601633, + "grad_norm": 0.9534541948641568, + "learning_rate": 9.951341766970481e-06, + "loss": 1.1084, + "step": 10620 + }, + { + "epoch": 0.42041566967905236, + "grad_norm": 0.982430076540803, + "learning_rate": 9.951021010915687e-06, + "loss": 1.0972, + "step": 10630 + }, + { + "epoch": 0.4208111688979414, + "grad_norm": 0.9654257368809844, + "learning_rate": 9.950699206319465e-06, + "loss": 1.0962, + "step": 10640 + }, + { + "epoch": 0.4212066681168305, + "grad_norm": 1.0502245195456388, + "learning_rate": 9.950376353249966e-06, + "loss": 1.088, + "step": 10650 + }, + { + "epoch": 0.42160216733571954, + "grad_norm": 0.9216094220145762, + "learning_rate": 9.950052451775566e-06, + "loss": 1.0977, + "step": 10660 + }, + { + "epoch": 0.4219976665546086, + "grad_norm": 0.9815229444734691, + "learning_rate": 9.949727501964865e-06, + "loss": 1.1054, + "step": 10670 + }, + { + "epoch": 0.4223931657734976, + "grad_norm": 1.082794253318771, + "learning_rate": 9.949401503886676e-06, + "loss": 1.1091, + "step": 10680 + }, + { + "epoch": 0.42278866499238665, + "grad_norm": 1.0199739190386292, + "learning_rate": 9.949074457610044e-06, + "loss": 1.0938, + "step": 10690 + }, + { + "epoch": 0.4231841642112757, + "grad_norm": 1.0513838285010413, + "learning_rate": 9.948746363204229e-06, + "loss": 1.0902, + "step": 10700 + }, + { + "epoch": 0.42357966343016473, + "grad_norm": 1.0671073155623783, + "learning_rate": 9.948417220738718e-06, + "loss": 1.0938, + "step": 10710 + }, + { + "epoch": 0.42397516264905377, + "grad_norm": 1.078713729070709, + "learning_rate": 9.948087030283215e-06, + "loss": 1.0994, + "step": 10720 + }, + { + "epoch": 0.4243706618679428, + "grad_norm": 1.0682584409664808, + "learning_rate": 9.947755791907654e-06, + "loss": 1.087, + "step": 10730 + }, + { + "epoch": 0.42476616108683185, + "grad_norm": 1.0971027622201872, + "learning_rate": 9.947423505682178e-06, + "loss": 1.0782, + "step": 10740 + }, + { + "epoch": 0.4251616603057209, + "grad_norm": 1.0131832229168385, + "learning_rate": 9.947090171677167e-06, + "loss": 1.1163, + "step": 10750 + }, + { + "epoch": 0.42555715952460993, + "grad_norm": 1.008127849914707, + "learning_rate": 9.946755789963211e-06, + "loss": 1.094, + "step": 10760 + }, + { + "epoch": 0.42595265874349897, + "grad_norm": 0.9861033669460224, + "learning_rate": 9.94642036061113e-06, + "loss": 1.0984, + "step": 10770 + }, + { + "epoch": 0.426348157962388, + "grad_norm": 0.9440359467572984, + "learning_rate": 9.946083883691955e-06, + "loss": 1.1009, + "step": 10780 + }, + { + "epoch": 0.42674365718127705, + "grad_norm": 0.9137583200995897, + "learning_rate": 9.945746359276954e-06, + "loss": 1.1187, + "step": 10790 + }, + { + "epoch": 0.4271391564001661, + "grad_norm": 1.0012169737165786, + "learning_rate": 9.945407787437604e-06, + "loss": 1.1036, + "step": 10800 + }, + { + "epoch": 0.4275346556190551, + "grad_norm": 1.1283936633576026, + "learning_rate": 9.94506816824561e-06, + "loss": 1.1035, + "step": 10810 + }, + { + "epoch": 0.4279301548379442, + "grad_norm": 1.0048217417799656, + "learning_rate": 9.9447275017729e-06, + "loss": 1.0896, + "step": 10820 + }, + { + "epoch": 0.42832565405683326, + "grad_norm": 
0.94677519928578, + "learning_rate": 9.944385788091617e-06, + "loss": 1.0873, + "step": 10830 + }, + { + "epoch": 0.4287211532757223, + "grad_norm": 1.0430445396126522, + "learning_rate": 9.944043027274133e-06, + "loss": 1.0828, + "step": 10840 + }, + { + "epoch": 0.42911665249461134, + "grad_norm": 0.9860688108752107, + "learning_rate": 9.943699219393038e-06, + "loss": 1.1124, + "step": 10850 + }, + { + "epoch": 0.4295121517135004, + "grad_norm": 1.050354458261774, + "learning_rate": 9.943354364521145e-06, + "loss": 1.0949, + "step": 10860 + }, + { + "epoch": 0.4299076509323894, + "grad_norm": 0.9830839591794984, + "learning_rate": 9.943008462731487e-06, + "loss": 1.1098, + "step": 10870 + }, + { + "epoch": 0.43030315015127846, + "grad_norm": 1.0779875148884694, + "learning_rate": 9.942661514097322e-06, + "loss": 1.0864, + "step": 10880 + }, + { + "epoch": 0.4306986493701675, + "grad_norm": 0.9933059459800833, + "learning_rate": 9.942313518692126e-06, + "loss": 1.1168, + "step": 10890 + }, + { + "epoch": 0.43109414858905654, + "grad_norm": 1.0597642753884047, + "learning_rate": 9.9419644765896e-06, + "loss": 1.1016, + "step": 10900 + }, + { + "epoch": 0.4314896478079456, + "grad_norm": 1.037187833468306, + "learning_rate": 9.941614387863666e-06, + "loss": 1.1081, + "step": 10910 + }, + { + "epoch": 0.4318851470268346, + "grad_norm": 1.01108747221716, + "learning_rate": 9.941263252588465e-06, + "loss": 1.1043, + "step": 10920 + }, + { + "epoch": 0.43228064624572365, + "grad_norm": 1.0019618828263877, + "learning_rate": 9.94091107083836e-06, + "loss": 1.1053, + "step": 10930 + }, + { + "epoch": 0.4326761454646127, + "grad_norm": 0.964488851707323, + "learning_rate": 9.94055784268794e-06, + "loss": 1.1211, + "step": 10940 + }, + { + "epoch": 0.43307164468350173, + "grad_norm": 1.0847389521137731, + "learning_rate": 9.94020356821201e-06, + "loss": 1.0846, + "step": 10950 + }, + { + "epoch": 0.4334671439023908, + "grad_norm": 1.0803169854020769, + "learning_rate": 9.939848247485603e-06, + "loss": 1.0989, + "step": 10960 + }, + { + "epoch": 0.4338626431212798, + "grad_norm": 1.1564151971278853, + "learning_rate": 9.939491880583967e-06, + "loss": 1.0813, + "step": 10970 + }, + { + "epoch": 0.43425814234016885, + "grad_norm": 0.9969080807018494, + "learning_rate": 9.939134467582574e-06, + "loss": 1.1055, + "step": 10980 + }, + { + "epoch": 0.43465364155905795, + "grad_norm": 0.995488236802104, + "learning_rate": 9.93877600855712e-06, + "loss": 1.1032, + "step": 10990 + }, + { + "epoch": 0.435049140777947, + "grad_norm": 1.1401382616049591, + "learning_rate": 9.938416503583518e-06, + "loss": 1.1048, + "step": 11000 + }, + { + "epoch": 0.435444639996836, + "grad_norm": 0.9830834040407321, + "learning_rate": 9.938055952737908e-06, + "loss": 1.0994, + "step": 11010 + }, + { + "epoch": 0.43584013921572506, + "grad_norm": 0.966136786648485, + "learning_rate": 9.937694356096646e-06, + "loss": 1.0939, + "step": 11020 + }, + { + "epoch": 0.4362356384346141, + "grad_norm": 1.0545500702021946, + "learning_rate": 9.937331713736313e-06, + "loss": 1.094, + "step": 11030 + }, + { + "epoch": 0.43663113765350314, + "grad_norm": 1.0354111758715492, + "learning_rate": 9.93696802573371e-06, + "loss": 1.1124, + "step": 11040 + }, + { + "epoch": 0.4370266368723922, + "grad_norm": 1.0264134414827726, + "learning_rate": 9.93660329216586e-06, + "loss": 1.1046, + "step": 11050 + }, + { + "epoch": 0.4374221360912812, + "grad_norm": 1.0169260152512325, + "learning_rate": 9.936237513110009e-06, + "loss": 1.1036, + "step": 
11060 + }, + { + "epoch": 0.43781763531017026, + "grad_norm": 0.9644859700148494, + "learning_rate": 9.935870688643621e-06, + "loss": 1.1112, + "step": 11070 + }, + { + "epoch": 0.4382131345290593, + "grad_norm": 1.0797495750563293, + "learning_rate": 9.935502818844382e-06, + "loss": 1.095, + "step": 11080 + }, + { + "epoch": 0.43860863374794834, + "grad_norm": 0.9794299641147116, + "learning_rate": 9.935133903790204e-06, + "loss": 1.092, + "step": 11090 + }, + { + "epoch": 0.4390041329668374, + "grad_norm": 1.103512613125175, + "learning_rate": 9.934763943559213e-06, + "loss": 1.099, + "step": 11100 + }, + { + "epoch": 0.4393996321857264, + "grad_norm": 0.9263846713150826, + "learning_rate": 9.934392938229765e-06, + "loss": 1.1171, + "step": 11110 + }, + { + "epoch": 0.43979513140461546, + "grad_norm": 1.0244173963950725, + "learning_rate": 9.934020887880427e-06, + "loss": 1.0991, + "step": 11120 + }, + { + "epoch": 0.4401906306235045, + "grad_norm": 1.0219006301989262, + "learning_rate": 9.933647792589996e-06, + "loss": 1.1033, + "step": 11130 + }, + { + "epoch": 0.44058612984239354, + "grad_norm": 0.9673422144227102, + "learning_rate": 9.933273652437485e-06, + "loss": 1.0825, + "step": 11140 + }, + { + "epoch": 0.44098162906128263, + "grad_norm": 1.051906770079977, + "learning_rate": 9.932898467502135e-06, + "loss": 1.0713, + "step": 11150 + }, + { + "epoch": 0.44137712828017167, + "grad_norm": 1.0131891534524688, + "learning_rate": 9.9325222378634e-06, + "loss": 1.1088, + "step": 11160 + }, + { + "epoch": 0.4417726274990607, + "grad_norm": 1.0437909035026047, + "learning_rate": 9.932144963600959e-06, + "loss": 1.0825, + "step": 11170 + }, + { + "epoch": 0.44216812671794975, + "grad_norm": 1.0552136599026627, + "learning_rate": 9.931766644794714e-06, + "loss": 1.1126, + "step": 11180 + }, + { + "epoch": 0.4425636259368388, + "grad_norm": 1.0173212695449023, + "learning_rate": 9.931387281524785e-06, + "loss": 1.0967, + "step": 11190 + }, + { + "epoch": 0.44295912515572783, + "grad_norm": 1.0344426914268472, + "learning_rate": 9.931006873871517e-06, + "loss": 1.0958, + "step": 11200 + }, + { + "epoch": 0.44335462437461687, + "grad_norm": 0.9938248423989837, + "learning_rate": 9.930625421915469e-06, + "loss": 1.0883, + "step": 11210 + }, + { + "epoch": 0.4437501235935059, + "grad_norm": 1.0781933853368326, + "learning_rate": 9.930242925737433e-06, + "loss": 1.1122, + "step": 11220 + }, + { + "epoch": 0.44414562281239495, + "grad_norm": 1.009762301149643, + "learning_rate": 9.929859385418408e-06, + "loss": 1.0655, + "step": 11230 + }, + { + "epoch": 0.444541122031284, + "grad_norm": 1.0237014475868262, + "learning_rate": 9.929474801039625e-06, + "loss": 1.1111, + "step": 11240 + }, + { + "epoch": 0.444936621250173, + "grad_norm": 0.9696411371836282, + "learning_rate": 9.929089172682533e-06, + "loss": 1.0891, + "step": 11250 + }, + { + "epoch": 0.44533212046906206, + "grad_norm": 1.0896299597864119, + "learning_rate": 9.928702500428799e-06, + "loss": 1.0847, + "step": 11260 + }, + { + "epoch": 0.4457276196879511, + "grad_norm": 1.0144938906422898, + "learning_rate": 9.928314784360315e-06, + "loss": 1.0873, + "step": 11270 + }, + { + "epoch": 0.44612311890684014, + "grad_norm": 1.0216108407321423, + "learning_rate": 9.927926024559193e-06, + "loss": 1.089, + "step": 11280 + }, + { + "epoch": 0.4465186181257292, + "grad_norm": 1.0269947541047353, + "learning_rate": 9.927536221107766e-06, + "loss": 1.1025, + "step": 11290 + }, + { + "epoch": 0.4469141173446182, + "grad_norm": 0.9971150678741842, 
+ "learning_rate": 9.927145374088586e-06, + "loss": 1.0868, + "step": 11300 + }, + { + "epoch": 0.44730961656350726, + "grad_norm": 1.011487243390694, + "learning_rate": 9.926753483584428e-06, + "loss": 1.0785, + "step": 11310 + }, + { + "epoch": 0.44770511578239636, + "grad_norm": 1.0382933157283536, + "learning_rate": 9.926360549678288e-06, + "loss": 1.0807, + "step": 11320 + }, + { + "epoch": 0.4481006150012854, + "grad_norm": 0.9880253668282665, + "learning_rate": 9.925966572453385e-06, + "loss": 1.1003, + "step": 11330 + }, + { + "epoch": 0.44849611422017444, + "grad_norm": 1.135149114105303, + "learning_rate": 9.925571551993155e-06, + "loss": 1.0837, + "step": 11340 + }, + { + "epoch": 0.4488916134390635, + "grad_norm": 0.9486772039120146, + "learning_rate": 9.925175488381252e-06, + "loss": 1.0951, + "step": 11350 + }, + { + "epoch": 0.4492871126579525, + "grad_norm": 1.028428710304506, + "learning_rate": 9.924778381701562e-06, + "loss": 1.124, + "step": 11360 + }, + { + "epoch": 0.44968261187684155, + "grad_norm": 0.8649859516919115, + "learning_rate": 9.924380232038184e-06, + "loss": 1.0794, + "step": 11370 + }, + { + "epoch": 0.4500781110957306, + "grad_norm": 1.1226433354182574, + "learning_rate": 9.923981039475437e-06, + "loss": 1.0906, + "step": 11380 + }, + { + "epoch": 0.45047361031461963, + "grad_norm": 1.0296294561193022, + "learning_rate": 9.923580804097865e-06, + "loss": 1.0967, + "step": 11390 + }, + { + "epoch": 0.45086910953350867, + "grad_norm": 0.9556360939507376, + "learning_rate": 9.92317952599023e-06, + "loss": 1.0862, + "step": 11400 + }, + { + "epoch": 0.4512646087523977, + "grad_norm": 1.0777201300745027, + "learning_rate": 9.922777205237516e-06, + "loss": 1.0724, + "step": 11410 + }, + { + "epoch": 0.45166010797128675, + "grad_norm": 1.0349762935995492, + "learning_rate": 9.922373841924928e-06, + "loss": 1.0862, + "step": 11420 + }, + { + "epoch": 0.4520556071901758, + "grad_norm": 1.0079290348637329, + "learning_rate": 9.92196943613789e-06, + "loss": 1.0961, + "step": 11430 + }, + { + "epoch": 0.45245110640906483, + "grad_norm": 1.0456911072799369, + "learning_rate": 9.921563987962052e-06, + "loss": 1.1076, + "step": 11440 + }, + { + "epoch": 0.45284660562795387, + "grad_norm": 1.0847324487057688, + "learning_rate": 9.921157497483278e-06, + "loss": 1.0978, + "step": 11450 + }, + { + "epoch": 0.4532421048468429, + "grad_norm": 1.0073979749023843, + "learning_rate": 9.920749964787656e-06, + "loss": 1.1106, + "step": 11460 + }, + { + "epoch": 0.45363760406573195, + "grad_norm": 1.0591959526202663, + "learning_rate": 9.920341389961495e-06, + "loss": 1.0938, + "step": 11470 + }, + { + "epoch": 0.454033103284621, + "grad_norm": 0.9979334924011445, + "learning_rate": 9.919931773091322e-06, + "loss": 1.0857, + "step": 11480 + }, + { + "epoch": 0.4544286025035101, + "grad_norm": 1.008374374303569, + "learning_rate": 9.91952111426389e-06, + "loss": 1.0903, + "step": 11490 + }, + { + "epoch": 0.4548241017223991, + "grad_norm": 1.0096373802309044, + "learning_rate": 9.919109413566168e-06, + "loss": 1.0946, + "step": 11500 + }, + { + "epoch": 0.45521960094128816, + "grad_norm": 1.031545265902342, + "learning_rate": 9.918696671085349e-06, + "loss": 1.0902, + "step": 11510 + }, + { + "epoch": 0.4556151001601772, + "grad_norm": 1.033713201533962, + "learning_rate": 9.918282886908841e-06, + "loss": 1.0919, + "step": 11520 + }, + { + "epoch": 0.45601059937906624, + "grad_norm": 1.0285322950368352, + "learning_rate": 9.917868061124279e-06, + "loss": 1.1101, + "step": 11530 + }, 
+ { + "epoch": 0.4564060985979553, + "grad_norm": 1.040993749385655, + "learning_rate": 9.917452193819515e-06, + "loss": 1.085, + "step": 11540 + }, + { + "epoch": 0.4568015978168443, + "grad_norm": 1.0617214951659109, + "learning_rate": 9.917035285082624e-06, + "loss": 1.0769, + "step": 11550 + }, + { + "epoch": 0.45719709703573336, + "grad_norm": 1.0777029523976296, + "learning_rate": 9.916617335001899e-06, + "loss": 1.1079, + "step": 11560 + }, + { + "epoch": 0.4575925962546224, + "grad_norm": 1.0088378485165441, + "learning_rate": 9.916198343665856e-06, + "loss": 1.097, + "step": 11570 + }, + { + "epoch": 0.45798809547351144, + "grad_norm": 1.0665297566664302, + "learning_rate": 9.915778311163227e-06, + "loss": 1.0801, + "step": 11580 + }, + { + "epoch": 0.4583835946924005, + "grad_norm": 1.0557086385187475, + "learning_rate": 9.91535723758297e-06, + "loss": 1.0861, + "step": 11590 + }, + { + "epoch": 0.4587790939112895, + "grad_norm": 0.9819357971990172, + "learning_rate": 9.914935123014263e-06, + "loss": 1.0906, + "step": 11600 + }, + { + "epoch": 0.45917459313017855, + "grad_norm": 1.0145791284930408, + "learning_rate": 9.914511967546498e-06, + "loss": 1.092, + "step": 11610 + }, + { + "epoch": 0.4595700923490676, + "grad_norm": 1.1044019745433686, + "learning_rate": 9.914087771269296e-06, + "loss": 1.072, + "step": 11620 + }, + { + "epoch": 0.45996559156795663, + "grad_norm": 1.0902321507184303, + "learning_rate": 9.913662534272492e-06, + "loss": 1.0934, + "step": 11630 + }, + { + "epoch": 0.4603610907868457, + "grad_norm": 1.100295762026507, + "learning_rate": 9.913236256646145e-06, + "loss": 1.0978, + "step": 11640 + }, + { + "epoch": 0.4607565900057347, + "grad_norm": 1.0044819453818326, + "learning_rate": 9.912808938480533e-06, + "loss": 1.0867, + "step": 11650 + }, + { + "epoch": 0.4611520892246238, + "grad_norm": 1.0662349452488766, + "learning_rate": 9.912380579866157e-06, + "loss": 1.1033, + "step": 11660 + }, + { + "epoch": 0.46154758844351285, + "grad_norm": 1.1200683203600492, + "learning_rate": 9.911951180893734e-06, + "loss": 1.0931, + "step": 11670 + }, + { + "epoch": 0.4619430876624019, + "grad_norm": 1.050212509308587, + "learning_rate": 9.911520741654201e-06, + "loss": 1.0767, + "step": 11680 + }, + { + "epoch": 0.4623385868812909, + "grad_norm": 1.0443608142168066, + "learning_rate": 9.911089262238723e-06, + "loss": 1.087, + "step": 11690 + }, + { + "epoch": 0.46273408610017996, + "grad_norm": 0.999757773447789, + "learning_rate": 9.910656742738676e-06, + "loss": 1.0918, + "step": 11700 + }, + { + "epoch": 0.463129585319069, + "grad_norm": 0.9564949020952047, + "learning_rate": 9.91022318324566e-06, + "loss": 1.1037, + "step": 11710 + }, + { + "epoch": 0.46352508453795804, + "grad_norm": 0.9691957741842717, + "learning_rate": 9.909788583851498e-06, + "loss": 1.0968, + "step": 11720 + }, + { + "epoch": 0.4639205837568471, + "grad_norm": 0.9503580891800856, + "learning_rate": 9.909352944648227e-06, + "loss": 1.0908, + "step": 11730 + }, + { + "epoch": 0.4643160829757361, + "grad_norm": 1.0460585072246669, + "learning_rate": 9.908916265728113e-06, + "loss": 1.1174, + "step": 11740 + }, + { + "epoch": 0.46471158219462516, + "grad_norm": 1.0394186563830252, + "learning_rate": 9.908478547183633e-06, + "loss": 1.0849, + "step": 11750 + }, + { + "epoch": 0.4651070814135142, + "grad_norm": 1.036767392066307, + "learning_rate": 9.90803978910749e-06, + "loss": 1.0686, + "step": 11760 + }, + { + "epoch": 0.46550258063240324, + "grad_norm": 1.155171777528196, + 
"learning_rate": 9.907599991592605e-06, + "loss": 1.0764, + "step": 11770 + }, + { + "epoch": 0.4658980798512923, + "grad_norm": 1.0892668738559457, + "learning_rate": 9.90715915473212e-06, + "loss": 1.0793, + "step": 11780 + }, + { + "epoch": 0.4662935790701813, + "grad_norm": 1.0133820746448876, + "learning_rate": 9.906717278619397e-06, + "loss": 1.0721, + "step": 11790 + }, + { + "epoch": 0.46668907828907036, + "grad_norm": 1.0344580017250842, + "learning_rate": 9.906274363348016e-06, + "loss": 1.0841, + "step": 11800 + }, + { + "epoch": 0.4670845775079594, + "grad_norm": 0.9279184665449852, + "learning_rate": 9.905830409011781e-06, + "loss": 1.0851, + "step": 11810 + }, + { + "epoch": 0.46748007672684844, + "grad_norm": 1.0650989169005847, + "learning_rate": 9.905385415704713e-06, + "loss": 1.0795, + "step": 11820 + }, + { + "epoch": 0.46787557594573753, + "grad_norm": 0.932389326389645, + "learning_rate": 9.904939383521052e-06, + "loss": 1.0898, + "step": 11830 + }, + { + "epoch": 0.46827107516462657, + "grad_norm": 0.952762616303843, + "learning_rate": 9.904492312555266e-06, + "loss": 1.0927, + "step": 11840 + }, + { + "epoch": 0.4686665743835156, + "grad_norm": 1.0762257012559517, + "learning_rate": 9.904044202902029e-06, + "loss": 1.0787, + "step": 11850 + }, + { + "epoch": 0.46906207360240465, + "grad_norm": 1.0494600090264288, + "learning_rate": 9.903595054656247e-06, + "loss": 1.0916, + "step": 11860 + }, + { + "epoch": 0.4694575728212937, + "grad_norm": 0.9725846913871297, + "learning_rate": 9.903144867913043e-06, + "loss": 1.0816, + "step": 11870 + }, + { + "epoch": 0.46985307204018273, + "grad_norm": 0.9722201634090373, + "learning_rate": 9.902693642767757e-06, + "loss": 1.0777, + "step": 11880 + }, + { + "epoch": 0.47024857125907177, + "grad_norm": 1.1207495758168449, + "learning_rate": 9.902241379315954e-06, + "loss": 1.0897, + "step": 11890 + }, + { + "epoch": 0.4706440704779608, + "grad_norm": 1.0329046376010542, + "learning_rate": 9.901788077653408e-06, + "loss": 1.0881, + "step": 11900 + }, + { + "epoch": 0.47103956969684985, + "grad_norm": 0.9455616199819888, + "learning_rate": 9.901333737876131e-06, + "loss": 1.0815, + "step": 11910 + }, + { + "epoch": 0.4714350689157389, + "grad_norm": 0.9277554527421996, + "learning_rate": 9.900878360080335e-06, + "loss": 1.0928, + "step": 11920 + }, + { + "epoch": 0.4718305681346279, + "grad_norm": 1.0253345272405776, + "learning_rate": 9.900421944362466e-06, + "loss": 1.0988, + "step": 11930 + }, + { + "epoch": 0.47222606735351697, + "grad_norm": 1.1110439566703343, + "learning_rate": 9.899964490819186e-06, + "loss": 1.0859, + "step": 11940 + }, + { + "epoch": 0.472621566572406, + "grad_norm": 1.078231731471087, + "learning_rate": 9.899505999547371e-06, + "loss": 1.0912, + "step": 11950 + }, + { + "epoch": 0.47301706579129504, + "grad_norm": 0.9087399551188918, + "learning_rate": 9.899046470644127e-06, + "loss": 1.052, + "step": 11960 + }, + { + "epoch": 0.4734125650101841, + "grad_norm": 1.0049353481340764, + "learning_rate": 9.89858590420677e-06, + "loss": 1.0857, + "step": 11970 + }, + { + "epoch": 0.4738080642290731, + "grad_norm": 0.9876838875099272, + "learning_rate": 9.898124300332843e-06, + "loss": 1.0824, + "step": 11980 + }, + { + "epoch": 0.4742035634479622, + "grad_norm": 1.0678057975124664, + "learning_rate": 9.897661659120106e-06, + "loss": 1.1015, + "step": 11990 + }, + { + "epoch": 0.47459906266685126, + "grad_norm": 1.0303887789275032, + "learning_rate": 9.897197980666536e-06, + "loss": 1.0855, + "step": 12000 + 
}, + { + "epoch": 0.4749945618857403, + "grad_norm": 1.0113675885216502, + "learning_rate": 9.896733265070333e-06, + "loss": 1.1062, + "step": 12010 + }, + { + "epoch": 0.47539006110462934, + "grad_norm": 0.98642841545544, + "learning_rate": 9.896267512429915e-06, + "loss": 1.0927, + "step": 12020 + }, + { + "epoch": 0.4757855603235184, + "grad_norm": 0.9956984024423865, + "learning_rate": 9.895800722843925e-06, + "loss": 1.0945, + "step": 12030 + }, + { + "epoch": 0.4761810595424074, + "grad_norm": 1.0842417022917363, + "learning_rate": 9.895332896411217e-06, + "loss": 1.0978, + "step": 12040 + }, + { + "epoch": 0.47657655876129645, + "grad_norm": 0.997743812847416, + "learning_rate": 9.894864033230867e-06, + "loss": 1.0745, + "step": 12050 + }, + { + "epoch": 0.4769720579801855, + "grad_norm": 1.0134286989652361, + "learning_rate": 9.894394133402175e-06, + "loss": 1.0684, + "step": 12060 + }, + { + "epoch": 0.47736755719907453, + "grad_norm": 0.9426141371159262, + "learning_rate": 9.89392319702466e-06, + "loss": 1.0811, + "step": 12070 + }, + { + "epoch": 0.47776305641796357, + "grad_norm": 1.0219322715914516, + "learning_rate": 9.893451224198051e-06, + "loss": 1.1027, + "step": 12080 + }, + { + "epoch": 0.4781585556368526, + "grad_norm": 1.0027324177164836, + "learning_rate": 9.892978215022312e-06, + "loss": 1.0756, + "step": 12090 + }, + { + "epoch": 0.47855405485574165, + "grad_norm": 1.0740016007133908, + "learning_rate": 9.892504169597614e-06, + "loss": 1.0891, + "step": 12100 + }, + { + "epoch": 0.4789495540746307, + "grad_norm": 0.9756711772273633, + "learning_rate": 9.89202908802435e-06, + "loss": 1.0742, + "step": 12110 + }, + { + "epoch": 0.47934505329351973, + "grad_norm": 0.9688987944855754, + "learning_rate": 9.891552970403137e-06, + "loss": 1.0797, + "step": 12120 + }, + { + "epoch": 0.47974055251240877, + "grad_norm": 0.9792967068846127, + "learning_rate": 9.891075816834809e-06, + "loss": 1.0819, + "step": 12130 + }, + { + "epoch": 0.4801360517312978, + "grad_norm": 1.028872658022042, + "learning_rate": 9.890597627420418e-06, + "loss": 1.085, + "step": 12140 + }, + { + "epoch": 0.48053155095018685, + "grad_norm": 1.057487460504427, + "learning_rate": 9.890118402261235e-06, + "loss": 1.1044, + "step": 12150 + }, + { + "epoch": 0.48092705016907594, + "grad_norm": 1.007728077149704, + "learning_rate": 9.889638141458754e-06, + "loss": 1.1037, + "step": 12160 + }, + { + "epoch": 0.481322549387965, + "grad_norm": 1.0173070331403782, + "learning_rate": 9.889156845114685e-06, + "loss": 1.0912, + "step": 12170 + }, + { + "epoch": 0.481718048606854, + "grad_norm": 0.9747683657362735, + "learning_rate": 9.888674513330956e-06, + "loss": 1.079, + "step": 12180 + }, + { + "epoch": 0.48211354782574306, + "grad_norm": 0.9919873981355115, + "learning_rate": 9.888191146209721e-06, + "loss": 1.0803, + "step": 12190 + }, + { + "epoch": 0.4825090470446321, + "grad_norm": 1.0100836974102454, + "learning_rate": 9.887706743853347e-06, + "loss": 1.0923, + "step": 12200 + }, + { + "epoch": 0.48290454626352114, + "grad_norm": 1.0758189062336616, + "learning_rate": 9.887221306364419e-06, + "loss": 1.0701, + "step": 12210 + }, + { + "epoch": 0.4833000454824102, + "grad_norm": 0.9652997832170335, + "learning_rate": 9.88673483384575e-06, + "loss": 1.0888, + "step": 12220 + }, + { + "epoch": 0.4836955447012992, + "grad_norm": 1.0114980971074492, + "learning_rate": 9.886247326400362e-06, + "loss": 1.0993, + "step": 12230 + }, + { + "epoch": 0.48409104392018826, + "grad_norm": 1.0405278323372296, + 
"learning_rate": 9.885758784131503e-06, + "loss": 1.0752, + "step": 12240 + }, + { + "epoch": 0.4844865431390773, + "grad_norm": 1.0254542050683517, + "learning_rate": 9.885269207142636e-06, + "loss": 1.0819, + "step": 12250 + }, + { + "epoch": 0.48488204235796634, + "grad_norm": 0.9964566407468562, + "learning_rate": 9.884778595537448e-06, + "loss": 1.1045, + "step": 12260 + }, + { + "epoch": 0.4852775415768554, + "grad_norm": 1.0637435439518692, + "learning_rate": 9.884286949419838e-06, + "loss": 1.0716, + "step": 12270 + }, + { + "epoch": 0.4856730407957444, + "grad_norm": 1.031654890592826, + "learning_rate": 9.883794268893933e-06, + "loss": 1.0898, + "step": 12280 + }, + { + "epoch": 0.48606854001463345, + "grad_norm": 1.135973706220895, + "learning_rate": 9.883300554064072e-06, + "loss": 1.0955, + "step": 12290 + }, + { + "epoch": 0.4864640392335225, + "grad_norm": 1.002511164046068, + "learning_rate": 9.882805805034816e-06, + "loss": 1.0698, + "step": 12300 + }, + { + "epoch": 0.48685953845241153, + "grad_norm": 0.9653185875255408, + "learning_rate": 9.88231002191094e-06, + "loss": 1.0899, + "step": 12310 + }, + { + "epoch": 0.4872550376713006, + "grad_norm": 0.9927404665577052, + "learning_rate": 9.88181320479745e-06, + "loss": 1.1031, + "step": 12320 + }, + { + "epoch": 0.48765053689018967, + "grad_norm": 1.022571424844845, + "learning_rate": 9.881315353799556e-06, + "loss": 1.0829, + "step": 12330 + }, + { + "epoch": 0.4880460361090787, + "grad_norm": 1.0251656545020469, + "learning_rate": 9.880816469022701e-06, + "loss": 1.0628, + "step": 12340 + }, + { + "epoch": 0.48844153532796775, + "grad_norm": 0.9778051734671916, + "learning_rate": 9.880316550572535e-06, + "loss": 1.0785, + "step": 12350 + }, + { + "epoch": 0.4888370345468568, + "grad_norm": 0.9479517726765861, + "learning_rate": 9.879815598554934e-06, + "loss": 1.0724, + "step": 12360 + }, + { + "epoch": 0.4892325337657458, + "grad_norm": 1.023416506979459, + "learning_rate": 9.879313613075992e-06, + "loss": 1.0821, + "step": 12370 + }, + { + "epoch": 0.48962803298463486, + "grad_norm": 1.0252808980561536, + "learning_rate": 9.878810594242019e-06, + "loss": 1.075, + "step": 12380 + }, + { + "epoch": 0.4900235322035239, + "grad_norm": 1.0328235630158107, + "learning_rate": 9.878306542159548e-06, + "loss": 1.0987, + "step": 12390 + }, + { + "epoch": 0.49041903142241294, + "grad_norm": 0.9459660658661184, + "learning_rate": 9.877801456935328e-06, + "loss": 1.097, + "step": 12400 + }, + { + "epoch": 0.490814530641302, + "grad_norm": 1.108492146052984, + "learning_rate": 9.877295338676325e-06, + "loss": 1.0783, + "step": 12410 + }, + { + "epoch": 0.491210029860191, + "grad_norm": 1.0365459150804477, + "learning_rate": 9.876788187489727e-06, + "loss": 1.0945, + "step": 12420 + }, + { + "epoch": 0.49160552907908006, + "grad_norm": 0.960860713701472, + "learning_rate": 9.876280003482943e-06, + "loss": 1.0867, + "step": 12430 + }, + { + "epoch": 0.4920010282979691, + "grad_norm": 1.0123915589609034, + "learning_rate": 9.875770786763596e-06, + "loss": 1.0888, + "step": 12440 + }, + { + "epoch": 0.49239652751685814, + "grad_norm": 0.9646965263241251, + "learning_rate": 9.875260537439528e-06, + "loss": 1.086, + "step": 12450 + }, + { + "epoch": 0.4927920267357472, + "grad_norm": 1.0441635488790992, + "learning_rate": 9.874749255618803e-06, + "loss": 1.0804, + "step": 12460 + }, + { + "epoch": 0.4931875259546362, + "grad_norm": 1.090821433254823, + "learning_rate": 9.8742369414097e-06, + "loss": 1.0867, + "step": 12470 + }, + { + 
"epoch": 0.49358302517352526, + "grad_norm": 1.0191128312170485, + "learning_rate": 9.873723594920719e-06, + "loss": 1.0885, + "step": 12480 + }, + { + "epoch": 0.4939785243924143, + "grad_norm": 1.0297257642552196, + "learning_rate": 9.873209216260578e-06, + "loss": 1.0658, + "step": 12490 + }, + { + "epoch": 0.4943740236113034, + "grad_norm": 1.1144393262572139, + "learning_rate": 9.872693805538215e-06, + "loss": 1.0895, + "step": 12500 + }, + { + "epoch": 0.49476952283019243, + "grad_norm": 0.9453693123598528, + "learning_rate": 9.872177362862783e-06, + "loss": 1.0734, + "step": 12510 + }, + { + "epoch": 0.49516502204908147, + "grad_norm": 1.0353432870673445, + "learning_rate": 9.871659888343656e-06, + "loss": 1.0973, + "step": 12520 + }, + { + "epoch": 0.4955605212679705, + "grad_norm": 1.0193367526957051, + "learning_rate": 9.871141382090428e-06, + "loss": 1.0773, + "step": 12530 + }, + { + "epoch": 0.49595602048685955, + "grad_norm": 1.0252116759825207, + "learning_rate": 9.87062184421291e-06, + "loss": 1.0914, + "step": 12540 + }, + { + "epoch": 0.4963515197057486, + "grad_norm": 1.0273843758175163, + "learning_rate": 9.870101274821127e-06, + "loss": 1.0737, + "step": 12550 + }, + { + "epoch": 0.49674701892463763, + "grad_norm": 0.9522271245472749, + "learning_rate": 9.86957967402533e-06, + "loss": 1.08, + "step": 12560 + }, + { + "epoch": 0.49714251814352667, + "grad_norm": 1.0063969810516513, + "learning_rate": 9.869057041935985e-06, + "loss": 1.0712, + "step": 12570 + }, + { + "epoch": 0.4975380173624157, + "grad_norm": 1.1048900741589638, + "learning_rate": 9.868533378663776e-06, + "loss": 1.0516, + "step": 12580 + }, + { + "epoch": 0.49793351658130475, + "grad_norm": 0.9709815003695493, + "learning_rate": 9.868008684319607e-06, + "loss": 1.0819, + "step": 12590 + }, + { + "epoch": 0.4983290158001938, + "grad_norm": 1.0520641839876166, + "learning_rate": 9.867482959014597e-06, + "loss": 1.0806, + "step": 12600 + }, + { + "epoch": 0.4987245150190828, + "grad_norm": 0.9938078714241106, + "learning_rate": 9.866956202860088e-06, + "loss": 1.0595, + "step": 12610 + }, + { + "epoch": 0.49912001423797187, + "grad_norm": 1.0592966209857555, + "learning_rate": 9.866428415967636e-06, + "loss": 1.0779, + "step": 12620 + }, + { + "epoch": 0.4995155134568609, + "grad_norm": 1.0003343099466233, + "learning_rate": 9.865899598449018e-06, + "loss": 1.0775, + "step": 12630 + }, + { + "epoch": 0.49991101267574994, + "grad_norm": 1.0334223822699393, + "learning_rate": 9.86536975041623e-06, + "loss": 1.0761, + "step": 12640 + }, + { + "epoch": 0.500306511894639, + "grad_norm": 1.0234692832340981, + "learning_rate": 9.864838871981481e-06, + "loss": 1.0833, + "step": 12650 + }, + { + "epoch": 0.5007020111135281, + "grad_norm": 0.9378572649147121, + "learning_rate": 9.864306963257207e-06, + "loss": 1.0758, + "step": 12660 + }, + { + "epoch": 0.5010975103324171, + "grad_norm": 0.9725097781603558, + "learning_rate": 9.863774024356052e-06, + "loss": 1.0577, + "step": 12670 + }, + { + "epoch": 0.5014930095513062, + "grad_norm": 1.112499062561878, + "learning_rate": 9.863240055390886e-06, + "loss": 1.0846, + "step": 12680 + }, + { + "epoch": 0.5018885087701952, + "grad_norm": 1.099532695654552, + "learning_rate": 9.862705056474795e-06, + "loss": 1.0781, + "step": 12690 + }, + { + "epoch": 0.5022840079890842, + "grad_norm": 1.0372090126460214, + "learning_rate": 9.862169027721083e-06, + "loss": 1.0711, + "step": 12700 + }, + { + "epoch": 0.5026795072079733, + "grad_norm": 1.0099421435902356, + 
"learning_rate": 9.861631969243268e-06, + "loss": 1.0762, + "step": 12710 + }, + { + "epoch": 0.5030750064268623, + "grad_norm": 0.9886273815878905, + "learning_rate": 9.861093881155092e-06, + "loss": 1.0633, + "step": 12720 + }, + { + "epoch": 0.5034705056457514, + "grad_norm": 1.005784117163544, + "learning_rate": 9.860554763570516e-06, + "loss": 1.0711, + "step": 12730 + }, + { + "epoch": 0.5038660048646404, + "grad_norm": 1.0873888018070612, + "learning_rate": 9.860014616603713e-06, + "loss": 1.092, + "step": 12740 + }, + { + "epoch": 0.5042615040835294, + "grad_norm": 0.9515312149483481, + "learning_rate": 9.859473440369074e-06, + "loss": 1.0731, + "step": 12750 + }, + { + "epoch": 0.5046570033024185, + "grad_norm": 1.0059155401757616, + "learning_rate": 9.858931234981215e-06, + "loss": 1.0684, + "step": 12760 + }, + { + "epoch": 0.5050525025213075, + "grad_norm": 0.9716542106768279, + "learning_rate": 9.858388000554967e-06, + "loss": 1.1067, + "step": 12770 + }, + { + "epoch": 0.5054480017401966, + "grad_norm": 1.0200551515159013, + "learning_rate": 9.857843737205371e-06, + "loss": 1.0891, + "step": 12780 + }, + { + "epoch": 0.5058435009590856, + "grad_norm": 0.9934024401412571, + "learning_rate": 9.857298445047701e-06, + "loss": 1.0563, + "step": 12790 + }, + { + "epoch": 0.5062390001779746, + "grad_norm": 1.0152758677203917, + "learning_rate": 9.856752124197433e-06, + "loss": 1.0762, + "step": 12800 + }, + { + "epoch": 0.5066344993968637, + "grad_norm": 0.9018296429061226, + "learning_rate": 9.856204774770274e-06, + "loss": 1.0842, + "step": 12810 + }, + { + "epoch": 0.5070299986157527, + "grad_norm": 1.0514237704455383, + "learning_rate": 9.85565639688214e-06, + "loss": 1.0886, + "step": 12820 + }, + { + "epoch": 0.5074254978346417, + "grad_norm": 0.9925557847659466, + "learning_rate": 9.85510699064917e-06, + "loss": 1.0674, + "step": 12830 + }, + { + "epoch": 0.5078209970535308, + "grad_norm": 0.9886757906812879, + "learning_rate": 9.854556556187717e-06, + "loss": 1.1064, + "step": 12840 + }, + { + "epoch": 0.5082164962724198, + "grad_norm": 1.0201916255835866, + "learning_rate": 9.854005093614355e-06, + "loss": 1.079, + "step": 12850 + }, + { + "epoch": 0.5086119954913089, + "grad_norm": 0.9685907478835968, + "learning_rate": 9.853452603045876e-06, + "loss": 1.0853, + "step": 12860 + }, + { + "epoch": 0.5090074947101979, + "grad_norm": 0.9868830824251713, + "learning_rate": 9.852899084599285e-06, + "loss": 1.0847, + "step": 12870 + }, + { + "epoch": 0.509402993929087, + "grad_norm": 0.9849429435244901, + "learning_rate": 9.852344538391808e-06, + "loss": 1.0796, + "step": 12880 + }, + { + "epoch": 0.509798493147976, + "grad_norm": 0.9661313036071937, + "learning_rate": 9.851788964540888e-06, + "loss": 1.0683, + "step": 12890 + }, + { + "epoch": 0.510193992366865, + "grad_norm": 0.9423262812388347, + "learning_rate": 9.851232363164188e-06, + "loss": 1.0856, + "step": 12900 + }, + { + "epoch": 0.5105894915857541, + "grad_norm": 0.9254842741163509, + "learning_rate": 9.850674734379586e-06, + "loss": 1.0662, + "step": 12910 + }, + { + "epoch": 0.5109849908046432, + "grad_norm": 0.9547498054163909, + "learning_rate": 9.850116078305178e-06, + "loss": 1.0741, + "step": 12920 + }, + { + "epoch": 0.5113804900235323, + "grad_norm": 0.915368379638883, + "learning_rate": 9.849556395059278e-06, + "loss": 1.0737, + "step": 12930 + }, + { + "epoch": 0.5117759892424213, + "grad_norm": 0.9595236029594361, + "learning_rate": 9.848995684760416e-06, + "loss": 1.0722, + "step": 12940 + }, + { + 
"epoch": 0.5121714884613103, + "grad_norm": 0.9548736074393787, + "learning_rate": 9.848433947527342e-06, + "loss": 1.0899, + "step": 12950 + }, + { + "epoch": 0.5125669876801994, + "grad_norm": 1.0109914361919559, + "learning_rate": 9.847871183479024e-06, + "loss": 1.0788, + "step": 12960 + }, + { + "epoch": 0.5129624868990884, + "grad_norm": 1.112177288890901, + "learning_rate": 9.847307392734641e-06, + "loss": 1.0593, + "step": 12970 + }, + { + "epoch": 0.5133579861179774, + "grad_norm": 0.9738999245385244, + "learning_rate": 9.8467425754136e-06, + "loss": 1.0778, + "step": 12980 + }, + { + "epoch": 0.5137534853368665, + "grad_norm": 0.9555220462997933, + "learning_rate": 9.846176731635515e-06, + "loss": 1.0782, + "step": 12990 + }, + { + "epoch": 0.5141489845557555, + "grad_norm": 1.0841923023542257, + "learning_rate": 9.845609861520225e-06, + "loss": 1.0888, + "step": 13000 + }, + { + "epoch": 0.5145444837746446, + "grad_norm": 1.036629669285172, + "learning_rate": 9.84504196518778e-06, + "loss": 1.0719, + "step": 13010 + }, + { + "epoch": 0.5149399829935336, + "grad_norm": 1.108205866109674, + "learning_rate": 9.844473042758455e-06, + "loss": 1.0777, + "step": 13020 + }, + { + "epoch": 0.5153354822124226, + "grad_norm": 0.9357968593070068, + "learning_rate": 9.843903094352735e-06, + "loss": 1.0808, + "step": 13030 + }, + { + "epoch": 0.5157309814313117, + "grad_norm": 0.9932924897904928, + "learning_rate": 9.843332120091329e-06, + "loss": 1.0673, + "step": 13040 + }, + { + "epoch": 0.5161264806502007, + "grad_norm": 1.0597709572544896, + "learning_rate": 9.842760120095154e-06, + "loss": 1.0927, + "step": 13050 + }, + { + "epoch": 0.5165219798690898, + "grad_norm": 0.9442667849128606, + "learning_rate": 9.842187094485354e-06, + "loss": 1.0646, + "step": 13060 + }, + { + "epoch": 0.5169174790879788, + "grad_norm": 1.0092561087087946, + "learning_rate": 9.841613043383282e-06, + "loss": 1.0831, + "step": 13070 + }, + { + "epoch": 0.5173129783068678, + "grad_norm": 0.9993460583781132, + "learning_rate": 9.841037966910519e-06, + "loss": 1.0765, + "step": 13080 + }, + { + "epoch": 0.5177084775257569, + "grad_norm": 1.0109837742862735, + "learning_rate": 9.840461865188848e-06, + "loss": 1.0869, + "step": 13090 + }, + { + "epoch": 0.5181039767446459, + "grad_norm": 1.0476423284084753, + "learning_rate": 9.839884738340285e-06, + "loss": 1.0728, + "step": 13100 + }, + { + "epoch": 0.518499475963535, + "grad_norm": 1.0160294431898018, + "learning_rate": 9.839306586487051e-06, + "loss": 1.0694, + "step": 13110 + }, + { + "epoch": 0.518894975182424, + "grad_norm": 0.9614342828711414, + "learning_rate": 9.83872740975159e-06, + "loss": 1.0872, + "step": 13120 + }, + { + "epoch": 0.519290474401313, + "grad_norm": 0.9661915296610115, + "learning_rate": 9.83814720825656e-06, + "loss": 1.0563, + "step": 13130 + }, + { + "epoch": 0.5196859736202021, + "grad_norm": 0.9646793509423565, + "learning_rate": 9.837565982124841e-06, + "loss": 1.0714, + "step": 13140 + }, + { + "epoch": 0.5200814728390911, + "grad_norm": 0.9951242152490484, + "learning_rate": 9.836983731479526e-06, + "loss": 1.0566, + "step": 13150 + }, + { + "epoch": 0.5204769720579802, + "grad_norm": 0.9958068144095807, + "learning_rate": 9.836400456443924e-06, + "loss": 1.0729, + "step": 13160 + }, + { + "epoch": 0.5208724712768692, + "grad_norm": 0.9246449785521856, + "learning_rate": 9.835816157141563e-06, + "loss": 1.0686, + "step": 13170 + }, + { + "epoch": 0.5212679704957582, + "grad_norm": 1.0332848229000948, + "learning_rate": 
9.835230833696187e-06, + "loss": 1.0781, + "step": 13180 + }, + { + "epoch": 0.5216634697146473, + "grad_norm": 1.0657823734331777, + "learning_rate": 9.834644486231761e-06, + "loss": 1.0671, + "step": 13190 + }, + { + "epoch": 0.5220589689335363, + "grad_norm": 0.9561827585668331, + "learning_rate": 9.834057114872459e-06, + "loss": 1.0656, + "step": 13200 + }, + { + "epoch": 0.5224544681524254, + "grad_norm": 1.0524846967020414, + "learning_rate": 9.83346871974268e-06, + "loss": 1.0744, + "step": 13210 + }, + { + "epoch": 0.5228499673713144, + "grad_norm": 1.1182652016129095, + "learning_rate": 9.832879300967031e-06, + "loss": 1.0566, + "step": 13220 + }, + { + "epoch": 0.5232454665902034, + "grad_norm": 1.0429235656233988, + "learning_rate": 9.832288858670348e-06, + "loss": 1.0677, + "step": 13230 + }, + { + "epoch": 0.5236409658090925, + "grad_norm": 1.0119110608263238, + "learning_rate": 9.831697392977668e-06, + "loss": 1.0588, + "step": 13240 + }, + { + "epoch": 0.5240364650279816, + "grad_norm": 1.077962292429758, + "learning_rate": 9.831104904014259e-06, + "loss": 1.087, + "step": 13250 + }, + { + "epoch": 0.5244319642468707, + "grad_norm": 1.0243997136978984, + "learning_rate": 9.8305113919056e-06, + "loss": 1.0952, + "step": 13260 + }, + { + "epoch": 0.5248274634657597, + "grad_norm": 1.0678484907203238, + "learning_rate": 9.829916856777386e-06, + "loss": 1.1014, + "step": 13270 + }, + { + "epoch": 0.5252229626846487, + "grad_norm": 0.9836947307676847, + "learning_rate": 9.829321298755527e-06, + "loss": 1.0888, + "step": 13280 + }, + { + "epoch": 0.5256184619035378, + "grad_norm": 0.9950540732279035, + "learning_rate": 9.828724717966153e-06, + "loss": 1.0759, + "step": 13290 + }, + { + "epoch": 0.5260139611224268, + "grad_norm": 0.9644969878304526, + "learning_rate": 9.828127114535613e-06, + "loss": 1.0767, + "step": 13300 + }, + { + "epoch": 0.5264094603413159, + "grad_norm": 1.0450309983137005, + "learning_rate": 9.827528488590466e-06, + "loss": 1.0779, + "step": 13310 + }, + { + "epoch": 0.5268049595602049, + "grad_norm": 0.9885703761695057, + "learning_rate": 9.82692884025749e-06, + "loss": 1.0786, + "step": 13320 + }, + { + "epoch": 0.5272004587790939, + "grad_norm": 1.0016227168515768, + "learning_rate": 9.826328169663682e-06, + "loss": 1.0771, + "step": 13330 + }, + { + "epoch": 0.527595957997983, + "grad_norm": 0.9706655976674615, + "learning_rate": 9.825726476936254e-06, + "loss": 1.0811, + "step": 13340 + }, + { + "epoch": 0.527991457216872, + "grad_norm": 1.0890777461097025, + "learning_rate": 9.825123762202633e-06, + "loss": 1.085, + "step": 13350 + }, + { + "epoch": 0.5283869564357611, + "grad_norm": 1.0412103121994987, + "learning_rate": 9.824520025590463e-06, + "loss": 1.068, + "step": 13360 + }, + { + "epoch": 0.5287824556546501, + "grad_norm": 1.0695103028933133, + "learning_rate": 9.823915267227606e-06, + "loss": 1.0691, + "step": 13370 + }, + { + "epoch": 0.5291779548735391, + "grad_norm": 1.0425166666925885, + "learning_rate": 9.823309487242141e-06, + "loss": 1.0719, + "step": 13380 + }, + { + "epoch": 0.5295734540924282, + "grad_norm": 1.0970921716275426, + "learning_rate": 9.82270268576236e-06, + "loss": 1.0692, + "step": 13390 + }, + { + "epoch": 0.5299689533113172, + "grad_norm": 0.9969672706890884, + "learning_rate": 9.822094862916774e-06, + "loss": 1.0768, + "step": 13400 + }, + { + "epoch": 0.5303644525302063, + "grad_norm": 1.044696734619016, + "learning_rate": 9.82148601883411e-06, + "loss": 1.083, + "step": 13410 + }, + { + "epoch": 
0.5307599517490953, + "grad_norm": 1.0632580665858382, + "learning_rate": 9.820876153643308e-06, + "loss": 1.0879, + "step": 13420 + }, + { + "epoch": 0.5311554509679843, + "grad_norm": 1.0862186220045404, + "learning_rate": 9.82026526747353e-06, + "loss": 1.081, + "step": 13430 + }, + { + "epoch": 0.5315509501868734, + "grad_norm": 0.9631305119660133, + "learning_rate": 9.819653360454149e-06, + "loss": 1.0743, + "step": 13440 + }, + { + "epoch": 0.5319464494057624, + "grad_norm": 1.191609859345613, + "learning_rate": 9.819040432714757e-06, + "loss": 1.0536, + "step": 13450 + }, + { + "epoch": 0.5323419486246515, + "grad_norm": 0.9710466738269231, + "learning_rate": 9.818426484385164e-06, + "loss": 1.0804, + "step": 13460 + }, + { + "epoch": 0.5327374478435405, + "grad_norm": 1.0098095888638832, + "learning_rate": 9.81781151559539e-06, + "loss": 1.0761, + "step": 13470 + }, + { + "epoch": 0.5331329470624295, + "grad_norm": 0.9982685961266377, + "learning_rate": 9.817195526475677e-06, + "loss": 1.0819, + "step": 13480 + }, + { + "epoch": 0.5335284462813186, + "grad_norm": 0.969676179766563, + "learning_rate": 9.816578517156483e-06, + "loss": 1.08, + "step": 13490 + }, + { + "epoch": 0.5339239455002076, + "grad_norm": 0.9625389617591572, + "learning_rate": 9.815960487768474e-06, + "loss": 1.08, + "step": 13500 + }, + { + "epoch": 0.5343194447190966, + "grad_norm": 1.0139124926163963, + "learning_rate": 9.815341438442544e-06, + "loss": 1.0703, + "step": 13510 + }, + { + "epoch": 0.5347149439379857, + "grad_norm": 1.035596062944271, + "learning_rate": 9.814721369309794e-06, + "loss": 1.0792, + "step": 13520 + }, + { + "epoch": 0.5351104431568747, + "grad_norm": 1.0629783791460925, + "learning_rate": 9.814100280501543e-06, + "loss": 1.0575, + "step": 13530 + }, + { + "epoch": 0.5355059423757638, + "grad_norm": 1.0046572807335932, + "learning_rate": 9.813478172149331e-06, + "loss": 1.0795, + "step": 13540 + }, + { + "epoch": 0.5359014415946528, + "grad_norm": 1.1034935233440906, + "learning_rate": 9.812855044384908e-06, + "loss": 1.0667, + "step": 13550 + }, + { + "epoch": 0.5362969408135418, + "grad_norm": 1.1075149324824336, + "learning_rate": 9.812230897340241e-06, + "loss": 1.0759, + "step": 13560 + }, + { + "epoch": 0.5366924400324309, + "grad_norm": 0.9761437375440437, + "learning_rate": 9.811605731147512e-06, + "loss": 1.0489, + "step": 13570 + }, + { + "epoch": 0.5370879392513199, + "grad_norm": 1.0111784611012464, + "learning_rate": 9.810979545939124e-06, + "loss": 1.0718, + "step": 13580 + }, + { + "epoch": 0.5374834384702091, + "grad_norm": 1.0244542089780235, + "learning_rate": 9.81035234184769e-06, + "loss": 1.0796, + "step": 13590 + }, + { + "epoch": 0.5378789376890981, + "grad_norm": 1.0702299166427474, + "learning_rate": 9.809724119006043e-06, + "loss": 1.0683, + "step": 13600 + }, + { + "epoch": 0.5382744369079872, + "grad_norm": 1.0636495729496882, + "learning_rate": 9.809094877547227e-06, + "loss": 1.063, + "step": 13610 + }, + { + "epoch": 0.5386699361268762, + "grad_norm": 0.9735941525994887, + "learning_rate": 9.808464617604508e-06, + "loss": 1.0706, + "step": 13620 + }, + { + "epoch": 0.5390654353457652, + "grad_norm": 0.9882813540711624, + "learning_rate": 9.807833339311363e-06, + "loss": 1.0798, + "step": 13630 + }, + { + "epoch": 0.5394609345646543, + "grad_norm": 1.0743232892680614, + "learning_rate": 9.807201042801484e-06, + "loss": 1.0734, + "step": 13640 + }, + { + "epoch": 0.5398564337835433, + "grad_norm": 1.013358151680488, + "learning_rate": 
9.806567728208782e-06, + "loss": 1.0613, + "step": 13650 + }, + { + "epoch": 0.5402519330024323, + "grad_norm": 1.1232389903512228, + "learning_rate": 9.805933395667381e-06, + "loss": 1.0573, + "step": 13660 + }, + { + "epoch": 0.5406474322213214, + "grad_norm": 1.0170648287379531, + "learning_rate": 9.805298045311626e-06, + "loss": 1.0754, + "step": 13670 + }, + { + "epoch": 0.5410429314402104, + "grad_norm": 0.9140112227865539, + "learning_rate": 9.804661677276068e-06, + "loss": 1.0635, + "step": 13680 + }, + { + "epoch": 0.5414384306590995, + "grad_norm": 1.0621555243875256, + "learning_rate": 9.804024291695482e-06, + "loss": 1.0771, + "step": 13690 + }, + { + "epoch": 0.5418339298779885, + "grad_norm": 1.0017512691148713, + "learning_rate": 9.803385888704855e-06, + "loss": 1.0694, + "step": 13700 + }, + { + "epoch": 0.5422294290968775, + "grad_norm": 0.9926049921328485, + "learning_rate": 9.802746468439389e-06, + "loss": 1.0632, + "step": 13710 + }, + { + "epoch": 0.5426249283157666, + "grad_norm": 1.1010638965079955, + "learning_rate": 9.802106031034501e-06, + "loss": 1.071, + "step": 13720 + }, + { + "epoch": 0.5430204275346556, + "grad_norm": 0.968280822571627, + "learning_rate": 9.801464576625828e-06, + "loss": 1.0489, + "step": 13730 + }, + { + "epoch": 0.5434159267535447, + "grad_norm": 1.0154394712132147, + "learning_rate": 9.800822105349218e-06, + "loss": 1.0821, + "step": 13740 + }, + { + "epoch": 0.5438114259724337, + "grad_norm": 0.9905098788062335, + "learning_rate": 9.800178617340732e-06, + "loss": 1.0693, + "step": 13750 + }, + { + "epoch": 0.5442069251913227, + "grad_norm": 0.9738302284129928, + "learning_rate": 9.799534112736654e-06, + "loss": 1.0412, + "step": 13760 + }, + { + "epoch": 0.5446024244102118, + "grad_norm": 1.0335390563495404, + "learning_rate": 9.798888591673477e-06, + "loss": 1.064, + "step": 13770 + }, + { + "epoch": 0.5449979236291008, + "grad_norm": 1.1729412428291819, + "learning_rate": 9.798242054287912e-06, + "loss": 1.0632, + "step": 13780 + }, + { + "epoch": 0.5453934228479899, + "grad_norm": 1.014657384320453, + "learning_rate": 9.797594500716885e-06, + "loss": 1.0575, + "step": 13790 + }, + { + "epoch": 0.5457889220668789, + "grad_norm": 0.9999494695274581, + "learning_rate": 9.796945931097534e-06, + "loss": 1.0726, + "step": 13800 + }, + { + "epoch": 0.5461844212857679, + "grad_norm": 1.0289273225918913, + "learning_rate": 9.796296345567218e-06, + "loss": 1.0642, + "step": 13810 + }, + { + "epoch": 0.546579920504657, + "grad_norm": 1.106102173833945, + "learning_rate": 9.795645744263508e-06, + "loss": 1.0418, + "step": 13820 + }, + { + "epoch": 0.546975419723546, + "grad_norm": 1.0469389940927747, + "learning_rate": 9.794994127324189e-06, + "loss": 1.0749, + "step": 13830 + }, + { + "epoch": 0.5473709189424351, + "grad_norm": 0.9827986030176245, + "learning_rate": 9.794341494887262e-06, + "loss": 1.0552, + "step": 13840 + }, + { + "epoch": 0.5477664181613241, + "grad_norm": 1.00101825826828, + "learning_rate": 9.793687847090946e-06, + "loss": 1.0657, + "step": 13850 + }, + { + "epoch": 0.5481619173802131, + "grad_norm": 1.0129658463299986, + "learning_rate": 9.793033184073667e-06, + "loss": 1.0807, + "step": 13860 + }, + { + "epoch": 0.5485574165991022, + "grad_norm": 1.082875174178682, + "learning_rate": 9.792377505974076e-06, + "loss": 1.0688, + "step": 13870 + }, + { + "epoch": 0.5489529158179912, + "grad_norm": 1.0662794223857854, + "learning_rate": 9.791720812931034e-06, + "loss": 1.0775, + "step": 13880 + }, + { + "epoch": 
0.5493484150368803, + "grad_norm": 0.9168213954046683, + "learning_rate": 9.791063105083616e-06, + "loss": 1.0532, + "step": 13890 + }, + { + "epoch": 0.5497439142557693, + "grad_norm": 1.031366588327019, + "learning_rate": 9.790404382571112e-06, + "loss": 1.07, + "step": 13900 + }, + { + "epoch": 0.5501394134746583, + "grad_norm": 0.9518098858135989, + "learning_rate": 9.789744645533032e-06, + "loss": 1.0562, + "step": 13910 + }, + { + "epoch": 0.5505349126935475, + "grad_norm": 0.993824500131525, + "learning_rate": 9.789083894109095e-06, + "loss": 1.068, + "step": 13920 + }, + { + "epoch": 0.5509304119124365, + "grad_norm": 0.9409445662655898, + "learning_rate": 9.788422128439237e-06, + "loss": 1.0706, + "step": 13930 + }, + { + "epoch": 0.5513259111313256, + "grad_norm": 1.1203258334991153, + "learning_rate": 9.787759348663607e-06, + "loss": 1.0535, + "step": 13940 + }, + { + "epoch": 0.5517214103502146, + "grad_norm": 0.9611409075486226, + "learning_rate": 9.787095554922573e-06, + "loss": 1.053, + "step": 13950 + }, + { + "epoch": 0.5521169095691036, + "grad_norm": 1.1071554899345504, + "learning_rate": 9.786430747356713e-06, + "loss": 1.0686, + "step": 13960 + }, + { + "epoch": 0.5525124087879927, + "grad_norm": 1.0078986838759314, + "learning_rate": 9.785764926106822e-06, + "loss": 1.0673, + "step": 13970 + }, + { + "epoch": 0.5529079080068817, + "grad_norm": 0.9674594540100612, + "learning_rate": 9.785098091313911e-06, + "loss": 1.0575, + "step": 13980 + }, + { + "epoch": 0.5533034072257708, + "grad_norm": 1.0299207339728886, + "learning_rate": 9.784430243119204e-06, + "loss": 1.0674, + "step": 13990 + }, + { + "epoch": 0.5536989064446598, + "grad_norm": 0.978476623511374, + "learning_rate": 9.783761381664138e-06, + "loss": 1.0602, + "step": 14000 + }, + { + "epoch": 0.5540944056635488, + "grad_norm": 0.9691692440883812, + "learning_rate": 9.78309150709037e-06, + "loss": 1.08, + "step": 14010 + }, + { + "epoch": 0.5544899048824379, + "grad_norm": 1.0221520315677104, + "learning_rate": 9.782420619539763e-06, + "loss": 1.0598, + "step": 14020 + }, + { + "epoch": 0.5548854041013269, + "grad_norm": 0.9986080154606959, + "learning_rate": 9.781748719154404e-06, + "loss": 1.0658, + "step": 14030 + }, + { + "epoch": 0.555280903320216, + "grad_norm": 1.1064300422894056, + "learning_rate": 9.781075806076587e-06, + "loss": 1.0517, + "step": 14040 + }, + { + "epoch": 0.555676402539105, + "grad_norm": 0.9499336876699358, + "learning_rate": 9.780401880448825e-06, + "loss": 1.0456, + "step": 14050 + }, + { + "epoch": 0.556071901757994, + "grad_norm": 0.9624555898317938, + "learning_rate": 9.779726942413844e-06, + "loss": 1.0625, + "step": 14060 + }, + { + "epoch": 0.5564674009768831, + "grad_norm": 0.9436090058305142, + "learning_rate": 9.779050992114583e-06, + "loss": 1.0696, + "step": 14070 + }, + { + "epoch": 0.5568629001957721, + "grad_norm": 0.9584763365172448, + "learning_rate": 9.778374029694197e-06, + "loss": 1.0516, + "step": 14080 + }, + { + "epoch": 0.5572583994146612, + "grad_norm": 0.964007302187305, + "learning_rate": 9.777696055296058e-06, + "loss": 1.0744, + "step": 14090 + }, + { + "epoch": 0.5576538986335502, + "grad_norm": 0.9322258697955476, + "learning_rate": 9.777017069063744e-06, + "loss": 1.0612, + "step": 14100 + }, + { + "epoch": 0.5580493978524392, + "grad_norm": 1.0084551723697883, + "learning_rate": 9.776337071141058e-06, + "loss": 1.0582, + "step": 14110 + }, + { + "epoch": 0.5584448970713283, + "grad_norm": 1.065298363827714, + "learning_rate": 9.775656061672008e-06, 
+ "loss": 1.0799, + "step": 14120 + }, + { + "epoch": 0.5588403962902173, + "grad_norm": 1.0439123906506067, + "learning_rate": 9.774974040800822e-06, + "loss": 1.0602, + "step": 14130 + }, + { + "epoch": 0.5592358955091064, + "grad_norm": 1.0885594656958613, + "learning_rate": 9.77429100867194e-06, + "loss": 1.0682, + "step": 14140 + }, + { + "epoch": 0.5596313947279954, + "grad_norm": 1.0707397259869602, + "learning_rate": 9.773606965430015e-06, + "loss": 1.0678, + "step": 14150 + }, + { + "epoch": 0.5600268939468844, + "grad_norm": 1.044888408792444, + "learning_rate": 9.772921911219918e-06, + "loss": 1.069, + "step": 14160 + }, + { + "epoch": 0.5604223931657735, + "grad_norm": 0.9941824802702044, + "learning_rate": 9.772235846186731e-06, + "loss": 1.065, + "step": 14170 + }, + { + "epoch": 0.5608178923846625, + "grad_norm": 1.0578392031701096, + "learning_rate": 9.77154877047575e-06, + "loss": 1.0436, + "step": 14180 + }, + { + "epoch": 0.5612133916035515, + "grad_norm": 1.0463928978967134, + "learning_rate": 9.770860684232489e-06, + "loss": 1.0538, + "step": 14190 + }, + { + "epoch": 0.5616088908224406, + "grad_norm": 1.0104383891446775, + "learning_rate": 9.770171587602667e-06, + "loss": 1.0683, + "step": 14200 + }, + { + "epoch": 0.5620043900413296, + "grad_norm": 0.9270930435001443, + "learning_rate": 9.769481480732228e-06, + "loss": 1.059, + "step": 14210 + }, + { + "epoch": 0.5623998892602187, + "grad_norm": 1.0064178890569941, + "learning_rate": 9.768790363767321e-06, + "loss": 1.0541, + "step": 14220 + }, + { + "epoch": 0.5627953884791077, + "grad_norm": 0.950426170647676, + "learning_rate": 9.768098236854317e-06, + "loss": 1.0895, + "step": 14230 + }, + { + "epoch": 0.5631908876979967, + "grad_norm": 1.1040013044632542, + "learning_rate": 9.767405100139795e-06, + "loss": 1.0723, + "step": 14240 + }, + { + "epoch": 0.5635863869168858, + "grad_norm": 1.1705841153079175, + "learning_rate": 9.766710953770547e-06, + "loss": 1.0472, + "step": 14250 + }, + { + "epoch": 0.5639818861357749, + "grad_norm": 0.9712752067057575, + "learning_rate": 9.766015797893585e-06, + "loss": 1.0745, + "step": 14260 + }, + { + "epoch": 0.564377385354664, + "grad_norm": 0.9569803366536023, + "learning_rate": 9.765319632656127e-06, + "loss": 1.0667, + "step": 14270 + }, + { + "epoch": 0.564772884573553, + "grad_norm": 1.0538959133057306, + "learning_rate": 9.764622458205613e-06, + "loss": 1.0803, + "step": 14280 + }, + { + "epoch": 0.565168383792442, + "grad_norm": 1.0279622216414128, + "learning_rate": 9.76392427468969e-06, + "loss": 1.0537, + "step": 14290 + }, + { + "epoch": 0.5655638830113311, + "grad_norm": 1.1047931819810026, + "learning_rate": 9.763225082256222e-06, + "loss": 1.0642, + "step": 14300 + }, + { + "epoch": 0.5659593822302201, + "grad_norm": 1.094562900905085, + "learning_rate": 9.762524881053286e-06, + "loss": 1.0645, + "step": 14310 + }, + { + "epoch": 0.5663548814491092, + "grad_norm": 1.2120506375073645, + "learning_rate": 9.761823671229174e-06, + "loss": 1.0855, + "step": 14320 + }, + { + "epoch": 0.5667503806679982, + "grad_norm": 1.0465353063688134, + "learning_rate": 9.761121452932388e-06, + "loss": 1.0594, + "step": 14330 + }, + { + "epoch": 0.5671458798868872, + "grad_norm": 1.0059219324897901, + "learning_rate": 9.760418226311645e-06, + "loss": 1.0612, + "step": 14340 + }, + { + "epoch": 0.5675413791057763, + "grad_norm": 1.0816828350475822, + "learning_rate": 9.75971399151588e-06, + "loss": 1.0629, + "step": 14350 + }, + { + "epoch": 0.5679368783246653, + "grad_norm": 
1.0336399381857253, + "learning_rate": 9.759008748694236e-06, + "loss": 1.047, + "step": 14360 + }, + { + "epoch": 0.5683323775435544, + "grad_norm": 0.9435486631404181, + "learning_rate": 9.75830249799607e-06, + "loss": 1.0451, + "step": 14370 + }, + { + "epoch": 0.5687278767624434, + "grad_norm": 1.0058353522521832, + "learning_rate": 9.757595239570956e-06, + "loss": 1.0663, + "step": 14380 + }, + { + "epoch": 0.5691233759813324, + "grad_norm": 1.0917494701626327, + "learning_rate": 9.756886973568678e-06, + "loss": 1.0628, + "step": 14390 + }, + { + "epoch": 0.5695188752002215, + "grad_norm": 0.9750835519172512, + "learning_rate": 9.756177700139235e-06, + "loss": 1.0648, + "step": 14400 + }, + { + "epoch": 0.5699143744191105, + "grad_norm": 0.979520248566433, + "learning_rate": 9.75546741943284e-06, + "loss": 1.0469, + "step": 14410 + }, + { + "epoch": 0.5703098736379996, + "grad_norm": 1.065799273200916, + "learning_rate": 9.754756131599916e-06, + "loss": 1.0796, + "step": 14420 + }, + { + "epoch": 0.5707053728568886, + "grad_norm": 1.1191241359780404, + "learning_rate": 9.754043836791102e-06, + "loss": 1.0693, + "step": 14430 + }, + { + "epoch": 0.5711008720757776, + "grad_norm": 1.1901337483605807, + "learning_rate": 9.753330535157251e-06, + "loss": 1.0558, + "step": 14440 + }, + { + "epoch": 0.5714963712946667, + "grad_norm": 1.0271929833606757, + "learning_rate": 9.752616226849429e-06, + "loss": 1.0498, + "step": 14450 + }, + { + "epoch": 0.5718918705135557, + "grad_norm": 0.9220748471813301, + "learning_rate": 9.751900912018911e-06, + "loss": 1.076, + "step": 14460 + }, + { + "epoch": 0.5722873697324448, + "grad_norm": 1.0016342467326624, + "learning_rate": 9.75118459081719e-06, + "loss": 1.081, + "step": 14470 + }, + { + "epoch": 0.5726828689513338, + "grad_norm": 0.9945342400947232, + "learning_rate": 9.750467263395973e-06, + "loss": 1.0581, + "step": 14480 + }, + { + "epoch": 0.5730783681702228, + "grad_norm": 1.0776017816556556, + "learning_rate": 9.749748929907175e-06, + "loss": 1.0552, + "step": 14490 + }, + { + "epoch": 0.5734738673891119, + "grad_norm": 1.0570637214839262, + "learning_rate": 9.749029590502926e-06, + "loss": 1.0491, + "step": 14500 + }, + { + "epoch": 0.5738693666080009, + "grad_norm": 1.048554258133626, + "learning_rate": 9.748309245335572e-06, + "loss": 1.0664, + "step": 14510 + }, + { + "epoch": 0.57426486582689, + "grad_norm": 1.0277972902139945, + "learning_rate": 9.747587894557668e-06, + "loss": 1.0731, + "step": 14520 + }, + { + "epoch": 0.574660365045779, + "grad_norm": 1.0466018342543084, + "learning_rate": 9.746865538321985e-06, + "loss": 1.0622, + "step": 14530 + }, + { + "epoch": 0.575055864264668, + "grad_norm": 1.0700970016881668, + "learning_rate": 9.746142176781505e-06, + "loss": 1.0761, + "step": 14540 + }, + { + "epoch": 0.5754513634835571, + "grad_norm": 1.0542186557957431, + "learning_rate": 9.745417810089424e-06, + "loss": 1.0603, + "step": 14550 + }, + { + "epoch": 0.5758468627024461, + "grad_norm": 1.0097740192347697, + "learning_rate": 9.74469243839915e-06, + "loss": 1.0478, + "step": 14560 + }, + { + "epoch": 0.5762423619213352, + "grad_norm": 1.0658781968262367, + "learning_rate": 9.743966061864305e-06, + "loss": 1.0635, + "step": 14570 + }, + { + "epoch": 0.5766378611402242, + "grad_norm": 1.04669305300436, + "learning_rate": 9.743238680638723e-06, + "loss": 1.0496, + "step": 14580 + }, + { + "epoch": 0.5770333603591133, + "grad_norm": 1.0859311684312374, + "learning_rate": 9.742510294876448e-06, + "loss": 1.0651, + "step": 14590 + 
}, + { + "epoch": 0.5774288595780024, + "grad_norm": 1.0365819330251662, + "learning_rate": 9.741780904731745e-06, + "loss": 1.0693, + "step": 14600 + }, + { + "epoch": 0.5778243587968914, + "grad_norm": 1.0669536495448684, + "learning_rate": 9.741050510359083e-06, + "loss": 1.0691, + "step": 14610 + }, + { + "epoch": 0.5782198580157805, + "grad_norm": 1.0259895326595243, + "learning_rate": 9.740319111913147e-06, + "loss": 1.0801, + "step": 14620 + }, + { + "epoch": 0.5786153572346695, + "grad_norm": 0.9263003103008343, + "learning_rate": 9.739586709548833e-06, + "loss": 1.0552, + "step": 14630 + }, + { + "epoch": 0.5790108564535585, + "grad_norm": 1.0363470442134415, + "learning_rate": 9.738853303421256e-06, + "loss": 1.0681, + "step": 14640 + }, + { + "epoch": 0.5794063556724476, + "grad_norm": 0.9658290966503398, + "learning_rate": 9.738118893685737e-06, + "loss": 1.062, + "step": 14650 + }, + { + "epoch": 0.5798018548913366, + "grad_norm": 1.0533008924353124, + "learning_rate": 9.73738348049781e-06, + "loss": 1.0508, + "step": 14660 + }, + { + "epoch": 0.5801973541102257, + "grad_norm": 0.9851834646191139, + "learning_rate": 9.736647064013222e-06, + "loss": 1.0586, + "step": 14670 + }, + { + "epoch": 0.5805928533291147, + "grad_norm": 1.009778085247017, + "learning_rate": 9.735909644387935e-06, + "loss": 1.0615, + "step": 14680 + }, + { + "epoch": 0.5809883525480037, + "grad_norm": 1.0037987651638043, + "learning_rate": 9.735171221778124e-06, + "loss": 1.0615, + "step": 14690 + }, + { + "epoch": 0.5813838517668928, + "grad_norm": 1.0372469593333695, + "learning_rate": 9.73443179634017e-06, + "loss": 1.082, + "step": 14700 + }, + { + "epoch": 0.5817793509857818, + "grad_norm": 1.0512479846633207, + "learning_rate": 9.733691368230674e-06, + "loss": 1.0425, + "step": 14710 + }, + { + "epoch": 0.5821748502046709, + "grad_norm": 1.0047544364879402, + "learning_rate": 9.732949937606443e-06, + "loss": 1.0559, + "step": 14720 + }, + { + "epoch": 0.5825703494235599, + "grad_norm": 1.059647319417386, + "learning_rate": 9.732207504624502e-06, + "loss": 1.0492, + "step": 14730 + }, + { + "epoch": 0.5829658486424489, + "grad_norm": 1.0417221530055403, + "learning_rate": 9.731464069442085e-06, + "loss": 1.0594, + "step": 14740 + }, + { + "epoch": 0.583361347861338, + "grad_norm": 1.0665569096635807, + "learning_rate": 9.730719632216639e-06, + "loss": 1.0523, + "step": 14750 + }, + { + "epoch": 0.583756847080227, + "grad_norm": 1.0608520584836654, + "learning_rate": 9.729974193105821e-06, + "loss": 1.0702, + "step": 14760 + }, + { + "epoch": 0.584152346299116, + "grad_norm": 0.994228321768493, + "learning_rate": 9.729227752267505e-06, + "loss": 1.0664, + "step": 14770 + }, + { + "epoch": 0.5845478455180051, + "grad_norm": 1.0294958669837573, + "learning_rate": 9.728480309859772e-06, + "loss": 1.0598, + "step": 14780 + }, + { + "epoch": 0.5849433447368941, + "grad_norm": 1.0246580273768906, + "learning_rate": 9.727731866040919e-06, + "loss": 1.0885, + "step": 14790 + }, + { + "epoch": 0.5853388439557832, + "grad_norm": 1.000945043460481, + "learning_rate": 9.726982420969453e-06, + "loss": 1.0444, + "step": 14800 + }, + { + "epoch": 0.5857343431746722, + "grad_norm": 1.048116849601965, + "learning_rate": 9.726231974804095e-06, + "loss": 1.0578, + "step": 14810 + }, + { + "epoch": 0.5861298423935613, + "grad_norm": 0.9969029486415593, + "learning_rate": 9.725480527703775e-06, + "loss": 1.0746, + "step": 14820 + }, + { + "epoch": 0.5865253416124503, + "grad_norm": 1.1077102508394456, + "learning_rate": 
9.724728079827636e-06, + "loss": 1.0643, + "step": 14830 + }, + { + "epoch": 0.5869208408313393, + "grad_norm": 0.9665468573186794, + "learning_rate": 9.723974631335036e-06, + "loss": 1.0538, + "step": 14840 + }, + { + "epoch": 0.5873163400502284, + "grad_norm": 1.0920165976368272, + "learning_rate": 9.72322018238554e-06, + "loss": 1.0557, + "step": 14850 + }, + { + "epoch": 0.5877118392691174, + "grad_norm": 1.0406138810349053, + "learning_rate": 9.722464733138929e-06, + "loss": 1.0533, + "step": 14860 + }, + { + "epoch": 0.5881073384880064, + "grad_norm": 0.985438850282144, + "learning_rate": 9.721708283755193e-06, + "loss": 1.0551, + "step": 14870 + }, + { + "epoch": 0.5885028377068955, + "grad_norm": 0.942142456971354, + "learning_rate": 9.720950834394535e-06, + "loss": 1.0613, + "step": 14880 + }, + { + "epoch": 0.5888983369257845, + "grad_norm": 1.0650758372360574, + "learning_rate": 9.720192385217373e-06, + "loss": 1.0735, + "step": 14890 + }, + { + "epoch": 0.5892938361446736, + "grad_norm": 0.9986753318280494, + "learning_rate": 9.71943293638433e-06, + "loss": 1.057, + "step": 14900 + }, + { + "epoch": 0.5896893353635626, + "grad_norm": 0.9695326569791033, + "learning_rate": 9.718672488056245e-06, + "loss": 1.0681, + "step": 14910 + }, + { + "epoch": 0.5900848345824516, + "grad_norm": 0.981208355486021, + "learning_rate": 9.717911040394168e-06, + "loss": 1.0614, + "step": 14920 + }, + { + "epoch": 0.5904803338013408, + "grad_norm": 1.0830526413000467, + "learning_rate": 9.717148593559361e-06, + "loss": 1.0409, + "step": 14930 + }, + { + "epoch": 0.5908758330202298, + "grad_norm": 1.030443542194117, + "learning_rate": 9.716385147713296e-06, + "loss": 1.0527, + "step": 14940 + }, + { + "epoch": 0.5912713322391189, + "grad_norm": 0.9855361413370963, + "learning_rate": 9.715620703017662e-06, + "loss": 1.0646, + "step": 14950 + }, + { + "epoch": 0.5916668314580079, + "grad_norm": 0.9036056328509712, + "learning_rate": 9.714855259634348e-06, + "loss": 1.0544, + "step": 14960 + }, + { + "epoch": 0.592062330676897, + "grad_norm": 1.1764351894929956, + "learning_rate": 9.71408881772547e-06, + "loss": 1.0483, + "step": 14970 + }, + { + "epoch": 0.592457829895786, + "grad_norm": 1.0102986546707016, + "learning_rate": 9.71332137745334e-06, + "loss": 1.0631, + "step": 14980 + }, + { + "epoch": 0.592853329114675, + "grad_norm": 0.977979001140588, + "learning_rate": 9.71255293898049e-06, + "loss": 1.0519, + "step": 14990 + }, + { + "epoch": 0.5932488283335641, + "grad_norm": 0.9904635712879952, + "learning_rate": 9.711783502469667e-06, + "loss": 1.057, + "step": 15000 + }, + { + "epoch": 0.5936443275524531, + "grad_norm": 1.0094491684538063, + "learning_rate": 9.711013068083822e-06, + "loss": 1.0558, + "step": 15010 + }, + { + "epoch": 0.5940398267713421, + "grad_norm": 0.9579777214000441, + "learning_rate": 9.710241635986118e-06, + "loss": 1.0507, + "step": 15020 + }, + { + "epoch": 0.5944353259902312, + "grad_norm": 1.026933614967618, + "learning_rate": 9.70946920633993e-06, + "loss": 1.0542, + "step": 15030 + }, + { + "epoch": 0.5948308252091202, + "grad_norm": 1.0228934255346838, + "learning_rate": 9.70869577930885e-06, + "loss": 1.0458, + "step": 15040 + }, + { + "epoch": 0.5952263244280093, + "grad_norm": 1.0487346580375194, + "learning_rate": 9.707921355056674e-06, + "loss": 1.0539, + "step": 15050 + }, + { + "epoch": 0.5956218236468983, + "grad_norm": 0.9507838990940268, + "learning_rate": 9.707145933747412e-06, + "loss": 1.0519, + "step": 15060 + }, + { + "epoch": 0.5960173228657873, + 
"grad_norm": 1.0200610160859969, + "learning_rate": 9.706369515545285e-06, + "loss": 1.036, + "step": 15070 + }, + { + "epoch": 0.5964128220846764, + "grad_norm": 0.9810991452812137, + "learning_rate": 9.705592100614724e-06, + "loss": 1.0584, + "step": 15080 + }, + { + "epoch": 0.5968083213035654, + "grad_norm": 1.0084529514505371, + "learning_rate": 9.704813689120374e-06, + "loss": 1.0555, + "step": 15090 + }, + { + "epoch": 0.5972038205224545, + "grad_norm": 1.232769552998101, + "learning_rate": 9.704034281227089e-06, + "loss": 1.0543, + "step": 15100 + }, + { + "epoch": 0.5975993197413435, + "grad_norm": 1.0423027039004151, + "learning_rate": 9.703253877099932e-06, + "loss": 1.0576, + "step": 15110 + }, + { + "epoch": 0.5979948189602325, + "grad_norm": 1.071786517794014, + "learning_rate": 9.702472476904183e-06, + "loss": 1.0414, + "step": 15120 + }, + { + "epoch": 0.5983903181791216, + "grad_norm": 0.9894882961863555, + "learning_rate": 9.701690080805325e-06, + "loss": 1.0532, + "step": 15130 + }, + { + "epoch": 0.5987858173980106, + "grad_norm": 1.0409108316873947, + "learning_rate": 9.70090668896906e-06, + "loss": 1.0551, + "step": 15140 + }, + { + "epoch": 0.5991813166168997, + "grad_norm": 0.9896086036179519, + "learning_rate": 9.700122301561294e-06, + "loss": 1.0563, + "step": 15150 + }, + { + "epoch": 0.5995768158357887, + "grad_norm": 1.0695496378107894, + "learning_rate": 9.699336918748148e-06, + "loss": 1.0412, + "step": 15160 + }, + { + "epoch": 0.5999723150546777, + "grad_norm": 1.0684644238315584, + "learning_rate": 9.698550540695952e-06, + "loss": 1.0342, + "step": 15170 + }, + { + "epoch": 0.6003678142735668, + "grad_norm": 0.9932752202379643, + "learning_rate": 9.69776316757125e-06, + "loss": 1.044, + "step": 15180 + }, + { + "epoch": 0.6007633134924558, + "grad_norm": 0.9335087742220949, + "learning_rate": 9.69697479954079e-06, + "loss": 1.065, + "step": 15190 + }, + { + "epoch": 0.6011588127113449, + "grad_norm": 0.9471058124439672, + "learning_rate": 9.696185436771537e-06, + "loss": 1.051, + "step": 15200 + }, + { + "epoch": 0.6015543119302339, + "grad_norm": 1.1107762981191798, + "learning_rate": 9.695395079430665e-06, + "loss": 1.0739, + "step": 15210 + }, + { + "epoch": 0.6019498111491229, + "grad_norm": 1.0135798265705005, + "learning_rate": 9.694603727685558e-06, + "loss": 1.0464, + "step": 15220 + }, + { + "epoch": 0.602345310368012, + "grad_norm": 1.0994180124371509, + "learning_rate": 9.69381138170381e-06, + "loss": 1.0383, + "step": 15230 + }, + { + "epoch": 0.602740809586901, + "grad_norm": 1.0086090208781033, + "learning_rate": 9.693018041653226e-06, + "loss": 1.0597, + "step": 15240 + }, + { + "epoch": 0.6031363088057901, + "grad_norm": 0.9852983261676425, + "learning_rate": 9.692223707701823e-06, + "loss": 1.0536, + "step": 15250 + }, + { + "epoch": 0.6035318080246792, + "grad_norm": 0.9596718925605977, + "learning_rate": 9.691428380017827e-06, + "loss": 1.0365, + "step": 15260 + }, + { + "epoch": 0.6039273072435682, + "grad_norm": 1.0507673134614033, + "learning_rate": 9.690632058769673e-06, + "loss": 1.0461, + "step": 15270 + }, + { + "epoch": 0.6043228064624573, + "grad_norm": 1.1726382182933972, + "learning_rate": 9.689834744126013e-06, + "loss": 1.0563, + "step": 15280 + }, + { + "epoch": 0.6047183056813463, + "grad_norm": 1.035216247921188, + "learning_rate": 9.689036436255698e-06, + "loss": 1.0551, + "step": 15290 + }, + { + "epoch": 0.6051138049002354, + "grad_norm": 1.0455376045216924, + "learning_rate": 9.6882371353278e-06, + "loss": 1.0523, + 
"step": 15300 + }, + { + "epoch": 0.6055093041191244, + "grad_norm": 0.9260890212225377, + "learning_rate": 9.687436841511598e-06, + "loss": 1.0386, + "step": 15310 + }, + { + "epoch": 0.6059048033380134, + "grad_norm": 1.0409545590750842, + "learning_rate": 9.686635554976577e-06, + "loss": 1.0686, + "step": 15320 + }, + { + "epoch": 0.6063003025569025, + "grad_norm": 1.0109137515069873, + "learning_rate": 9.685833275892441e-06, + "loss": 1.0684, + "step": 15330 + }, + { + "epoch": 0.6066958017757915, + "grad_norm": 1.0402733994453395, + "learning_rate": 9.685030004429093e-06, + "loss": 1.0603, + "step": 15340 + }, + { + "epoch": 0.6070913009946806, + "grad_norm": 1.0978306075614528, + "learning_rate": 9.684225740756655e-06, + "loss": 1.046, + "step": 15350 + }, + { + "epoch": 0.6074868002135696, + "grad_norm": 1.0396316491266315, + "learning_rate": 9.683420485045458e-06, + "loss": 1.0312, + "step": 15360 + }, + { + "epoch": 0.6078822994324586, + "grad_norm": 1.0253989712894895, + "learning_rate": 9.682614237466037e-06, + "loss": 1.076, + "step": 15370 + }, + { + "epoch": 0.6082777986513477, + "grad_norm": 1.072807339417918, + "learning_rate": 9.681806998189145e-06, + "loss": 1.0611, + "step": 15380 + }, + { + "epoch": 0.6086732978702367, + "grad_norm": 1.0045632014713812, + "learning_rate": 9.68099876738574e-06, + "loss": 1.0589, + "step": 15390 + }, + { + "epoch": 0.6090687970891258, + "grad_norm": 1.0831603732613064, + "learning_rate": 9.680189545226993e-06, + "loss": 1.0382, + "step": 15400 + }, + { + "epoch": 0.6094642963080148, + "grad_norm": 1.041366522961604, + "learning_rate": 9.679379331884282e-06, + "loss": 1.0433, + "step": 15410 + }, + { + "epoch": 0.6098597955269038, + "grad_norm": 0.9781163961992932, + "learning_rate": 9.678568127529196e-06, + "loss": 1.046, + "step": 15420 + }, + { + "epoch": 0.6102552947457929, + "grad_norm": 1.1066566274300034, + "learning_rate": 9.677755932333535e-06, + "loss": 1.0579, + "step": 15430 + }, + { + "epoch": 0.6106507939646819, + "grad_norm": 1.0437910028356834, + "learning_rate": 9.676942746469308e-06, + "loss": 1.0518, + "step": 15440 + }, + { + "epoch": 0.611046293183571, + "grad_norm": 0.9342493840171474, + "learning_rate": 9.676128570108732e-06, + "loss": 1.0265, + "step": 15450 + }, + { + "epoch": 0.61144179240246, + "grad_norm": 1.1150442198080608, + "learning_rate": 9.67531340342424e-06, + "loss": 1.057, + "step": 15460 + }, + { + "epoch": 0.611837291621349, + "grad_norm": 1.0510810001841868, + "learning_rate": 9.674497246588464e-06, + "loss": 1.0262, + "step": 15470 + }, + { + "epoch": 0.6122327908402381, + "grad_norm": 1.07587190794317, + "learning_rate": 9.67368009977426e-06, + "loss": 1.0686, + "step": 15480 + }, + { + "epoch": 0.6126282900591271, + "grad_norm": 1.0719162711619243, + "learning_rate": 9.672861963154676e-06, + "loss": 1.0535, + "step": 15490 + }, + { + "epoch": 0.6130237892780162, + "grad_norm": 1.097635840680599, + "learning_rate": 9.672042836902989e-06, + "loss": 1.0575, + "step": 15500 + }, + { + "epoch": 0.6134192884969052, + "grad_norm": 0.9399799153538689, + "learning_rate": 9.671222721192667e-06, + "loss": 1.0388, + "step": 15510 + }, + { + "epoch": 0.6138147877157942, + "grad_norm": 1.0576549713820989, + "learning_rate": 9.670401616197405e-06, + "loss": 1.0558, + "step": 15520 + }, + { + "epoch": 0.6142102869346833, + "grad_norm": 0.984195688053569, + "learning_rate": 9.669579522091092e-06, + "loss": 1.0526, + "step": 15530 + }, + { + "epoch": 0.6146057861535723, + "grad_norm": 1.084314040671667, + 
"learning_rate": 9.66875643904784e-06, + "loss": 1.0453, + "step": 15540 + }, + { + "epoch": 0.6150012853724613, + "grad_norm": 0.8960575809026324, + "learning_rate": 9.667932367241956e-06, + "loss": 1.0423, + "step": 15550 + }, + { + "epoch": 0.6153967845913504, + "grad_norm": 1.0143928544253158, + "learning_rate": 9.667107306847972e-06, + "loss": 1.0317, + "step": 15560 + }, + { + "epoch": 0.6157922838102394, + "grad_norm": 1.0832845046931834, + "learning_rate": 9.666281258040616e-06, + "loss": 1.0371, + "step": 15570 + }, + { + "epoch": 0.6161877830291285, + "grad_norm": 1.05790424790035, + "learning_rate": 9.665454220994835e-06, + "loss": 1.064, + "step": 15580 + }, + { + "epoch": 0.6165832822480175, + "grad_norm": 1.0221071763048168, + "learning_rate": 9.664626195885778e-06, + "loss": 1.0299, + "step": 15590 + }, + { + "epoch": 0.6169787814669067, + "grad_norm": 1.0297733962805455, + "learning_rate": 9.663797182888808e-06, + "loss": 1.0679, + "step": 15600 + }, + { + "epoch": 0.6173742806857957, + "grad_norm": 1.0087097348831306, + "learning_rate": 9.662967182179496e-06, + "loss": 1.0576, + "step": 15610 + }, + { + "epoch": 0.6177697799046847, + "grad_norm": 0.9769581711431045, + "learning_rate": 9.662136193933621e-06, + "loss": 1.0482, + "step": 15620 + }, + { + "epoch": 0.6181652791235738, + "grad_norm": 0.9336445849395211, + "learning_rate": 9.661304218327175e-06, + "loss": 1.0424, + "step": 15630 + }, + { + "epoch": 0.6185607783424628, + "grad_norm": 0.9897697543891534, + "learning_rate": 9.660471255536351e-06, + "loss": 1.0499, + "step": 15640 + }, + { + "epoch": 0.6189562775613519, + "grad_norm": 1.032804884253202, + "learning_rate": 9.65963730573756e-06, + "loss": 1.0512, + "step": 15650 + }, + { + "epoch": 0.6193517767802409, + "grad_norm": 1.099633749274004, + "learning_rate": 9.65880236910742e-06, + "loss": 1.0676, + "step": 15660 + }, + { + "epoch": 0.6197472759991299, + "grad_norm": 1.0084198908906983, + "learning_rate": 9.65796644582275e-06, + "loss": 1.0656, + "step": 15670 + }, + { + "epoch": 0.620142775218019, + "grad_norm": 0.9732233670313296, + "learning_rate": 9.65712953606059e-06, + "loss": 1.0412, + "step": 15680 + }, + { + "epoch": 0.620538274436908, + "grad_norm": 1.038136679894662, + "learning_rate": 9.65629163999818e-06, + "loss": 1.0384, + "step": 15690 + }, + { + "epoch": 0.620933773655797, + "grad_norm": 1.1081802595384365, + "learning_rate": 9.655452757812973e-06, + "loss": 1.0491, + "step": 15700 + }, + { + "epoch": 0.6213292728746861, + "grad_norm": 1.0638382984713737, + "learning_rate": 9.65461288968263e-06, + "loss": 1.0499, + "step": 15710 + }, + { + "epoch": 0.6217247720935751, + "grad_norm": 1.0214782133966622, + "learning_rate": 9.653772035785023e-06, + "loss": 1.0634, + "step": 15720 + }, + { + "epoch": 0.6221202713124642, + "grad_norm": 1.012611138352532, + "learning_rate": 9.652930196298226e-06, + "loss": 1.0578, + "step": 15730 + }, + { + "epoch": 0.6225157705313532, + "grad_norm": 0.8993044991401044, + "learning_rate": 9.65208737140053e-06, + "loss": 1.043, + "step": 15740 + }, + { + "epoch": 0.6229112697502422, + "grad_norm": 1.0319283157964163, + "learning_rate": 9.651243561270427e-06, + "loss": 1.0505, + "step": 15750 + }, + { + "epoch": 0.6233067689691313, + "grad_norm": 1.032345416509229, + "learning_rate": 9.650398766086624e-06, + "loss": 1.0415, + "step": 15760 + }, + { + "epoch": 0.6237022681880203, + "grad_norm": 1.0426650875127512, + "learning_rate": 9.649552986028035e-06, + "loss": 1.0532, + "step": 15770 + }, + { + "epoch": 
0.6240977674069094, + "grad_norm": 0.9625948990820244, + "learning_rate": 9.64870622127378e-06, + "loss": 1.0467, + "step": 15780 + }, + { + "epoch": 0.6244932666257984, + "grad_norm": 0.9608687302340226, + "learning_rate": 9.64785847200319e-06, + "loss": 1.0649, + "step": 15790 + }, + { + "epoch": 0.6248887658446874, + "grad_norm": 1.0803737059788474, + "learning_rate": 9.647009738395804e-06, + "loss": 1.0502, + "step": 15800 + }, + { + "epoch": 0.6252842650635765, + "grad_norm": 1.0114238021072945, + "learning_rate": 9.646160020631368e-06, + "loss": 1.0232, + "step": 15810 + }, + { + "epoch": 0.6256797642824655, + "grad_norm": 1.0317233012910534, + "learning_rate": 9.64530931888984e-06, + "loss": 1.0721, + "step": 15820 + }, + { + "epoch": 0.6260752635013546, + "grad_norm": 1.0238292035055239, + "learning_rate": 9.644457633351381e-06, + "loss": 1.0485, + "step": 15830 + }, + { + "epoch": 0.6264707627202436, + "grad_norm": 0.9969319682162873, + "learning_rate": 9.643604964196365e-06, + "loss": 1.0682, + "step": 15840 + }, + { + "epoch": 0.6268662619391326, + "grad_norm": 1.0217425046667716, + "learning_rate": 9.642751311605374e-06, + "loss": 1.0454, + "step": 15850 + }, + { + "epoch": 0.6272617611580217, + "grad_norm": 1.0961234155411588, + "learning_rate": 9.641896675759195e-06, + "loss": 1.0327, + "step": 15860 + }, + { + "epoch": 0.6276572603769107, + "grad_norm": 1.1694052823840964, + "learning_rate": 9.641041056838826e-06, + "loss": 1.0332, + "step": 15870 + }, + { + "epoch": 0.6280527595957998, + "grad_norm": 0.9475213666808329, + "learning_rate": 9.640184455025472e-06, + "loss": 1.0481, + "step": 15880 + }, + { + "epoch": 0.6284482588146888, + "grad_norm": 1.040034342977265, + "learning_rate": 9.639326870500548e-06, + "loss": 1.0691, + "step": 15890 + }, + { + "epoch": 0.6288437580335778, + "grad_norm": 0.9950895716174126, + "learning_rate": 9.638468303445672e-06, + "loss": 1.0389, + "step": 15900 + }, + { + "epoch": 0.6292392572524669, + "grad_norm": 1.0371939192552084, + "learning_rate": 9.63760875404268e-06, + "loss": 1.0547, + "step": 15910 + }, + { + "epoch": 0.6296347564713559, + "grad_norm": 1.0533304795531735, + "learning_rate": 9.636748222473603e-06, + "loss": 1.0313, + "step": 15920 + }, + { + "epoch": 0.6300302556902451, + "grad_norm": 1.0260420868800268, + "learning_rate": 9.635886708920692e-06, + "loss": 1.0554, + "step": 15930 + }, + { + "epoch": 0.6304257549091341, + "grad_norm": 1.003458445035818, + "learning_rate": 9.635024213566399e-06, + "loss": 1.0469, + "step": 15940 + }, + { + "epoch": 0.6308212541280231, + "grad_norm": 1.0051505810602934, + "learning_rate": 9.634160736593385e-06, + "loss": 1.0326, + "step": 15950 + }, + { + "epoch": 0.6312167533469122, + "grad_norm": 1.0415000505886955, + "learning_rate": 9.633296278184521e-06, + "loss": 1.0459, + "step": 15960 + }, + { + "epoch": 0.6316122525658012, + "grad_norm": 1.066981934396122, + "learning_rate": 9.632430838522883e-06, + "loss": 1.0509, + "step": 15970 + }, + { + "epoch": 0.6320077517846903, + "grad_norm": 1.065692254459778, + "learning_rate": 9.631564417791758e-06, + "loss": 1.0424, + "step": 15980 + }, + { + "epoch": 0.6324032510035793, + "grad_norm": 0.9537518530300072, + "learning_rate": 9.630697016174637e-06, + "loss": 1.0395, + "step": 15990 + }, + { + "epoch": 0.6327987502224683, + "grad_norm": 0.9796569512744613, + "learning_rate": 9.629828633855223e-06, + "loss": 1.0698, + "step": 16000 + }, + { + "epoch": 0.6331942494413574, + "grad_norm": 1.0188668524559308, + "learning_rate": 
9.628959271017424e-06, + "loss": 1.0451, + "step": 16010 + }, + { + "epoch": 0.6335897486602464, + "grad_norm": 1.0699027224160875, + "learning_rate": 9.628088927845354e-06, + "loss": 1.0577, + "step": 16020 + }, + { + "epoch": 0.6339852478791355, + "grad_norm": 1.0219284183748003, + "learning_rate": 9.627217604523338e-06, + "loss": 1.0667, + "step": 16030 + }, + { + "epoch": 0.6343807470980245, + "grad_norm": 1.0872981572529605, + "learning_rate": 9.62634530123591e-06, + "loss": 1.0473, + "step": 16040 + }, + { + "epoch": 0.6347762463169135, + "grad_norm": 0.9700775524587388, + "learning_rate": 9.625472018167804e-06, + "loss": 1.0593, + "step": 16050 + }, + { + "epoch": 0.6351717455358026, + "grad_norm": 1.0837049158295895, + "learning_rate": 9.62459775550397e-06, + "loss": 1.0609, + "step": 16060 + }, + { + "epoch": 0.6355672447546916, + "grad_norm": 1.0506032682878106, + "learning_rate": 9.623722513429562e-06, + "loss": 1.0415, + "step": 16070 + }, + { + "epoch": 0.6359627439735807, + "grad_norm": 1.179324388814712, + "learning_rate": 9.62284629212994e-06, + "loss": 1.0162, + "step": 16080 + }, + { + "epoch": 0.6363582431924697, + "grad_norm": 1.036813566203232, + "learning_rate": 9.621969091790672e-06, + "loss": 1.0634, + "step": 16090 + }, + { + "epoch": 0.6367537424113587, + "grad_norm": 1.0528915136774362, + "learning_rate": 9.621090912597535e-06, + "loss": 1.0428, + "step": 16100 + }, + { + "epoch": 0.6371492416302478, + "grad_norm": 1.0214701916658446, + "learning_rate": 9.620211754736512e-06, + "loss": 1.0527, + "step": 16110 + }, + { + "epoch": 0.6375447408491368, + "grad_norm": 1.0035423295669172, + "learning_rate": 9.619331618393794e-06, + "loss": 1.0373, + "step": 16120 + }, + { + "epoch": 0.6379402400680259, + "grad_norm": 1.120083020024742, + "learning_rate": 9.618450503755779e-06, + "loss": 1.0591, + "step": 16130 + }, + { + "epoch": 0.6383357392869149, + "grad_norm": 1.0010025289837234, + "learning_rate": 9.61756841100907e-06, + "loss": 1.044, + "step": 16140 + }, + { + "epoch": 0.6387312385058039, + "grad_norm": 0.9919865506696257, + "learning_rate": 9.616685340340482e-06, + "loss": 1.0375, + "step": 16150 + }, + { + "epoch": 0.639126737724693, + "grad_norm": 1.0185732230458162, + "learning_rate": 9.615801291937032e-06, + "loss": 1.0691, + "step": 16160 + }, + { + "epoch": 0.639522236943582, + "grad_norm": 1.0779945605665409, + "learning_rate": 9.614916265985947e-06, + "loss": 1.0377, + "step": 16170 + }, + { + "epoch": 0.639917736162471, + "grad_norm": 0.9367820876470461, + "learning_rate": 9.614030262674661e-06, + "loss": 1.0639, + "step": 16180 + }, + { + "epoch": 0.6403132353813601, + "grad_norm": 1.1075055883952594, + "learning_rate": 9.613143282190814e-06, + "loss": 1.0518, + "step": 16190 + }, + { + "epoch": 0.6407087346002491, + "grad_norm": 1.0475157895495932, + "learning_rate": 9.612255324722254e-06, + "loss": 1.0439, + "step": 16200 + }, + { + "epoch": 0.6411042338191382, + "grad_norm": 1.1059680822283366, + "learning_rate": 9.611366390457031e-06, + "loss": 1.0539, + "step": 16210 + }, + { + "epoch": 0.6414997330380272, + "grad_norm": 0.9794011283509074, + "learning_rate": 9.610476479583412e-06, + "loss": 1.0431, + "step": 16220 + }, + { + "epoch": 0.6418952322569162, + "grad_norm": 1.1107008485505965, + "learning_rate": 9.609585592289861e-06, + "loss": 1.0207, + "step": 16230 + }, + { + "epoch": 0.6422907314758053, + "grad_norm": 0.9671393494701901, + "learning_rate": 9.608693728765055e-06, + "loss": 1.0341, + "step": 16240 + }, + { + "epoch": 
0.6426862306946943, + "grad_norm": 1.0762878397019289, + "learning_rate": 9.607800889197874e-06, + "loss": 1.0496, + "step": 16250 + }, + { + "epoch": 0.6430817299135834, + "grad_norm": 0.994784312567831, + "learning_rate": 9.606907073777407e-06, + "loss": 1.0462, + "step": 16260 + }, + { + "epoch": 0.6434772291324725, + "grad_norm": 1.0866974926686934, + "learning_rate": 9.606012282692945e-06, + "loss": 1.046, + "step": 16270 + }, + { + "epoch": 0.6438727283513616, + "grad_norm": 1.1295947062325606, + "learning_rate": 9.605116516133996e-06, + "loss": 1.0395, + "step": 16280 + }, + { + "epoch": 0.6442682275702506, + "grad_norm": 0.9511426189829553, + "learning_rate": 9.604219774290263e-06, + "loss": 1.049, + "step": 16290 + }, + { + "epoch": 0.6446637267891396, + "grad_norm": 1.033441089120287, + "learning_rate": 9.603322057351663e-06, + "loss": 1.0486, + "step": 16300 + }, + { + "epoch": 0.6450592260080287, + "grad_norm": 1.087141724206363, + "learning_rate": 9.602423365508316e-06, + "loss": 1.0286, + "step": 16310 + }, + { + "epoch": 0.6454547252269177, + "grad_norm": 1.0754477185360494, + "learning_rate": 9.60152369895055e-06, + "loss": 1.0429, + "step": 16320 + }, + { + "epoch": 0.6458502244458068, + "grad_norm": 0.8964924547534487, + "learning_rate": 9.600623057868897e-06, + "loss": 1.0425, + "step": 16330 + }, + { + "epoch": 0.6462457236646958, + "grad_norm": 1.0467445736018623, + "learning_rate": 9.5997214424541e-06, + "loss": 1.0566, + "step": 16340 + }, + { + "epoch": 0.6466412228835848, + "grad_norm": 1.0949200556415695, + "learning_rate": 9.598818852897107e-06, + "loss": 1.0559, + "step": 16350 + }, + { + "epoch": 0.6470367221024739, + "grad_norm": 0.9976408476191808, + "learning_rate": 9.597915289389067e-06, + "loss": 1.0463, + "step": 16360 + }, + { + "epoch": 0.6474322213213629, + "grad_norm": 1.0333798283970315, + "learning_rate": 9.59701075212134e-06, + "loss": 1.0312, + "step": 16370 + }, + { + "epoch": 0.647827720540252, + "grad_norm": 0.9829699655987383, + "learning_rate": 9.596105241285493e-06, + "loss": 1.0375, + "step": 16380 + }, + { + "epoch": 0.648223219759141, + "grad_norm": 1.0892861684008417, + "learning_rate": 9.595198757073299e-06, + "loss": 1.0554, + "step": 16390 + }, + { + "epoch": 0.64861871897803, + "grad_norm": 0.999061829672844, + "learning_rate": 9.594291299676732e-06, + "loss": 1.0378, + "step": 16400 + }, + { + "epoch": 0.6490142181969191, + "grad_norm": 1.0281322051068504, + "learning_rate": 9.59338286928798e-06, + "loss": 1.0491, + "step": 16410 + }, + { + "epoch": 0.6494097174158081, + "grad_norm": 1.0056547011966825, + "learning_rate": 9.59247346609943e-06, + "loss": 1.0389, + "step": 16420 + }, + { + "epoch": 0.6498052166346971, + "grad_norm": 1.0199144442063073, + "learning_rate": 9.591563090303679e-06, + "loss": 1.0243, + "step": 16430 + }, + { + "epoch": 0.6502007158535862, + "grad_norm": 0.9826809854077958, + "learning_rate": 9.59065174209353e-06, + "loss": 1.0548, + "step": 16440 + }, + { + "epoch": 0.6505962150724752, + "grad_norm": 1.0946445597528371, + "learning_rate": 9.589739421661987e-06, + "loss": 1.0564, + "step": 16450 + }, + { + "epoch": 0.6509917142913643, + "grad_norm": 0.9093306324224982, + "learning_rate": 9.588826129202269e-06, + "loss": 1.0359, + "step": 16460 + }, + { + "epoch": 0.6513872135102533, + "grad_norm": 1.032452702384602, + "learning_rate": 9.587911864907792e-06, + "loss": 1.0209, + "step": 16470 + }, + { + "epoch": 0.6517827127291423, + "grad_norm": 0.932606847135522, + "learning_rate": 9.586996628972185e-06, + 
"loss": 1.0423, + "step": 16480 + }, + { + "epoch": 0.6521782119480314, + "grad_norm": 1.0801753190629404, + "learning_rate": 9.586080421589277e-06, + "loss": 1.0416, + "step": 16490 + }, + { + "epoch": 0.6525737111669204, + "grad_norm": 1.0208328941703664, + "learning_rate": 9.585163242953103e-06, + "loss": 1.0451, + "step": 16500 + }, + { + "epoch": 0.6529692103858095, + "grad_norm": 1.017005203066039, + "learning_rate": 9.584245093257911e-06, + "loss": 1.0597, + "step": 16510 + }, + { + "epoch": 0.6533647096046985, + "grad_norm": 1.0374710579553934, + "learning_rate": 9.583325972698146e-06, + "loss": 1.0512, + "step": 16520 + }, + { + "epoch": 0.6537602088235875, + "grad_norm": 1.0556906737819625, + "learning_rate": 9.582405881468459e-06, + "loss": 1.0419, + "step": 16530 + }, + { + "epoch": 0.6541557080424766, + "grad_norm": 1.131883843769043, + "learning_rate": 9.581484819763717e-06, + "loss": 1.0403, + "step": 16540 + }, + { + "epoch": 0.6545512072613656, + "grad_norm": 1.0525520418730143, + "learning_rate": 9.58056278777898e-06, + "loss": 1.0486, + "step": 16550 + }, + { + "epoch": 0.6549467064802547, + "grad_norm": 1.013699744559807, + "learning_rate": 9.579639785709518e-06, + "loss": 1.0506, + "step": 16560 + }, + { + "epoch": 0.6553422056991437, + "grad_norm": 1.0837720964627076, + "learning_rate": 9.57871581375081e-06, + "loss": 1.0433, + "step": 16570 + }, + { + "epoch": 0.6557377049180327, + "grad_norm": 1.0012290947394766, + "learning_rate": 9.577790872098535e-06, + "loss": 1.0548, + "step": 16580 + }, + { + "epoch": 0.6561332041369218, + "grad_norm": 1.0050172501786707, + "learning_rate": 9.576864960948582e-06, + "loss": 1.042, + "step": 16590 + }, + { + "epoch": 0.6565287033558109, + "grad_norm": 0.9174429412546484, + "learning_rate": 9.575938080497042e-06, + "loss": 1.0386, + "step": 16600 + }, + { + "epoch": 0.6569242025747, + "grad_norm": 1.0086971851572313, + "learning_rate": 9.575010230940212e-06, + "loss": 1.0493, + "step": 16610 + }, + { + "epoch": 0.657319701793589, + "grad_norm": 0.9999297545927885, + "learning_rate": 9.574081412474596e-06, + "loss": 1.0311, + "step": 16620 + }, + { + "epoch": 0.657715201012478, + "grad_norm": 0.9703798632788256, + "learning_rate": 9.573151625296899e-06, + "loss": 1.0464, + "step": 16630 + }, + { + "epoch": 0.6581107002313671, + "grad_norm": 1.0220247718537434, + "learning_rate": 9.572220869604037e-06, + "loss": 1.0508, + "step": 16640 + }, + { + "epoch": 0.6585061994502561, + "grad_norm": 1.0162383358983795, + "learning_rate": 9.571289145593128e-06, + "loss": 1.0526, + "step": 16650 + }, + { + "epoch": 0.6589016986691452, + "grad_norm": 0.9960813334797269, + "learning_rate": 9.570356453461491e-06, + "loss": 1.0221, + "step": 16660 + }, + { + "epoch": 0.6592971978880342, + "grad_norm": 1.1137802002905846, + "learning_rate": 9.56942279340666e-06, + "loss": 1.0535, + "step": 16670 + }, + { + "epoch": 0.6596926971069232, + "grad_norm": 0.9464157499925506, + "learning_rate": 9.568488165626365e-06, + "loss": 1.0383, + "step": 16680 + }, + { + "epoch": 0.6600881963258123, + "grad_norm": 1.1130569372194852, + "learning_rate": 9.567552570318546e-06, + "loss": 1.0268, + "step": 16690 + }, + { + "epoch": 0.6604836955447013, + "grad_norm": 1.1228395235041873, + "learning_rate": 9.566616007681342e-06, + "loss": 1.0211, + "step": 16700 + }, + { + "epoch": 0.6608791947635904, + "grad_norm": 1.0249738775299035, + "learning_rate": 9.565678477913104e-06, + "loss": 1.0332, + "step": 16710 + }, + { + "epoch": 0.6612746939824794, + "grad_norm": 
0.9400537428708831, + "learning_rate": 9.564739981212387e-06, + "loss": 1.0432, + "step": 16720 + }, + { + "epoch": 0.6616701932013684, + "grad_norm": 0.9582281152859005, + "learning_rate": 9.563800517777945e-06, + "loss": 1.0394, + "step": 16730 + }, + { + "epoch": 0.6620656924202575, + "grad_norm": 1.1088532835887537, + "learning_rate": 9.562860087808741e-06, + "loss": 1.0339, + "step": 16740 + }, + { + "epoch": 0.6624611916391465, + "grad_norm": 1.0273843070238693, + "learning_rate": 9.561918691503942e-06, + "loss": 1.0172, + "step": 16750 + }, + { + "epoch": 0.6628566908580356, + "grad_norm": 1.08441126176365, + "learning_rate": 9.560976329062918e-06, + "loss": 1.027, + "step": 16760 + }, + { + "epoch": 0.6632521900769246, + "grad_norm": 0.9853686375308722, + "learning_rate": 9.56003300068525e-06, + "loss": 1.0255, + "step": 16770 + }, + { + "epoch": 0.6636476892958136, + "grad_norm": 1.030571846165974, + "learning_rate": 9.559088706570714e-06, + "loss": 1.0322, + "step": 16780 + }, + { + "epoch": 0.6640431885147027, + "grad_norm": 0.9130511100608083, + "learning_rate": 9.558143446919298e-06, + "loss": 1.0336, + "step": 16790 + }, + { + "epoch": 0.6644386877335917, + "grad_norm": 1.1826514552688003, + "learning_rate": 9.55719722193119e-06, + "loss": 1.039, + "step": 16800 + }, + { + "epoch": 0.6648341869524808, + "grad_norm": 0.9902724700866942, + "learning_rate": 9.556250031806787e-06, + "loss": 1.0381, + "step": 16810 + }, + { + "epoch": 0.6652296861713698, + "grad_norm": 1.072785169008779, + "learning_rate": 9.555301876746683e-06, + "loss": 1.0312, + "step": 16820 + }, + { + "epoch": 0.6656251853902588, + "grad_norm": 0.9455694513191211, + "learning_rate": 9.554352756951686e-06, + "loss": 1.0436, + "step": 16830 + }, + { + "epoch": 0.6660206846091479, + "grad_norm": 1.0543375101177483, + "learning_rate": 9.5534026726228e-06, + "loss": 1.0439, + "step": 16840 + }, + { + "epoch": 0.6664161838280369, + "grad_norm": 1.0506783493197784, + "learning_rate": 9.552451623961238e-06, + "loss": 1.0373, + "step": 16850 + }, + { + "epoch": 0.666811683046926, + "grad_norm": 1.0599694755798597, + "learning_rate": 9.551499611168416e-06, + "loss": 1.0509, + "step": 16860 + }, + { + "epoch": 0.667207182265815, + "grad_norm": 1.0608087564396351, + "learning_rate": 9.550546634445953e-06, + "loss": 1.0555, + "step": 16870 + }, + { + "epoch": 0.667602681484704, + "grad_norm": 1.0642032289851944, + "learning_rate": 9.549592693995675e-06, + "loss": 1.0394, + "step": 16880 + }, + { + "epoch": 0.6679981807035931, + "grad_norm": 1.100815126634963, + "learning_rate": 9.548637790019607e-06, + "loss": 1.0464, + "step": 16890 + }, + { + "epoch": 0.6683936799224821, + "grad_norm": 1.0902493624158174, + "learning_rate": 9.547681922719984e-06, + "loss": 1.0379, + "step": 16900 + }, + { + "epoch": 0.6687891791413711, + "grad_norm": 1.0635566496235134, + "learning_rate": 9.546725092299242e-06, + "loss": 1.0451, + "step": 16910 + }, + { + "epoch": 0.6691846783602602, + "grad_norm": 0.9584549255738936, + "learning_rate": 9.54576729896002e-06, + "loss": 1.0391, + "step": 16920 + }, + { + "epoch": 0.6695801775791492, + "grad_norm": 1.070995269486395, + "learning_rate": 9.544808542905163e-06, + "loss": 1.0272, + "step": 16930 + }, + { + "epoch": 0.6699756767980384, + "grad_norm": 0.9790783814739064, + "learning_rate": 9.54384882433772e-06, + "loss": 1.0494, + "step": 16940 + }, + { + "epoch": 0.6703711760169274, + "grad_norm": 0.9465810737305382, + "learning_rate": 9.54288814346094e-06, + "loss": 1.0363, + "step": 16950 + 
}, + { + "epoch": 0.6707666752358165, + "grad_norm": 0.975396321916311, + "learning_rate": 9.541926500478284e-06, + "loss": 1.0542, + "step": 16960 + }, + { + "epoch": 0.6711621744547055, + "grad_norm": 1.0161602431604948, + "learning_rate": 9.540963895593407e-06, + "loss": 1.0564, + "step": 16970 + }, + { + "epoch": 0.6715576736735945, + "grad_norm": 0.9735537016129909, + "learning_rate": 9.540000329010172e-06, + "loss": 1.0238, + "step": 16980 + }, + { + "epoch": 0.6719531728924836, + "grad_norm": 0.9914811651341365, + "learning_rate": 9.53903580093265e-06, + "loss": 1.0561, + "step": 16990 + }, + { + "epoch": 0.6723486721113726, + "grad_norm": 1.1713963865178358, + "learning_rate": 9.538070311565107e-06, + "loss": 1.0223, + "step": 17000 + }, + { + "epoch": 0.6727441713302617, + "grad_norm": 0.9790382232291204, + "learning_rate": 9.53710386111202e-06, + "loss": 1.0351, + "step": 17010 + }, + { + "epoch": 0.6731396705491507, + "grad_norm": 1.0488028967827665, + "learning_rate": 9.536136449778065e-06, + "loss": 1.0388, + "step": 17020 + }, + { + "epoch": 0.6735351697680397, + "grad_norm": 0.9584055404084623, + "learning_rate": 9.535168077768125e-06, + "loss": 1.0415, + "step": 17030 + }, + { + "epoch": 0.6739306689869288, + "grad_norm": 1.0335393573503142, + "learning_rate": 9.534198745287282e-06, + "loss": 1.0378, + "step": 17040 + }, + { + "epoch": 0.6743261682058178, + "grad_norm": 1.0191950817036952, + "learning_rate": 9.533228452540827e-06, + "loss": 1.0341, + "step": 17050 + }, + { + "epoch": 0.6747216674247069, + "grad_norm": 1.0336588465213403, + "learning_rate": 9.53225719973425e-06, + "loss": 1.0291, + "step": 17060 + }, + { + "epoch": 0.6751171666435959, + "grad_norm": 1.059136311458042, + "learning_rate": 9.531284987073244e-06, + "loss": 1.0265, + "step": 17070 + }, + { + "epoch": 0.6755126658624849, + "grad_norm": 1.0433570581974203, + "learning_rate": 9.530311814763709e-06, + "loss": 1.0355, + "step": 17080 + }, + { + "epoch": 0.675908165081374, + "grad_norm": 1.0300428249677438, + "learning_rate": 9.529337683011746e-06, + "loss": 1.0409, + "step": 17090 + }, + { + "epoch": 0.676303664300263, + "grad_norm": 1.0033435144712768, + "learning_rate": 9.52836259202366e-06, + "loss": 1.0236, + "step": 17100 + }, + { + "epoch": 0.676699163519152, + "grad_norm": 1.1260516296950704, + "learning_rate": 9.527386542005956e-06, + "loss": 1.0409, + "step": 17110 + }, + { + "epoch": 0.6770946627380411, + "grad_norm": 1.0577775935051679, + "learning_rate": 9.526409533165348e-06, + "loss": 1.0375, + "step": 17120 + }, + { + "epoch": 0.6774901619569301, + "grad_norm": 1.1119972148675605, + "learning_rate": 9.52543156570875e-06, + "loss": 1.0368, + "step": 17130 + }, + { + "epoch": 0.6778856611758192, + "grad_norm": 1.0437027342498233, + "learning_rate": 9.524452639843273e-06, + "loss": 1.0314, + "step": 17140 + }, + { + "epoch": 0.6782811603947082, + "grad_norm": 1.13984957954556, + "learning_rate": 9.523472755776243e-06, + "loss": 1.0279, + "step": 17150 + }, + { + "epoch": 0.6786766596135972, + "grad_norm": 0.941921033197251, + "learning_rate": 9.52249191371518e-06, + "loss": 1.0372, + "step": 17160 + }, + { + "epoch": 0.6790721588324863, + "grad_norm": 1.029538895641745, + "learning_rate": 9.521510113867809e-06, + "loss": 1.0514, + "step": 17170 + }, + { + "epoch": 0.6794676580513753, + "grad_norm": 1.073026086443101, + "learning_rate": 9.52052735644206e-06, + "loss": 1.0293, + "step": 17180 + }, + { + "epoch": 0.6798631572702644, + "grad_norm": 1.0974218286313009, + "learning_rate": 
9.519543641646064e-06, + "loss": 1.0277, + "step": 17190 + }, + { + "epoch": 0.6802586564891534, + "grad_norm": 0.9680793636990874, + "learning_rate": 9.518558969688154e-06, + "loss": 1.0394, + "step": 17200 + }, + { + "epoch": 0.6806541557080424, + "grad_norm": 1.0693855084118786, + "learning_rate": 9.517573340776865e-06, + "loss": 1.0393, + "step": 17210 + }, + { + "epoch": 0.6810496549269315, + "grad_norm": 1.083877243258302, + "learning_rate": 9.51658675512094e-06, + "loss": 1.028, + "step": 17220 + }, + { + "epoch": 0.6814451541458205, + "grad_norm": 1.078167232659275, + "learning_rate": 9.51559921292932e-06, + "loss": 1.0307, + "step": 17230 + }, + { + "epoch": 0.6818406533647096, + "grad_norm": 1.0774628650055083, + "learning_rate": 9.514610714411148e-06, + "loss": 1.0449, + "step": 17240 + }, + { + "epoch": 0.6822361525835986, + "grad_norm": 1.0685616706304606, + "learning_rate": 9.513621259775771e-06, + "loss": 1.0424, + "step": 17250 + }, + { + "epoch": 0.6826316518024876, + "grad_norm": 1.0697177888713763, + "learning_rate": 9.512630849232742e-06, + "loss": 1.0091, + "step": 17260 + }, + { + "epoch": 0.6830271510213768, + "grad_norm": 0.9881047420361636, + "learning_rate": 9.511639482991807e-06, + "loss": 1.0589, + "step": 17270 + }, + { + "epoch": 0.6834226502402658, + "grad_norm": 1.1784659215278936, + "learning_rate": 9.510647161262927e-06, + "loss": 1.0443, + "step": 17280 + }, + { + "epoch": 0.6838181494591549, + "grad_norm": 0.9895397807354788, + "learning_rate": 9.509653884256253e-06, + "loss": 1.0523, + "step": 17290 + }, + { + "epoch": 0.6842136486780439, + "grad_norm": 0.978584969504384, + "learning_rate": 9.508659652182148e-06, + "loss": 1.0253, + "step": 17300 + }, + { + "epoch": 0.684609147896933, + "grad_norm": 1.0203673517879768, + "learning_rate": 9.507664465251171e-06, + "loss": 1.0302, + "step": 17310 + }, + { + "epoch": 0.685004647115822, + "grad_norm": 1.0028477911852194, + "learning_rate": 9.506668323674089e-06, + "loss": 1.0585, + "step": 17320 + }, + { + "epoch": 0.685400146334711, + "grad_norm": 0.9760105742719073, + "learning_rate": 9.505671227661862e-06, + "loss": 1.0587, + "step": 17330 + }, + { + "epoch": 0.6857956455536001, + "grad_norm": 0.9562174540190501, + "learning_rate": 9.504673177425662e-06, + "loss": 1.0462, + "step": 17340 + }, + { + "epoch": 0.6861911447724891, + "grad_norm": 1.050267449722087, + "learning_rate": 9.503674173176857e-06, + "loss": 1.0068, + "step": 17350 + }, + { + "epoch": 0.6865866439913781, + "grad_norm": 1.0074076526995512, + "learning_rate": 9.502674215127021e-06, + "loss": 1.015, + "step": 17360 + }, + { + "epoch": 0.6869821432102672, + "grad_norm": 0.9719617490095388, + "learning_rate": 9.501673303487928e-06, + "loss": 1.0235, + "step": 17370 + }, + { + "epoch": 0.6873776424291562, + "grad_norm": 0.967603469030969, + "learning_rate": 9.500671438471551e-06, + "loss": 1.0266, + "step": 17380 + }, + { + "epoch": 0.6877731416480453, + "grad_norm": 1.0375852797271878, + "learning_rate": 9.49966862029007e-06, + "loss": 1.0442, + "step": 17390 + }, + { + "epoch": 0.6881686408669343, + "grad_norm": 1.0278951507874752, + "learning_rate": 9.498664849155865e-06, + "loss": 1.0312, + "step": 17400 + }, + { + "epoch": 0.6885641400858233, + "grad_norm": 1.1270130472250348, + "learning_rate": 9.497660125281517e-06, + "loss": 1.0487, + "step": 17410 + }, + { + "epoch": 0.6889596393047124, + "grad_norm": 1.0695361143024629, + "learning_rate": 9.496654448879809e-06, + "loss": 1.0208, + "step": 17420 + }, + { + "epoch": 
0.6893551385236014, + "grad_norm": 0.9980861835302752, + "learning_rate": 9.495647820163725e-06, + "loss": 1.0176, + "step": 17430 + }, + { + "epoch": 0.6897506377424905, + "grad_norm": 0.9820629459969173, + "learning_rate": 9.494640239346456e-06, + "loss": 1.0517, + "step": 17440 + }, + { + "epoch": 0.6901461369613795, + "grad_norm": 0.9718859000115515, + "learning_rate": 9.493631706641385e-06, + "loss": 1.0342, + "step": 17450 + }, + { + "epoch": 0.6905416361802685, + "grad_norm": 1.0149575779819844, + "learning_rate": 9.492622222262104e-06, + "loss": 1.0298, + "step": 17460 + }, + { + "epoch": 0.6909371353991576, + "grad_norm": 0.9541546409456523, + "learning_rate": 9.491611786422406e-06, + "loss": 1.0183, + "step": 17470 + }, + { + "epoch": 0.6913326346180466, + "grad_norm": 0.8854836045074562, + "learning_rate": 9.490600399336282e-06, + "loss": 1.026, + "step": 17480 + }, + { + "epoch": 0.6917281338369357, + "grad_norm": 1.0141332501116707, + "learning_rate": 9.489588061217928e-06, + "loss": 1.0196, + "step": 17490 + }, + { + "epoch": 0.6921236330558247, + "grad_norm": 1.1396103122405974, + "learning_rate": 9.488574772281737e-06, + "loss": 1.0381, + "step": 17500 + }, + { + "epoch": 0.6925191322747137, + "grad_norm": 1.006966576458375, + "learning_rate": 9.487560532742312e-06, + "loss": 1.0423, + "step": 17510 + }, + { + "epoch": 0.6929146314936028, + "grad_norm": 1.0507603763860547, + "learning_rate": 9.486545342814445e-06, + "loss": 1.0113, + "step": 17520 + }, + { + "epoch": 0.6933101307124918, + "grad_norm": 1.1074279872076913, + "learning_rate": 9.48552920271314e-06, + "loss": 1.0393, + "step": 17530 + }, + { + "epoch": 0.6937056299313809, + "grad_norm": 0.9270751798264574, + "learning_rate": 9.484512112653596e-06, + "loss": 1.0542, + "step": 17540 + }, + { + "epoch": 0.6941011291502699, + "grad_norm": 1.0144057641303537, + "learning_rate": 9.483494072851215e-06, + "loss": 1.0301, + "step": 17550 + }, + { + "epoch": 0.6944966283691589, + "grad_norm": 0.9510545519842236, + "learning_rate": 9.482475083521605e-06, + "loss": 1.0418, + "step": 17560 + }, + { + "epoch": 0.694892127588048, + "grad_norm": 1.047926367227602, + "learning_rate": 9.481455144880565e-06, + "loss": 1.0341, + "step": 17570 + }, + { + "epoch": 0.695287626806937, + "grad_norm": 1.1945063966810148, + "learning_rate": 9.480434257144101e-06, + "loss": 1.0288, + "step": 17580 + }, + { + "epoch": 0.695683126025826, + "grad_norm": 1.0395791244806956, + "learning_rate": 9.479412420528421e-06, + "loss": 1.0305, + "step": 17590 + }, + { + "epoch": 0.6960786252447151, + "grad_norm": 0.9915137493645249, + "learning_rate": 9.478389635249935e-06, + "loss": 1.0462, + "step": 17600 + }, + { + "epoch": 0.6964741244636042, + "grad_norm": 0.9979567244351009, + "learning_rate": 9.477365901525248e-06, + "loss": 1.0195, + "step": 17610 + }, + { + "epoch": 0.6968696236824933, + "grad_norm": 1.0445929720914613, + "learning_rate": 9.47634121957117e-06, + "loss": 1.0311, + "step": 17620 + }, + { + "epoch": 0.6972651229013823, + "grad_norm": 0.9371362435676859, + "learning_rate": 9.475315589604711e-06, + "loss": 1.0279, + "step": 17630 + }, + { + "epoch": 0.6976606221202714, + "grad_norm": 1.0071781675755407, + "learning_rate": 9.474289011843083e-06, + "loss": 1.0402, + "step": 17640 + }, + { + "epoch": 0.6980561213391604, + "grad_norm": 1.001393852960122, + "learning_rate": 9.473261486503695e-06, + "loss": 1.0336, + "step": 17650 + }, + { + "epoch": 0.6984516205580494, + "grad_norm": 1.0062506554495403, + "learning_rate": 
9.472233013804161e-06, + "loss": 1.0427, + "step": 17660 + }, + { + "epoch": 0.6988471197769385, + "grad_norm": 1.122064684887129, + "learning_rate": 9.471203593962295e-06, + "loss": 1.0094, + "step": 17670 + }, + { + "epoch": 0.6992426189958275, + "grad_norm": 1.0641755859905655, + "learning_rate": 9.47017322719611e-06, + "loss": 1.0521, + "step": 17680 + }, + { + "epoch": 0.6996381182147166, + "grad_norm": 0.996838203937507, + "learning_rate": 9.469141913723816e-06, + "loss": 1.0245, + "step": 17690 + }, + { + "epoch": 0.7000336174336056, + "grad_norm": 0.9977499259345749, + "learning_rate": 9.468109653763833e-06, + "loss": 1.0392, + "step": 17700 + }, + { + "epoch": 0.7004291166524946, + "grad_norm": 1.1159570566024226, + "learning_rate": 9.467076447534776e-06, + "loss": 1.0526, + "step": 17710 + }, + { + "epoch": 0.7008246158713837, + "grad_norm": 1.0381074540651054, + "learning_rate": 9.466042295255455e-06, + "loss": 1.0102, + "step": 17720 + }, + { + "epoch": 0.7012201150902727, + "grad_norm": 0.9714731712930081, + "learning_rate": 9.46500719714489e-06, + "loss": 1.0586, + "step": 17730 + }, + { + "epoch": 0.7016156143091618, + "grad_norm": 1.0007087728722992, + "learning_rate": 9.463971153422295e-06, + "loss": 1.0315, + "step": 17740 + }, + { + "epoch": 0.7020111135280508, + "grad_norm": 1.0839473564019655, + "learning_rate": 9.462934164307089e-06, + "loss": 1.025, + "step": 17750 + }, + { + "epoch": 0.7024066127469398, + "grad_norm": 1.0755172683031924, + "learning_rate": 9.461896230018886e-06, + "loss": 1.0277, + "step": 17760 + }, + { + "epoch": 0.7028021119658289, + "grad_norm": 1.009446176094145, + "learning_rate": 9.460857350777504e-06, + "loss": 1.0403, + "step": 17770 + }, + { + "epoch": 0.7031976111847179, + "grad_norm": 1.1764653577076687, + "learning_rate": 9.459817526802958e-06, + "loss": 1.0277, + "step": 17780 + }, + { + "epoch": 0.703593110403607, + "grad_norm": 1.0299138971363238, + "learning_rate": 9.458776758315468e-06, + "loss": 1.0182, + "step": 17790 + }, + { + "epoch": 0.703988609622496, + "grad_norm": 1.1607430809418804, + "learning_rate": 9.457735045535448e-06, + "loss": 1.0393, + "step": 17800 + }, + { + "epoch": 0.704384108841385, + "grad_norm": 0.952550365342849, + "learning_rate": 9.456692388683518e-06, + "loss": 1.0225, + "step": 17810 + }, + { + "epoch": 0.7047796080602741, + "grad_norm": 0.9704323216931742, + "learning_rate": 9.45564878798049e-06, + "loss": 1.0422, + "step": 17820 + }, + { + "epoch": 0.7051751072791631, + "grad_norm": 1.0917470952685522, + "learning_rate": 9.454604243647384e-06, + "loss": 1.0345, + "step": 17830 + }, + { + "epoch": 0.7055706064980521, + "grad_norm": 1.0811136087893778, + "learning_rate": 9.453558755905418e-06, + "loss": 1.0422, + "step": 17840 + }, + { + "epoch": 0.7059661057169412, + "grad_norm": 1.0564672808578592, + "learning_rate": 9.452512324976007e-06, + "loss": 1.0332, + "step": 17850 + }, + { + "epoch": 0.7063616049358302, + "grad_norm": 0.9802906844311005, + "learning_rate": 9.451464951080766e-06, + "loss": 1.012, + "step": 17860 + }, + { + "epoch": 0.7067571041547193, + "grad_norm": 1.1015278578599281, + "learning_rate": 9.450416634441512e-06, + "loss": 1.0363, + "step": 17870 + }, + { + "epoch": 0.7071526033736083, + "grad_norm": 1.0112824386472745, + "learning_rate": 9.44936737528026e-06, + "loss": 1.0244, + "step": 17880 + }, + { + "epoch": 0.7075481025924973, + "grad_norm": 1.0526738425427273, + "learning_rate": 9.448317173819225e-06, + "loss": 1.0109, + "step": 17890 + }, + { + "epoch": 
0.7079436018113864, + "grad_norm": 0.9684882693538964, + "learning_rate": 9.447266030280824e-06, + "loss": 1.0378, + "step": 17900 + }, + { + "epoch": 0.7083391010302754, + "grad_norm": 1.0369246106958165, + "learning_rate": 9.446213944887667e-06, + "loss": 1.016, + "step": 17910 + }, + { + "epoch": 0.7087346002491645, + "grad_norm": 1.0507790450131054, + "learning_rate": 9.44516091786257e-06, + "loss": 1.032, + "step": 17920 + }, + { + "epoch": 0.7091300994680535, + "grad_norm": 1.0429180670917753, + "learning_rate": 9.444106949428546e-06, + "loss": 1.0403, + "step": 17930 + }, + { + "epoch": 0.7095255986869425, + "grad_norm": 1.0357371700575062, + "learning_rate": 9.443052039808809e-06, + "loss": 1.0297, + "step": 17940 + }, + { + "epoch": 0.7099210979058317, + "grad_norm": 1.001997955675374, + "learning_rate": 9.441996189226767e-06, + "loss": 1.027, + "step": 17950 + }, + { + "epoch": 0.7103165971247207, + "grad_norm": 1.0411255534164106, + "learning_rate": 9.440939397906034e-06, + "loss": 1.0437, + "step": 17960 + }, + { + "epoch": 0.7107120963436098, + "grad_norm": 1.0430212772449607, + "learning_rate": 9.43988166607042e-06, + "loss": 1.0406, + "step": 17970 + }, + { + "epoch": 0.7111075955624988, + "grad_norm": 1.0476161191690738, + "learning_rate": 9.438822993943933e-06, + "loss": 1.0385, + "step": 17980 + }, + { + "epoch": 0.7115030947813878, + "grad_norm": 0.9425366081957547, + "learning_rate": 9.437763381750783e-06, + "loss": 1.0203, + "step": 17990 + }, + { + "epoch": 0.7118985940002769, + "grad_norm": 0.9470071520270256, + "learning_rate": 9.436702829715378e-06, + "loss": 1.0378, + "step": 18000 + }, + { + "epoch": 0.7122940932191659, + "grad_norm": 1.0032513032935346, + "learning_rate": 9.435641338062325e-06, + "loss": 1.0335, + "step": 18010 + }, + { + "epoch": 0.712689592438055, + "grad_norm": 1.071046239224927, + "learning_rate": 9.434578907016427e-06, + "loss": 1.0183, + "step": 18020 + }, + { + "epoch": 0.713085091656944, + "grad_norm": 1.0072679422841402, + "learning_rate": 9.433515536802692e-06, + "loss": 1.0321, + "step": 18030 + }, + { + "epoch": 0.713480590875833, + "grad_norm": 1.0274835588693285, + "learning_rate": 9.432451227646321e-06, + "loss": 1.0286, + "step": 18040 + }, + { + "epoch": 0.7138760900947221, + "grad_norm": 1.1990568507239372, + "learning_rate": 9.431385979772719e-06, + "loss": 1.0249, + "step": 18050 + }, + { + "epoch": 0.7142715893136111, + "grad_norm": 0.9597939768988392, + "learning_rate": 9.430319793407483e-06, + "loss": 1.0284, + "step": 18060 + }, + { + "epoch": 0.7146670885325002, + "grad_norm": 0.9904046966771386, + "learning_rate": 9.429252668776419e-06, + "loss": 1.0383, + "step": 18070 + }, + { + "epoch": 0.7150625877513892, + "grad_norm": 1.075580767185948, + "learning_rate": 9.42818460610552e-06, + "loss": 1.0349, + "step": 18080 + }, + { + "epoch": 0.7154580869702782, + "grad_norm": 0.9251623183639143, + "learning_rate": 9.427115605620987e-06, + "loss": 1.0117, + "step": 18090 + }, + { + "epoch": 0.7158535861891673, + "grad_norm": 1.0155844395529972, + "learning_rate": 9.426045667549216e-06, + "loss": 1.0187, + "step": 18100 + }, + { + "epoch": 0.7162490854080563, + "grad_norm": 1.0912143845014943, + "learning_rate": 9.424974792116799e-06, + "loss": 1.0258, + "step": 18110 + }, + { + "epoch": 0.7166445846269454, + "grad_norm": 0.983404683787228, + "learning_rate": 9.42390297955053e-06, + "loss": 1.0415, + "step": 18120 + }, + { + "epoch": 0.7170400838458344, + "grad_norm": 1.0520129458409133, + "learning_rate": 
9.422830230077402e-06, + "loss": 1.0362, + "step": 18130 + }, + { + "epoch": 0.7174355830647234, + "grad_norm": 1.0798743638958643, + "learning_rate": 9.421756543924606e-06, + "loss": 1.0202, + "step": 18140 + }, + { + "epoch": 0.7178310822836125, + "grad_norm": 1.0330469051053421, + "learning_rate": 9.420681921319525e-06, + "loss": 1.0146, + "step": 18150 + }, + { + "epoch": 0.7182265815025015, + "grad_norm": 0.9803559327822846, + "learning_rate": 9.41960636248975e-06, + "loss": 1.0032, + "step": 18160 + }, + { + "epoch": 0.7186220807213906, + "grad_norm": 1.0594579196245595, + "learning_rate": 9.418529867663066e-06, + "loss": 1.0392, + "step": 18170 + }, + { + "epoch": 0.7190175799402796, + "grad_norm": 1.073378619708236, + "learning_rate": 9.417452437067454e-06, + "loss": 1.034, + "step": 18180 + }, + { + "epoch": 0.7194130791591686, + "grad_norm": 0.996061568152992, + "learning_rate": 9.416374070931097e-06, + "loss": 1.0448, + "step": 18190 + }, + { + "epoch": 0.7198085783780577, + "grad_norm": 1.0149747384680066, + "learning_rate": 9.415294769482375e-06, + "loss": 1.0318, + "step": 18200 + }, + { + "epoch": 0.7202040775969467, + "grad_norm": 1.015167129650081, + "learning_rate": 9.414214532949863e-06, + "loss": 1.0321, + "step": 18210 + }, + { + "epoch": 0.7205995768158358, + "grad_norm": 1.0785394541752948, + "learning_rate": 9.41313336156234e-06, + "loss": 1.0324, + "step": 18220 + }, + { + "epoch": 0.7209950760347248, + "grad_norm": 0.9699972936380293, + "learning_rate": 9.412051255548775e-06, + "loss": 1.0289, + "step": 18230 + }, + { + "epoch": 0.7213905752536138, + "grad_norm": 1.0030239467096689, + "learning_rate": 9.410968215138343e-06, + "loss": 1.0295, + "step": 18240 + }, + { + "epoch": 0.7217860744725029, + "grad_norm": 1.0795386550609039, + "learning_rate": 9.409884240560412e-06, + "loss": 1.0282, + "step": 18250 + }, + { + "epoch": 0.7221815736913919, + "grad_norm": 1.0953579009158714, + "learning_rate": 9.408799332044552e-06, + "loss": 1.0201, + "step": 18260 + }, + { + "epoch": 0.722577072910281, + "grad_norm": 1.1092584838240036, + "learning_rate": 9.407713489820524e-06, + "loss": 1.0079, + "step": 18270 + }, + { + "epoch": 0.7229725721291701, + "grad_norm": 1.0798434259311, + "learning_rate": 9.406626714118292e-06, + "loss": 1.0328, + "step": 18280 + }, + { + "epoch": 0.7233680713480591, + "grad_norm": 0.9493139819260271, + "learning_rate": 9.405539005168019e-06, + "loss": 1.0406, + "step": 18290 + }, + { + "epoch": 0.7237635705669482, + "grad_norm": 0.986110374566842, + "learning_rate": 9.404450363200062e-06, + "loss": 1.0424, + "step": 18300 + }, + { + "epoch": 0.7241590697858372, + "grad_norm": 1.0529415191863654, + "learning_rate": 9.403360788444974e-06, + "loss": 1.0403, + "step": 18310 + }, + { + "epoch": 0.7245545690047263, + "grad_norm": 1.078810215390035, + "learning_rate": 9.40227028113351e-06, + "loss": 1.0233, + "step": 18320 + }, + { + "epoch": 0.7249500682236153, + "grad_norm": 1.0146987780513816, + "learning_rate": 9.401178841496622e-06, + "loss": 1.0082, + "step": 18330 + }, + { + "epoch": 0.7253455674425043, + "grad_norm": 1.0674040526570738, + "learning_rate": 9.400086469765457e-06, + "loss": 1.025, + "step": 18340 + }, + { + "epoch": 0.7257410666613934, + "grad_norm": 1.0661304663925135, + "learning_rate": 9.39899316617136e-06, + "loss": 1.0044, + "step": 18350 + }, + { + "epoch": 0.7261365658802824, + "grad_norm": 1.0068458744723774, + "learning_rate": 9.397898930945875e-06, + "loss": 1.0307, + "step": 18360 + }, + { + "epoch": 0.7265320650991715, 
+ "grad_norm": 1.0781183364207743, + "learning_rate": 9.396803764320743e-06, + "loss": 1.024, + "step": 18370 + }, + { + "epoch": 0.7269275643180605, + "grad_norm": 1.0326553789168182, + "learning_rate": 9.3957076665279e-06, + "loss": 1.0284, + "step": 18380 + }, + { + "epoch": 0.7273230635369495, + "grad_norm": 1.001373259245197, + "learning_rate": 9.394610637799482e-06, + "loss": 1.0305, + "step": 18390 + }, + { + "epoch": 0.7277185627558386, + "grad_norm": 1.1376419765398862, + "learning_rate": 9.39351267836782e-06, + "loss": 1.0398, + "step": 18400 + }, + { + "epoch": 0.7281140619747276, + "grad_norm": 0.9661245920163976, + "learning_rate": 9.392413788465443e-06, + "loss": 1.0262, + "step": 18410 + }, + { + "epoch": 0.7285095611936167, + "grad_norm": 0.9975516455237873, + "learning_rate": 9.391313968325077e-06, + "loss": 1.0205, + "step": 18420 + }, + { + "epoch": 0.7289050604125057, + "grad_norm": 1.147619498671326, + "learning_rate": 9.390213218179645e-06, + "loss": 1.0025, + "step": 18430 + }, + { + "epoch": 0.7293005596313947, + "grad_norm": 1.1986029401913216, + "learning_rate": 9.389111538262268e-06, + "loss": 1.0211, + "step": 18440 + }, + { + "epoch": 0.7296960588502838, + "grad_norm": 1.189262096818869, + "learning_rate": 9.388008928806263e-06, + "loss": 1.0247, + "step": 18450 + }, + { + "epoch": 0.7300915580691728, + "grad_norm": 1.0667874555031875, + "learning_rate": 9.386905390045142e-06, + "loss": 1.0239, + "step": 18460 + }, + { + "epoch": 0.7304870572880618, + "grad_norm": 0.9547270237562987, + "learning_rate": 9.385800922212618e-06, + "loss": 1.0358, + "step": 18470 + }, + { + "epoch": 0.7308825565069509, + "grad_norm": 0.9695529496527657, + "learning_rate": 9.384695525542595e-06, + "loss": 1.0187, + "step": 18480 + }, + { + "epoch": 0.7312780557258399, + "grad_norm": 1.0152062952088161, + "learning_rate": 9.383589200269181e-06, + "loss": 1.0246, + "step": 18490 + }, + { + "epoch": 0.731673554944729, + "grad_norm": 1.0770326148334348, + "learning_rate": 9.382481946626673e-06, + "loss": 1.0261, + "step": 18500 + }, + { + "epoch": 0.732069054163618, + "grad_norm": 1.0345921166723826, + "learning_rate": 9.381373764849571e-06, + "loss": 1.0162, + "step": 18510 + }, + { + "epoch": 0.732464553382507, + "grad_norm": 1.0708522771601607, + "learning_rate": 9.380264655172569e-06, + "loss": 1.0291, + "step": 18520 + }, + { + "epoch": 0.7328600526013961, + "grad_norm": 1.0719678860846598, + "learning_rate": 9.379154617830556e-06, + "loss": 1.0309, + "step": 18530 + }, + { + "epoch": 0.7332555518202851, + "grad_norm": 1.059418646494692, + "learning_rate": 9.37804365305862e-06, + "loss": 1.0252, + "step": 18540 + }, + { + "epoch": 0.7336510510391742, + "grad_norm": 1.0923693514124921, + "learning_rate": 9.376931761092042e-06, + "loss": 1.0389, + "step": 18550 + }, + { + "epoch": 0.7340465502580632, + "grad_norm": 0.9773367022043173, + "learning_rate": 9.375818942166304e-06, + "loss": 1.0092, + "step": 18560 + }, + { + "epoch": 0.7344420494769522, + "grad_norm": 1.064751679259574, + "learning_rate": 9.374705196517082e-06, + "loss": 1.0183, + "step": 18570 + }, + { + "epoch": 0.7348375486958413, + "grad_norm": 1.0461734337854973, + "learning_rate": 9.373590524380248e-06, + "loss": 1.0296, + "step": 18580 + }, + { + "epoch": 0.7352330479147303, + "grad_norm": 1.0101405293159191, + "learning_rate": 9.37247492599187e-06, + "loss": 1.0221, + "step": 18590 + }, + { + "epoch": 0.7356285471336194, + "grad_norm": 1.020775319830127, + "learning_rate": 9.371358401588212e-06, + "loss": 1.0132, + 
"step": 18600 + }, + { + "epoch": 0.7360240463525084, + "grad_norm": 1.1127647281143733, + "learning_rate": 9.370240951405736e-06, + "loss": 1.0172, + "step": 18610 + }, + { + "epoch": 0.7364195455713975, + "grad_norm": 1.1383922818760752, + "learning_rate": 9.369122575681098e-06, + "loss": 1.0488, + "step": 18620 + }, + { + "epoch": 0.7368150447902866, + "grad_norm": 1.1171086386308282, + "learning_rate": 9.368003274651152e-06, + "loss": 1.0241, + "step": 18630 + }, + { + "epoch": 0.7372105440091756, + "grad_norm": 0.9939206769042934, + "learning_rate": 9.366883048552945e-06, + "loss": 1.0041, + "step": 18640 + }, + { + "epoch": 0.7376060432280647, + "grad_norm": 0.9339266962120722, + "learning_rate": 9.365761897623722e-06, + "loss": 1.0028, + "step": 18650 + }, + { + "epoch": 0.7380015424469537, + "grad_norm": 1.0657826513680748, + "learning_rate": 9.364639822100926e-06, + "loss": 1.0142, + "step": 18660 + }, + { + "epoch": 0.7383970416658427, + "grad_norm": 1.1114751812392274, + "learning_rate": 9.36351682222219e-06, + "loss": 1.0247, + "step": 18670 + }, + { + "epoch": 0.7387925408847318, + "grad_norm": 1.0182911899944536, + "learning_rate": 9.362392898225347e-06, + "loss": 1.035, + "step": 18680 + }, + { + "epoch": 0.7391880401036208, + "grad_norm": 1.0005635152557726, + "learning_rate": 9.361268050348428e-06, + "loss": 1.0395, + "step": 18690 + }, + { + "epoch": 0.7395835393225099, + "grad_norm": 1.0128993404430935, + "learning_rate": 9.360142278829653e-06, + "loss": 1.0243, + "step": 18700 + }, + { + "epoch": 0.7399790385413989, + "grad_norm": 1.004920029440176, + "learning_rate": 9.35901558390744e-06, + "loss": 1.0139, + "step": 18710 + }, + { + "epoch": 0.7403745377602879, + "grad_norm": 1.0002744150986418, + "learning_rate": 9.357887965820409e-06, + "loss": 1.0302, + "step": 18720 + }, + { + "epoch": 0.740770036979177, + "grad_norm": 1.0484190542947533, + "learning_rate": 9.356759424807365e-06, + "loss": 1.0363, + "step": 18730 + }, + { + "epoch": 0.741165536198066, + "grad_norm": 1.0076111366352989, + "learning_rate": 9.355629961107316e-06, + "loss": 1.0356, + "step": 18740 + }, + { + "epoch": 0.7415610354169551, + "grad_norm": 0.9036911244433983, + "learning_rate": 9.354499574959463e-06, + "loss": 1.031, + "step": 18750 + }, + { + "epoch": 0.7419565346358441, + "grad_norm": 1.0456535972012757, + "learning_rate": 9.353368266603202e-06, + "loss": 1.0181, + "step": 18760 + }, + { + "epoch": 0.7423520338547331, + "grad_norm": 0.9783379098922605, + "learning_rate": 9.352236036278127e-06, + "loss": 1.0272, + "step": 18770 + }, + { + "epoch": 0.7427475330736222, + "grad_norm": 1.1262272512320328, + "learning_rate": 9.351102884224019e-06, + "loss": 1.0172, + "step": 18780 + }, + { + "epoch": 0.7431430322925112, + "grad_norm": 1.0104228882303397, + "learning_rate": 9.349968810680866e-06, + "loss": 1.0143, + "step": 18790 + }, + { + "epoch": 0.7435385315114003, + "grad_norm": 0.9288780657972774, + "learning_rate": 9.348833815888843e-06, + "loss": 1.0241, + "step": 18800 + }, + { + "epoch": 0.7439340307302893, + "grad_norm": 0.9465221319852352, + "learning_rate": 9.347697900088323e-06, + "loss": 1.0227, + "step": 18810 + }, + { + "epoch": 0.7443295299491783, + "grad_norm": 1.0031384590934056, + "learning_rate": 9.346561063519873e-06, + "loss": 1.0332, + "step": 18820 + }, + { + "epoch": 0.7447250291680674, + "grad_norm": 1.1213781153150284, + "learning_rate": 9.345423306424257e-06, + "loss": 1.0326, + "step": 18830 + }, + { + "epoch": 0.7451205283869564, + "grad_norm": 1.0488927764672529, 
+ "learning_rate": 9.344284629042431e-06, + "loss": 1.0351, + "step": 18840 + }, + { + "epoch": 0.7455160276058455, + "grad_norm": 1.012118316253596, + "learning_rate": 9.343145031615546e-06, + "loss": 1.0344, + "step": 18850 + }, + { + "epoch": 0.7459115268247345, + "grad_norm": 0.9856969792699805, + "learning_rate": 9.342004514384952e-06, + "loss": 1.0205, + "step": 18860 + }, + { + "epoch": 0.7463070260436235, + "grad_norm": 0.9575334310958326, + "learning_rate": 9.34086307759219e-06, + "loss": 1.0175, + "step": 18870 + }, + { + "epoch": 0.7467025252625126, + "grad_norm": 0.9601966119795331, + "learning_rate": 9.339720721478998e-06, + "loss": 1.0431, + "step": 18880 + }, + { + "epoch": 0.7470980244814016, + "grad_norm": 1.06373689786651, + "learning_rate": 9.338577446287305e-06, + "loss": 1.0356, + "step": 18890 + }, + { + "epoch": 0.7474935237002907, + "grad_norm": 1.155558342613977, + "learning_rate": 9.337433252259237e-06, + "loss": 1.0039, + "step": 18900 + }, + { + "epoch": 0.7478890229191797, + "grad_norm": 1.0242148425969468, + "learning_rate": 9.336288139637118e-06, + "loss": 1.0105, + "step": 18910 + }, + { + "epoch": 0.7482845221380687, + "grad_norm": 1.0165882156852342, + "learning_rate": 9.33514210866346e-06, + "loss": 1.0297, + "step": 18920 + }, + { + "epoch": 0.7486800213569578, + "grad_norm": 1.0668748228677531, + "learning_rate": 9.333995159580974e-06, + "loss": 1.0255, + "step": 18930 + }, + { + "epoch": 0.7490755205758468, + "grad_norm": 1.0927084015302633, + "learning_rate": 9.332847292632563e-06, + "loss": 1.0421, + "step": 18940 + }, + { + "epoch": 0.749471019794736, + "grad_norm": 1.0627145192401357, + "learning_rate": 9.331698508061326e-06, + "loss": 1.0158, + "step": 18950 + }, + { + "epoch": 0.749866519013625, + "grad_norm": 1.0023214644900786, + "learning_rate": 9.330548806110559e-06, + "loss": 1.0038, + "step": 18960 + }, + { + "epoch": 0.750262018232514, + "grad_norm": 1.0694419088452047, + "learning_rate": 9.329398187023745e-06, + "loss": 1.0188, + "step": 18970 + }, + { + "epoch": 0.7506575174514031, + "grad_norm": 0.9185680354699801, + "learning_rate": 9.328246651044567e-06, + "loss": 0.9952, + "step": 18980 + }, + { + "epoch": 0.7510530166702921, + "grad_norm": 1.006900840762992, + "learning_rate": 9.327094198416903e-06, + "loss": 1.013, + "step": 18990 + }, + { + "epoch": 0.7514485158891812, + "grad_norm": 1.0093577748613276, + "learning_rate": 9.325940829384819e-06, + "loss": 1.0281, + "step": 19000 + }, + { + "epoch": 0.7518440151080702, + "grad_norm": 1.0860462169035565, + "learning_rate": 9.32478654419258e-06, + "loss": 1.0198, + "step": 19010 + }, + { + "epoch": 0.7522395143269592, + "grad_norm": 0.994771402041234, + "learning_rate": 9.323631343084642e-06, + "loss": 1.0285, + "step": 19020 + }, + { + "epoch": 0.7526350135458483, + "grad_norm": 1.0531262016048346, + "learning_rate": 9.322475226305661e-06, + "loss": 1.0379, + "step": 19030 + }, + { + "epoch": 0.7530305127647373, + "grad_norm": 1.0673140331312077, + "learning_rate": 9.32131819410048e-06, + "loss": 1.0299, + "step": 19040 + }, + { + "epoch": 0.7534260119836264, + "grad_norm": 0.9409190989423175, + "learning_rate": 9.320160246714141e-06, + "loss": 1.0103, + "step": 19050 + }, + { + "epoch": 0.7538215112025154, + "grad_norm": 1.0828491007160792, + "learning_rate": 9.319001384391872e-06, + "loss": 1.0197, + "step": 19060 + }, + { + "epoch": 0.7542170104214044, + "grad_norm": 0.9563940190884601, + "learning_rate": 9.317841607379106e-06, + "loss": 1.021, + "step": 19070 + }, + { + "epoch": 
0.7546125096402935, + "grad_norm": 0.9711286036666847, + "learning_rate": 9.316680915921461e-06, + "loss": 1.0233, + "step": 19080 + }, + { + "epoch": 0.7550080088591825, + "grad_norm": 0.9695610908794037, + "learning_rate": 9.315519310264753e-06, + "loss": 1.0154, + "step": 19090 + }, + { + "epoch": 0.7554035080780716, + "grad_norm": 1.1595785378587375, + "learning_rate": 9.31435679065499e-06, + "loss": 1.0238, + "step": 19100 + }, + { + "epoch": 0.7557990072969606, + "grad_norm": 1.005502156386906, + "learning_rate": 9.313193357338372e-06, + "loss": 1.0329, + "step": 19110 + }, + { + "epoch": 0.7561945065158496, + "grad_norm": 1.0772824039980156, + "learning_rate": 9.312029010561294e-06, + "loss": 1.0284, + "step": 19120 + }, + { + "epoch": 0.7565900057347387, + "grad_norm": 0.961761958552405, + "learning_rate": 9.310863750570348e-06, + "loss": 1.0285, + "step": 19130 + }, + { + "epoch": 0.7569855049536277, + "grad_norm": 1.0259406252479997, + "learning_rate": 9.309697577612312e-06, + "loss": 1.0045, + "step": 19140 + }, + { + "epoch": 0.7573810041725167, + "grad_norm": 0.9099565392120221, + "learning_rate": 9.308530491934167e-06, + "loss": 1.0199, + "step": 19150 + }, + { + "epoch": 0.7577765033914058, + "grad_norm": 1.020017637537644, + "learning_rate": 9.307362493783077e-06, + "loss": 1.0194, + "step": 19160 + }, + { + "epoch": 0.7581720026102948, + "grad_norm": 0.9384524429371721, + "learning_rate": 9.306193583406404e-06, + "loss": 1.012, + "step": 19170 + }, + { + "epoch": 0.7585675018291839, + "grad_norm": 1.0490319279654097, + "learning_rate": 9.305023761051706e-06, + "loss": 1.0247, + "step": 19180 + }, + { + "epoch": 0.7589630010480729, + "grad_norm": 1.0652328994301927, + "learning_rate": 9.30385302696673e-06, + "loss": 1.0309, + "step": 19190 + }, + { + "epoch": 0.7593585002669619, + "grad_norm": 1.0199138231431706, + "learning_rate": 9.302681381399415e-06, + "loss": 1.0272, + "step": 19200 + }, + { + "epoch": 0.759753999485851, + "grad_norm": 1.00140381770798, + "learning_rate": 9.3015088245979e-06, + "loss": 1.0154, + "step": 19210 + }, + { + "epoch": 0.76014949870474, + "grad_norm": 1.090021740909663, + "learning_rate": 9.30033535681051e-06, + "loss": 1.0096, + "step": 19220 + }, + { + "epoch": 0.7605449979236291, + "grad_norm": 1.152257779770484, + "learning_rate": 9.299160978285766e-06, + "loss": 1.0281, + "step": 19230 + }, + { + "epoch": 0.7609404971425181, + "grad_norm": 1.0165380948124314, + "learning_rate": 9.297985689272379e-06, + "loss": 1.0068, + "step": 19240 + }, + { + "epoch": 0.7613359963614071, + "grad_norm": 1.0619715659131068, + "learning_rate": 9.296809490019258e-06, + "loss": 1.0327, + "step": 19250 + }, + { + "epoch": 0.7617314955802962, + "grad_norm": 1.0359303110161961, + "learning_rate": 9.295632380775502e-06, + "loss": 1.0095, + "step": 19260 + }, + { + "epoch": 0.7621269947991852, + "grad_norm": 1.1420116482120743, + "learning_rate": 9.294454361790399e-06, + "loss": 1.0155, + "step": 19270 + }, + { + "epoch": 0.7625224940180743, + "grad_norm": 1.1108425689025752, + "learning_rate": 9.293275433313436e-06, + "loss": 1.0274, + "step": 19280 + }, + { + "epoch": 0.7629179932369634, + "grad_norm": 0.9690075577594031, + "learning_rate": 9.29209559559429e-06, + "loss": 1.047, + "step": 19290 + }, + { + "epoch": 0.7633134924558524, + "grad_norm": 0.9810714172727245, + "learning_rate": 9.290914848882831e-06, + "loss": 1.0434, + "step": 19300 + }, + { + "epoch": 0.7637089916747415, + "grad_norm": 0.9813211700219521, + "learning_rate": 9.289733193429119e-06, + 
"loss": 1.0228, + "step": 19310 + }, + { + "epoch": 0.7641044908936305, + "grad_norm": 0.9576164054215149, + "learning_rate": 9.288550629483408e-06, + "loss": 1.0018, + "step": 19320 + }, + { + "epoch": 0.7644999901125196, + "grad_norm": 1.0455231335606272, + "learning_rate": 9.287367157296146e-06, + "loss": 1.031, + "step": 19330 + }, + { + "epoch": 0.7648954893314086, + "grad_norm": 1.1297181918273749, + "learning_rate": 9.286182777117974e-06, + "loss": 1.0181, + "step": 19340 + }, + { + "epoch": 0.7652909885502976, + "grad_norm": 1.056291522635083, + "learning_rate": 9.28499748919972e-06, + "loss": 1.0262, + "step": 19350 + }, + { + "epoch": 0.7656864877691867, + "grad_norm": 0.9724494560184846, + "learning_rate": 9.28381129379241e-06, + "loss": 1.0141, + "step": 19360 + }, + { + "epoch": 0.7660819869880757, + "grad_norm": 1.043582624117857, + "learning_rate": 9.282624191147258e-06, + "loss": 1.0218, + "step": 19370 + }, + { + "epoch": 0.7664774862069648, + "grad_norm": 1.0650864660468717, + "learning_rate": 9.281436181515673e-06, + "loss": 1.008, + "step": 19380 + }, + { + "epoch": 0.7668729854258538, + "grad_norm": 1.0774913543212994, + "learning_rate": 9.280247265149256e-06, + "loss": 1.0093, + "step": 19390 + }, + { + "epoch": 0.7672684846447428, + "grad_norm": 1.036731805880924, + "learning_rate": 9.2790574422998e-06, + "loss": 1.018, + "step": 19400 + }, + { + "epoch": 0.7676639838636319, + "grad_norm": 1.144514484297006, + "learning_rate": 9.277866713219284e-06, + "loss": 1.0255, + "step": 19410 + }, + { + "epoch": 0.7680594830825209, + "grad_norm": 0.9593936439738194, + "learning_rate": 9.27667507815989e-06, + "loss": 1.0161, + "step": 19420 + }, + { + "epoch": 0.76845498230141, + "grad_norm": 0.9432364439032389, + "learning_rate": 9.27548253737398e-06, + "loss": 1.0168, + "step": 19430 + }, + { + "epoch": 0.768850481520299, + "grad_norm": 0.9577852173455976, + "learning_rate": 9.27428909111412e-06, + "loss": 1.0159, + "step": 19440 + }, + { + "epoch": 0.769245980739188, + "grad_norm": 1.16737808015621, + "learning_rate": 9.273094739633057e-06, + "loss": 0.9975, + "step": 19450 + }, + { + "epoch": 0.7696414799580771, + "grad_norm": 1.0686547602772396, + "learning_rate": 9.271899483183737e-06, + "loss": 0.9994, + "step": 19460 + }, + { + "epoch": 0.7700369791769661, + "grad_norm": 1.0717802552838147, + "learning_rate": 9.270703322019293e-06, + "loss": 1.0035, + "step": 19470 + }, + { + "epoch": 0.7704324783958552, + "grad_norm": 0.9113304418118345, + "learning_rate": 9.269506256393051e-06, + "loss": 1.0215, + "step": 19480 + }, + { + "epoch": 0.7708279776147442, + "grad_norm": 1.0505384794273, + "learning_rate": 9.26830828655853e-06, + "loss": 0.9917, + "step": 19490 + }, + { + "epoch": 0.7712234768336332, + "grad_norm": 1.0493878251717221, + "learning_rate": 9.26710941276944e-06, + "loss": 1.0098, + "step": 19500 + }, + { + "epoch": 0.7716189760525223, + "grad_norm": 0.9743329912014642, + "learning_rate": 9.26590963527968e-06, + "loss": 1.0393, + "step": 19510 + }, + { + "epoch": 0.7720144752714113, + "grad_norm": 1.0295075900448483, + "learning_rate": 9.264708954343344e-06, + "loss": 1.015, + "step": 19520 + }, + { + "epoch": 0.7724099744903004, + "grad_norm": 1.0715856241898993, + "learning_rate": 9.263507370214716e-06, + "loss": 1.0099, + "step": 19530 + }, + { + "epoch": 0.7728054737091894, + "grad_norm": 1.0928812797829903, + "learning_rate": 9.26230488314827e-06, + "loss": 1.0178, + "step": 19540 + }, + { + "epoch": 0.7732009729280784, + "grad_norm": 1.079469361362925, + 
"learning_rate": 9.261101493398672e-06, + "loss": 1.0125, + "step": 19550 + }, + { + "epoch": 0.7735964721469675, + "grad_norm": 1.094048467683584, + "learning_rate": 9.25989720122078e-06, + "loss": 1.0087, + "step": 19560 + }, + { + "epoch": 0.7739919713658565, + "grad_norm": 0.9829794624397377, + "learning_rate": 9.258692006869644e-06, + "loss": 1.026, + "step": 19570 + }, + { + "epoch": 0.7743874705847456, + "grad_norm": 0.9329725298405944, + "learning_rate": 9.2574859106005e-06, + "loss": 0.9901, + "step": 19580 + }, + { + "epoch": 0.7747829698036346, + "grad_norm": 1.0273996978263786, + "learning_rate": 9.25627891266878e-06, + "loss": 1.0258, + "step": 19590 + }, + { + "epoch": 0.7751784690225236, + "grad_norm": 1.0539329548331768, + "learning_rate": 9.255071013330104e-06, + "loss": 1.0119, + "step": 19600 + }, + { + "epoch": 0.7755739682414127, + "grad_norm": 1.0041432290156813, + "learning_rate": 9.253862212840288e-06, + "loss": 1.0112, + "step": 19610 + }, + { + "epoch": 0.7759694674603018, + "grad_norm": 0.9971852827921748, + "learning_rate": 9.252652511455333e-06, + "loss": 1.0056, + "step": 19620 + }, + { + "epoch": 0.7763649666791909, + "grad_norm": 0.9406095710545285, + "learning_rate": 9.251441909431433e-06, + "loss": 1.0154, + "step": 19630 + }, + { + "epoch": 0.7767604658980799, + "grad_norm": 1.142755100981078, + "learning_rate": 9.250230407024975e-06, + "loss": 1.0139, + "step": 19640 + }, + { + "epoch": 0.7771559651169689, + "grad_norm": 1.057150985151994, + "learning_rate": 9.249018004492529e-06, + "loss": 0.9967, + "step": 19650 + }, + { + "epoch": 0.777551464335858, + "grad_norm": 0.9795617707482281, + "learning_rate": 9.247804702090867e-06, + "loss": 1.0221, + "step": 19660 + }, + { + "epoch": 0.777946963554747, + "grad_norm": 1.015999182209421, + "learning_rate": 9.246590500076944e-06, + "loss": 1.0176, + "step": 19670 + }, + { + "epoch": 0.7783424627736361, + "grad_norm": 1.04158190346237, + "learning_rate": 9.245375398707905e-06, + "loss": 1.0209, + "step": 19680 + }, + { + "epoch": 0.7787379619925251, + "grad_norm": 1.122543189280439, + "learning_rate": 9.24415939824109e-06, + "loss": 1.0212, + "step": 19690 + }, + { + "epoch": 0.7791334612114141, + "grad_norm": 0.9261687506749237, + "learning_rate": 9.242942498934025e-06, + "loss": 1.0228, + "step": 19700 + }, + { + "epoch": 0.7795289604303032, + "grad_norm": 1.0028590439493865, + "learning_rate": 9.24172470104443e-06, + "loss": 1.002, + "step": 19710 + }, + { + "epoch": 0.7799244596491922, + "grad_norm": 1.0293759066344548, + "learning_rate": 9.240506004830214e-06, + "loss": 1.0101, + "step": 19720 + }, + { + "epoch": 0.7803199588680813, + "grad_norm": 1.0317117590147789, + "learning_rate": 9.239286410549475e-06, + "loss": 1.0067, + "step": 19730 + }, + { + "epoch": 0.7807154580869703, + "grad_norm": 1.0205275027954295, + "learning_rate": 9.238065918460503e-06, + "loss": 1.0034, + "step": 19740 + }, + { + "epoch": 0.7811109573058593, + "grad_norm": 0.9735273695279139, + "learning_rate": 9.236844528821776e-06, + "loss": 1.0179, + "step": 19750 + }, + { + "epoch": 0.7815064565247484, + "grad_norm": 1.0646165684795013, + "learning_rate": 9.235622241891964e-06, + "loss": 1.0046, + "step": 19760 + }, + { + "epoch": 0.7819019557436374, + "grad_norm": 1.079221881642748, + "learning_rate": 9.234399057929928e-06, + "loss": 1.0046, + "step": 19770 + }, + { + "epoch": 0.7822974549625265, + "grad_norm": 1.0613787563963717, + "learning_rate": 9.233174977194717e-06, + "loss": 1.0135, + "step": 19780 + }, + { + "epoch": 
0.7826929541814155, + "grad_norm": 0.9600904675334615, + "learning_rate": 9.231949999945567e-06, + "loss": 1.021, + "step": 19790 + }, + { + "epoch": 0.7830884534003045, + "grad_norm": 1.027985703630233, + "learning_rate": 9.23072412644191e-06, + "loss": 1.0337, + "step": 19800 + }, + { + "epoch": 0.7834839526191936, + "grad_norm": 0.93390627761561, + "learning_rate": 9.229497356943364e-06, + "loss": 1.0027, + "step": 19810 + }, + { + "epoch": 0.7838794518380826, + "grad_norm": 1.0219463087472611, + "learning_rate": 9.228269691709739e-06, + "loss": 1.0225, + "step": 19820 + }, + { + "epoch": 0.7842749510569716, + "grad_norm": 1.002995355213593, + "learning_rate": 9.227041131001034e-06, + "loss": 1.0218, + "step": 19830 + }, + { + "epoch": 0.7846704502758607, + "grad_norm": 0.9860650591548502, + "learning_rate": 9.225811675077435e-06, + "loss": 1.0288, + "step": 19840 + }, + { + "epoch": 0.7850659494947497, + "grad_norm": 1.1148899724117105, + "learning_rate": 9.22458132419932e-06, + "loss": 1.033, + "step": 19850 + }, + { + "epoch": 0.7854614487136388, + "grad_norm": 0.9534438569029959, + "learning_rate": 9.223350078627258e-06, + "loss": 1.0056, + "step": 19860 + }, + { + "epoch": 0.7858569479325278, + "grad_norm": 0.9898571873086477, + "learning_rate": 9.222117938622004e-06, + "loss": 1.0206, + "step": 19870 + }, + { + "epoch": 0.7862524471514168, + "grad_norm": 1.0681159410788263, + "learning_rate": 9.220884904444505e-06, + "loss": 1.0117, + "step": 19880 + }, + { + "epoch": 0.7866479463703059, + "grad_norm": 0.992718103988424, + "learning_rate": 9.219650976355895e-06, + "loss": 1.0124, + "step": 19890 + }, + { + "epoch": 0.7870434455891949, + "grad_norm": 1.0062492875639375, + "learning_rate": 9.218416154617503e-06, + "loss": 1.0268, + "step": 19900 + }, + { + "epoch": 0.787438944808084, + "grad_norm": 1.0558858349412301, + "learning_rate": 9.217180439490836e-06, + "loss": 1.0039, + "step": 19910 + }, + { + "epoch": 0.787834444026973, + "grad_norm": 1.0229715091797993, + "learning_rate": 9.215943831237604e-06, + "loss": 1.0227, + "step": 19920 + }, + { + "epoch": 0.788229943245862, + "grad_norm": 0.9772301798391267, + "learning_rate": 9.214706330119697e-06, + "loss": 0.9968, + "step": 19930 + }, + { + "epoch": 0.7886254424647511, + "grad_norm": 0.9954296565633302, + "learning_rate": 9.213467936399196e-06, + "loss": 0.9874, + "step": 19940 + }, + { + "epoch": 0.7890209416836401, + "grad_norm": 1.158774509423265, + "learning_rate": 9.212228650338371e-06, + "loss": 1.0161, + "step": 19950 + }, + { + "epoch": 0.7894164409025293, + "grad_norm": 1.0023364016480276, + "learning_rate": 9.210988472199685e-06, + "loss": 1.012, + "step": 19960 + }, + { + "epoch": 0.7898119401214183, + "grad_norm": 1.021651821279293, + "learning_rate": 9.209747402245782e-06, + "loss": 1.0152, + "step": 19970 + }, + { + "epoch": 0.7902074393403073, + "grad_norm": 1.0512413445924476, + "learning_rate": 9.208505440739502e-06, + "loss": 1.0105, + "step": 19980 + }, + { + "epoch": 0.7906029385591964, + "grad_norm": 1.0602035686643103, + "learning_rate": 9.20726258794387e-06, + "loss": 1.0161, + "step": 19990 + }, + { + "epoch": 0.7909984377780854, + "grad_norm": 0.9953783236544859, + "learning_rate": 9.206018844122102e-06, + "loss": 1.0047, + "step": 20000 + }, + { + "epoch": 0.7913939369969745, + "grad_norm": 1.0587613276719485, + "learning_rate": 9.204774209537602e-06, + "loss": 1.0304, + "step": 20010 + }, + { + "epoch": 0.7917894362158635, + "grad_norm": 1.113710621677745, + "learning_rate": 9.203528684453961e-06, + 
"loss": 1.0176, + "step": 20020 + }, + { + "epoch": 0.7921849354347525, + "grad_norm": 1.0342528626614702, + "learning_rate": 9.202282269134959e-06, + "loss": 1.0158, + "step": 20030 + }, + { + "epoch": 0.7925804346536416, + "grad_norm": 1.116941707591573, + "learning_rate": 9.201034963844568e-06, + "loss": 1.0048, + "step": 20040 + }, + { + "epoch": 0.7929759338725306, + "grad_norm": 1.1317385552718788, + "learning_rate": 9.199786768846944e-06, + "loss": 1.0257, + "step": 20050 + }, + { + "epoch": 0.7933714330914197, + "grad_norm": 1.11888521603365, + "learning_rate": 9.198537684406434e-06, + "loss": 1.0037, + "step": 20060 + }, + { + "epoch": 0.7937669323103087, + "grad_norm": 1.1002390924797294, + "learning_rate": 9.197287710787573e-06, + "loss": 1.0333, + "step": 20070 + }, + { + "epoch": 0.7941624315291977, + "grad_norm": 1.1251850282361266, + "learning_rate": 9.196036848255084e-06, + "loss": 1.016, + "step": 20080 + }, + { + "epoch": 0.7945579307480868, + "grad_norm": 1.007544152452515, + "learning_rate": 9.194785097073875e-06, + "loss": 1.0109, + "step": 20090 + }, + { + "epoch": 0.7949534299669758, + "grad_norm": 1.0140803324390137, + "learning_rate": 9.193532457509051e-06, + "loss": 0.9777, + "step": 20100 + }, + { + "epoch": 0.7953489291858649, + "grad_norm": 1.1089816560064845, + "learning_rate": 9.192278929825896e-06, + "loss": 1.0106, + "step": 20110 + }, + { + "epoch": 0.7957444284047539, + "grad_norm": 1.0461554147005818, + "learning_rate": 9.191024514289887e-06, + "loss": 1.0157, + "step": 20120 + }, + { + "epoch": 0.7961399276236429, + "grad_norm": 0.948486945865474, + "learning_rate": 9.189769211166688e-06, + "loss": 1.0277, + "step": 20130 + }, + { + "epoch": 0.796535426842532, + "grad_norm": 1.0156445520609347, + "learning_rate": 9.188513020722149e-06, + "loss": 1.016, + "step": 20140 + }, + { + "epoch": 0.796930926061421, + "grad_norm": 1.0775912190846668, + "learning_rate": 9.187255943222311e-06, + "loss": 1.0016, + "step": 20150 + }, + { + "epoch": 0.7973264252803101, + "grad_norm": 1.0547485733442106, + "learning_rate": 9.1859979789334e-06, + "loss": 1.0107, + "step": 20160 + }, + { + "epoch": 0.7977219244991991, + "grad_norm": 0.9850939277351383, + "learning_rate": 9.184739128121833e-06, + "loss": 1.0135, + "step": 20170 + }, + { + "epoch": 0.7981174237180881, + "grad_norm": 1.0413362027888042, + "learning_rate": 9.183479391054212e-06, + "loss": 1.0206, + "step": 20180 + }, + { + "epoch": 0.7985129229369772, + "grad_norm": 1.0252657507839713, + "learning_rate": 9.182218767997329e-06, + "loss": 1.0061, + "step": 20190 + }, + { + "epoch": 0.7989084221558662, + "grad_norm": 1.1199126021828947, + "learning_rate": 9.180957259218162e-06, + "loss": 1.0145, + "step": 20200 + }, + { + "epoch": 0.7993039213747553, + "grad_norm": 1.030768294725857, + "learning_rate": 9.179694864983874e-06, + "loss": 0.9993, + "step": 20210 + }, + { + "epoch": 0.7996994205936443, + "grad_norm": 1.0615858418506352, + "learning_rate": 9.178431585561825e-06, + "loss": 1.0225, + "step": 20220 + }, + { + "epoch": 0.8000949198125333, + "grad_norm": 0.9416693770891857, + "learning_rate": 9.177167421219548e-06, + "loss": 1.0158, + "step": 20230 + }, + { + "epoch": 0.8004904190314224, + "grad_norm": 1.0000017417854856, + "learning_rate": 9.175902372224777e-06, + "loss": 0.9853, + "step": 20240 + }, + { + "epoch": 0.8008859182503114, + "grad_norm": 1.2554165506836938, + "learning_rate": 9.174636438845426e-06, + "loss": 1.0092, + "step": 20250 + }, + { + "epoch": 0.8012814174692005, + "grad_norm": 
1.1318244966833997, + "learning_rate": 9.173369621349597e-06, + "loss": 1.0144, + "step": 20260 + }, + { + "epoch": 0.8016769166880895, + "grad_norm": 1.1375317281761548, + "learning_rate": 9.172101920005582e-06, + "loss": 1.0166, + "step": 20270 + }, + { + "epoch": 0.8020724159069785, + "grad_norm": 0.9756803045017065, + "learning_rate": 9.170833335081857e-06, + "loss": 1.0109, + "step": 20280 + }, + { + "epoch": 0.8024679151258677, + "grad_norm": 1.115143344151944, + "learning_rate": 9.169563866847086e-06, + "loss": 1.0031, + "step": 20290 + }, + { + "epoch": 0.8028634143447567, + "grad_norm": 1.083087175179526, + "learning_rate": 9.168293515570123e-06, + "loss": 1.0205, + "step": 20300 + }, + { + "epoch": 0.8032589135636458, + "grad_norm": 0.9408082881127293, + "learning_rate": 9.167022281520007e-06, + "loss": 1.0026, + "step": 20310 + }, + { + "epoch": 0.8036544127825348, + "grad_norm": 1.0748663499652444, + "learning_rate": 9.165750164965961e-06, + "loss": 1.0024, + "step": 20320 + }, + { + "epoch": 0.8040499120014238, + "grad_norm": 1.1439582786245392, + "learning_rate": 9.164477166177397e-06, + "loss": 0.9995, + "step": 20330 + }, + { + "epoch": 0.8044454112203129, + "grad_norm": 1.030662329697222, + "learning_rate": 9.163203285423917e-06, + "loss": 1.0135, + "step": 20340 + }, + { + "epoch": 0.8048409104392019, + "grad_norm": 1.1445213134877017, + "learning_rate": 9.161928522975305e-06, + "loss": 1.0036, + "step": 20350 + }, + { + "epoch": 0.805236409658091, + "grad_norm": 0.9941138888962111, + "learning_rate": 9.160652879101537e-06, + "loss": 1.0358, + "step": 20360 + }, + { + "epoch": 0.80563190887698, + "grad_norm": 0.9961305624271573, + "learning_rate": 9.159376354072769e-06, + "loss": 0.9897, + "step": 20370 + }, + { + "epoch": 0.806027408095869, + "grad_norm": 0.9991187306679739, + "learning_rate": 9.158098948159348e-06, + "loss": 1.0185, + "step": 20380 + }, + { + "epoch": 0.8064229073147581, + "grad_norm": 1.0520312040744422, + "learning_rate": 9.156820661631807e-06, + "loss": 0.9839, + "step": 20390 + }, + { + "epoch": 0.8068184065336471, + "grad_norm": 1.1059790334995376, + "learning_rate": 9.155541494760865e-06, + "loss": 1.0156, + "step": 20400 + }, + { + "epoch": 0.8072139057525362, + "grad_norm": 1.1258721010655148, + "learning_rate": 9.15426144781743e-06, + "loss": 1.0006, + "step": 20410 + }, + { + "epoch": 0.8076094049714252, + "grad_norm": 1.1017557773545024, + "learning_rate": 9.15298052107259e-06, + "loss": 0.9823, + "step": 20420 + }, + { + "epoch": 0.8080049041903142, + "grad_norm": 1.0281770283228173, + "learning_rate": 9.151698714797625e-06, + "loss": 1.0263, + "step": 20430 + }, + { + "epoch": 0.8084004034092033, + "grad_norm": 1.190826235966877, + "learning_rate": 9.150416029264e-06, + "loss": 1.0205, + "step": 20440 + }, + { + "epoch": 0.8087959026280923, + "grad_norm": 1.0144729083676585, + "learning_rate": 9.149132464743367e-06, + "loss": 1.0191, + "step": 20450 + }, + { + "epoch": 0.8091914018469814, + "grad_norm": 1.1409518677606938, + "learning_rate": 9.147848021507561e-06, + "loss": 1.0155, + "step": 20460 + }, + { + "epoch": 0.8095869010658704, + "grad_norm": 0.9853256029033983, + "learning_rate": 9.146562699828607e-06, + "loss": 1.0208, + "step": 20470 + }, + { + "epoch": 0.8099824002847594, + "grad_norm": 1.0521218898071147, + "learning_rate": 9.145276499978712e-06, + "loss": 1.0128, + "step": 20480 + }, + { + "epoch": 0.8103778995036485, + "grad_norm": 0.976138875676817, + "learning_rate": 9.14398942223027e-06, + "loss": 1.0112, + "step": 20490 
+ }, + { + "epoch": 0.8107733987225375, + "grad_norm": 1.1369504950315994, + "learning_rate": 9.142701466855867e-06, + "loss": 1.0001, + "step": 20500 + }, + { + "epoch": 0.8111688979414265, + "grad_norm": 1.0333480817123695, + "learning_rate": 9.141412634128266e-06, + "loss": 1.0107, + "step": 20510 + }, + { + "epoch": 0.8115643971603156, + "grad_norm": 1.0764740894511584, + "learning_rate": 9.140122924320419e-06, + "loss": 0.9936, + "step": 20520 + }, + { + "epoch": 0.8119598963792046, + "grad_norm": 0.9546016757818112, + "learning_rate": 9.138832337705467e-06, + "loss": 0.9954, + "step": 20530 + }, + { + "epoch": 0.8123553955980937, + "grad_norm": 0.9871714005710543, + "learning_rate": 9.137540874556734e-06, + "loss": 1.0117, + "step": 20540 + }, + { + "epoch": 0.8127508948169827, + "grad_norm": 0.945592475259025, + "learning_rate": 9.136248535147729e-06, + "loss": 0.999, + "step": 20550 + }, + { + "epoch": 0.8131463940358717, + "grad_norm": 1.0467075391893805, + "learning_rate": 9.134955319752146e-06, + "loss": 1.0215, + "step": 20560 + }, + { + "epoch": 0.8135418932547608, + "grad_norm": 1.04763511832311, + "learning_rate": 9.133661228643866e-06, + "loss": 1.0234, + "step": 20570 + }, + { + "epoch": 0.8139373924736498, + "grad_norm": 1.187121967630847, + "learning_rate": 9.132366262096959e-06, + "loss": 0.9904, + "step": 20580 + }, + { + "epoch": 0.8143328916925389, + "grad_norm": 1.0276544619131693, + "learning_rate": 9.131070420385673e-06, + "loss": 1.0153, + "step": 20590 + }, + { + "epoch": 0.8147283909114279, + "grad_norm": 0.957530284172485, + "learning_rate": 9.129773703784448e-06, + "loss": 1.0146, + "step": 20600 + }, + { + "epoch": 0.8151238901303169, + "grad_norm": 1.1495994091939181, + "learning_rate": 9.1284761125679e-06, + "loss": 1.029, + "step": 20610 + }, + { + "epoch": 0.815519389349206, + "grad_norm": 1.0103176916944374, + "learning_rate": 9.127177647010845e-06, + "loss": 1.0303, + "step": 20620 + }, + { + "epoch": 0.8159148885680951, + "grad_norm": 1.0050906077864001, + "learning_rate": 9.125878307388272e-06, + "loss": 1.0089, + "step": 20630 + }, + { + "epoch": 0.8163103877869842, + "grad_norm": 0.974353635775406, + "learning_rate": 9.124578093975358e-06, + "loss": 1.0041, + "step": 20640 + }, + { + "epoch": 0.8167058870058732, + "grad_norm": 1.0383845920556423, + "learning_rate": 9.123277007047467e-06, + "loss": 1.0038, + "step": 20650 + }, + { + "epoch": 0.8171013862247622, + "grad_norm": 1.1946496566770903, + "learning_rate": 9.121975046880146e-06, + "loss": 1.0082, + "step": 20660 + }, + { + "epoch": 0.8174968854436513, + "grad_norm": 1.102640351886497, + "learning_rate": 9.120672213749129e-06, + "loss": 0.9968, + "step": 20670 + }, + { + "epoch": 0.8178923846625403, + "grad_norm": 1.0274708984122392, + "learning_rate": 9.119368507930332e-06, + "loss": 1.0024, + "step": 20680 + }, + { + "epoch": 0.8182878838814294, + "grad_norm": 0.9487788908666108, + "learning_rate": 9.11806392969986e-06, + "loss": 1.0179, + "step": 20690 + }, + { + "epoch": 0.8186833831003184, + "grad_norm": 1.1192067496020226, + "learning_rate": 9.116758479334e-06, + "loss": 1.0051, + "step": 20700 + }, + { + "epoch": 0.8190788823192074, + "grad_norm": 1.1195359956028454, + "learning_rate": 9.115452157109223e-06, + "loss": 1.0231, + "step": 20710 + }, + { + "epoch": 0.8194743815380965, + "grad_norm": 0.9781688862836991, + "learning_rate": 9.114144963302185e-06, + "loss": 1.0001, + "step": 20720 + }, + { + "epoch": 0.8198698807569855, + "grad_norm": 1.0556996456247876, + "learning_rate": 
9.112836898189728e-06, + "loss": 1.0067, + "step": 20730 + }, + { + "epoch": 0.8202653799758746, + "grad_norm": 1.0364740558005778, + "learning_rate": 9.111527962048878e-06, + "loss": 1.0026, + "step": 20740 + }, + { + "epoch": 0.8206608791947636, + "grad_norm": 1.029224935259368, + "learning_rate": 9.110218155156845e-06, + "loss": 0.9798, + "step": 20750 + }, + { + "epoch": 0.8210563784136526, + "grad_norm": 0.9687082383237153, + "learning_rate": 9.108907477791025e-06, + "loss": 0.9967, + "step": 20760 + }, + { + "epoch": 0.8214518776325417, + "grad_norm": 1.1099612391952516, + "learning_rate": 9.107595930228995e-06, + "loss": 1.0094, + "step": 20770 + }, + { + "epoch": 0.8218473768514307, + "grad_norm": 1.100023961165057, + "learning_rate": 9.106283512748518e-06, + "loss": 1.0056, + "step": 20780 + }, + { + "epoch": 0.8222428760703198, + "grad_norm": 1.1170383977979603, + "learning_rate": 9.104970225627544e-06, + "loss": 1.0035, + "step": 20790 + }, + { + "epoch": 0.8226383752892088, + "grad_norm": 1.0664944115887174, + "learning_rate": 9.103656069144203e-06, + "loss": 1.0076, + "step": 20800 + }, + { + "epoch": 0.8230338745080978, + "grad_norm": 1.057179360255666, + "learning_rate": 9.10234104357681e-06, + "loss": 1.0197, + "step": 20810 + }, + { + "epoch": 0.8234293737269869, + "grad_norm": 1.0514999811425032, + "learning_rate": 9.101025149203868e-06, + "loss": 1.0132, + "step": 20820 + }, + { + "epoch": 0.8238248729458759, + "grad_norm": 1.182089975356494, + "learning_rate": 9.099708386304059e-06, + "loss": 0.9924, + "step": 20830 + }, + { + "epoch": 0.824220372164765, + "grad_norm": 1.0404828424727715, + "learning_rate": 9.098390755156248e-06, + "loss": 1.0031, + "step": 20840 + }, + { + "epoch": 0.824615871383654, + "grad_norm": 1.0427993789695162, + "learning_rate": 9.097072256039493e-06, + "loss": 1.0207, + "step": 20850 + }, + { + "epoch": 0.825011370602543, + "grad_norm": 1.1547262038143493, + "learning_rate": 9.095752889233022e-06, + "loss": 1.0034, + "step": 20860 + }, + { + "epoch": 0.8254068698214321, + "grad_norm": 1.0519044488185854, + "learning_rate": 9.094432655016261e-06, + "loss": 0.9782, + "step": 20870 + }, + { + "epoch": 0.8258023690403211, + "grad_norm": 1.0126784299359057, + "learning_rate": 9.09311155366881e-06, + "loss": 1.008, + "step": 20880 + }, + { + "epoch": 0.8261978682592102, + "grad_norm": 0.9799142297044481, + "learning_rate": 9.091789585470455e-06, + "loss": 0.9959, + "step": 20890 + }, + { + "epoch": 0.8265933674780992, + "grad_norm": 1.0333492223830443, + "learning_rate": 9.090466750701168e-06, + "loss": 0.9882, + "step": 20900 + }, + { + "epoch": 0.8269888666969882, + "grad_norm": 1.0041615656713214, + "learning_rate": 9.089143049641101e-06, + "loss": 1.0112, + "step": 20910 + }, + { + "epoch": 0.8273843659158773, + "grad_norm": 1.1141159412870034, + "learning_rate": 9.087818482570593e-06, + "loss": 1.0058, + "step": 20920 + }, + { + "epoch": 0.8277798651347663, + "grad_norm": 1.0850635629839598, + "learning_rate": 9.086493049770165e-06, + "loss": 1.0137, + "step": 20930 + }, + { + "epoch": 0.8281753643536554, + "grad_norm": 1.1006933245055717, + "learning_rate": 9.085166751520517e-06, + "loss": 1.019, + "step": 20940 + }, + { + "epoch": 0.8285708635725444, + "grad_norm": 1.1516567615085185, + "learning_rate": 9.083839588102539e-06, + "loss": 1.0017, + "step": 20950 + }, + { + "epoch": 0.8289663627914335, + "grad_norm": 1.0424298342039158, + "learning_rate": 9.082511559797302e-06, + "loss": 0.9985, + "step": 20960 + }, + { + "epoch": 
0.8293618620103226, + "grad_norm": 1.0673143560532994, + "learning_rate": 9.081182666886059e-06, + "loss": 0.9949, + "step": 20970 + }, + { + "epoch": 0.8297573612292116, + "grad_norm": 1.0369142970444467, + "learning_rate": 9.079852909650247e-06, + "loss": 0.999, + "step": 20980 + }, + { + "epoch": 0.8301528604481007, + "grad_norm": 1.0804231131520636, + "learning_rate": 9.078522288371486e-06, + "loss": 0.9986, + "step": 20990 + }, + { + "epoch": 0.8305483596669897, + "grad_norm": 1.0560859100914735, + "learning_rate": 9.077190803331578e-06, + "loss": 1.0005, + "step": 21000 + }, + { + "epoch": 0.8309438588858787, + "grad_norm": 0.9761793889965177, + "learning_rate": 9.075858454812509e-06, + "loss": 1.0147, + "step": 21010 + }, + { + "epoch": 0.8313393581047678, + "grad_norm": 1.0433326934312541, + "learning_rate": 9.074525243096448e-06, + "loss": 1.0127, + "step": 21020 + }, + { + "epoch": 0.8317348573236568, + "grad_norm": 1.0396964022444006, + "learning_rate": 9.073191168465744e-06, + "loss": 1.0276, + "step": 21030 + }, + { + "epoch": 0.8321303565425459, + "grad_norm": 0.8809520769538411, + "learning_rate": 9.071856231202935e-06, + "loss": 0.9986, + "step": 21040 + }, + { + "epoch": 0.8325258557614349, + "grad_norm": 0.9505033964683348, + "learning_rate": 9.070520431590738e-06, + "loss": 1.0031, + "step": 21050 + }, + { + "epoch": 0.8329213549803239, + "grad_norm": 1.0667653702305564, + "learning_rate": 9.069183769912047e-06, + "loss": 1.0074, + "step": 21060 + }, + { + "epoch": 0.833316854199213, + "grad_norm": 1.0369305863001432, + "learning_rate": 9.067846246449949e-06, + "loss": 0.9934, + "step": 21070 + }, + { + "epoch": 0.833712353418102, + "grad_norm": 1.1159581307715478, + "learning_rate": 9.066507861487706e-06, + "loss": 1.0011, + "step": 21080 + }, + { + "epoch": 0.834107852636991, + "grad_norm": 1.0835233058465736, + "learning_rate": 9.065168615308768e-06, + "loss": 0.9939, + "step": 21090 + }, + { + "epoch": 0.8345033518558801, + "grad_norm": 0.9610796517208031, + "learning_rate": 9.06382850819676e-06, + "loss": 0.9946, + "step": 21100 + }, + { + "epoch": 0.8348988510747691, + "grad_norm": 1.1627643870723747, + "learning_rate": 9.062487540435499e-06, + "loss": 0.9951, + "step": 21110 + }, + { + "epoch": 0.8352943502936582, + "grad_norm": 1.0511818022674042, + "learning_rate": 9.061145712308976e-06, + "loss": 0.9895, + "step": 21120 + }, + { + "epoch": 0.8356898495125472, + "grad_norm": 1.024891348022398, + "learning_rate": 9.059803024101366e-06, + "loss": 1.0033, + "step": 21130 + }, + { + "epoch": 0.8360853487314363, + "grad_norm": 1.0386602294141605, + "learning_rate": 9.05845947609703e-06, + "loss": 0.9991, + "step": 21140 + }, + { + "epoch": 0.8364808479503253, + "grad_norm": 0.944619408084996, + "learning_rate": 9.057115068580507e-06, + "loss": 1.0098, + "step": 21150 + }, + { + "epoch": 0.8368763471692143, + "grad_norm": 1.0457033829753168, + "learning_rate": 9.055769801836519e-06, + "loss": 0.9846, + "step": 21160 + }, + { + "epoch": 0.8372718463881034, + "grad_norm": 1.0629479327654794, + "learning_rate": 9.05442367614997e-06, + "loss": 1.0071, + "step": 21170 + }, + { + "epoch": 0.8376673456069924, + "grad_norm": 1.1416288433649697, + "learning_rate": 9.053076691805951e-06, + "loss": 1.0018, + "step": 21180 + }, + { + "epoch": 0.8380628448258814, + "grad_norm": 1.0397006095640355, + "learning_rate": 9.051728849089725e-06, + "loss": 1.0207, + "step": 21190 + }, + { + "epoch": 0.8384583440447705, + "grad_norm": 1.060035159193279, + "learning_rate": 
9.050380148286742e-06, + "loss": 1.0111, + "step": 21200 + }, + { + "epoch": 0.8388538432636595, + "grad_norm": 0.9273487618593962, + "learning_rate": 9.049030589682638e-06, + "loss": 1.0094, + "step": 21210 + }, + { + "epoch": 0.8392493424825486, + "grad_norm": 1.0775456316393612, + "learning_rate": 9.047680173563222e-06, + "loss": 1.0008, + "step": 21220 + }, + { + "epoch": 0.8396448417014376, + "grad_norm": 0.9917818778239733, + "learning_rate": 9.046328900214492e-06, + "loss": 0.9977, + "step": 21230 + }, + { + "epoch": 0.8400403409203266, + "grad_norm": 1.0280943941438079, + "learning_rate": 9.044976769922624e-06, + "loss": 0.9925, + "step": 21240 + }, + { + "epoch": 0.8404358401392157, + "grad_norm": 1.0472990897542513, + "learning_rate": 9.043623782973975e-06, + "loss": 1.0208, + "step": 21250 + }, + { + "epoch": 0.8408313393581047, + "grad_norm": 1.1377024546107963, + "learning_rate": 9.042269939655084e-06, + "loss": 0.9907, + "step": 21260 + }, + { + "epoch": 0.8412268385769938, + "grad_norm": 1.060852444732565, + "learning_rate": 9.040915240252675e-06, + "loss": 1.001, + "step": 21270 + }, + { + "epoch": 0.8416223377958828, + "grad_norm": 1.1050320219907457, + "learning_rate": 9.039559685053644e-06, + "loss": 1.0054, + "step": 21280 + }, + { + "epoch": 0.8420178370147718, + "grad_norm": 1.0659929404608206, + "learning_rate": 9.03820327434508e-06, + "loss": 0.9991, + "step": 21290 + }, + { + "epoch": 0.842413336233661, + "grad_norm": 1.1021131685880639, + "learning_rate": 9.036846008414248e-06, + "loss": 1.0077, + "step": 21300 + }, + { + "epoch": 0.84280883545255, + "grad_norm": 1.1174778826419904, + "learning_rate": 9.03548788754859e-06, + "loss": 0.986, + "step": 21310 + }, + { + "epoch": 0.8432043346714391, + "grad_norm": 0.9888560736588828, + "learning_rate": 9.034128912035732e-06, + "loss": 1.0116, + "step": 21320 + }, + { + "epoch": 0.8435998338903281, + "grad_norm": 1.051027417292185, + "learning_rate": 9.032769082163486e-06, + "loss": 0.9965, + "step": 21330 + }, + { + "epoch": 0.8439953331092171, + "grad_norm": 0.9397173569391313, + "learning_rate": 9.031408398219838e-06, + "loss": 1.0008, + "step": 21340 + }, + { + "epoch": 0.8443908323281062, + "grad_norm": 1.0710147167031807, + "learning_rate": 9.030046860492959e-06, + "loss": 0.995, + "step": 21350 + }, + { + "epoch": 0.8447863315469952, + "grad_norm": 0.9900453053139646, + "learning_rate": 9.028684469271198e-06, + "loss": 0.9836, + "step": 21360 + }, + { + "epoch": 0.8451818307658843, + "grad_norm": 1.0061774957291256, + "learning_rate": 9.027321224843086e-06, + "loss": 1.0056, + "step": 21370 + }, + { + "epoch": 0.8455773299847733, + "grad_norm": 0.942951836667559, + "learning_rate": 9.025957127497336e-06, + "loss": 1.0091, + "step": 21380 + }, + { + "epoch": 0.8459728292036623, + "grad_norm": 1.1064081530376138, + "learning_rate": 9.024592177522839e-06, + "loss": 1.0043, + "step": 21390 + }, + { + "epoch": 0.8463683284225514, + "grad_norm": 1.0057545239273362, + "learning_rate": 9.02322637520867e-06, + "loss": 0.9977, + "step": 21400 + }, + { + "epoch": 0.8467638276414404, + "grad_norm": 1.0226611037397328, + "learning_rate": 9.02185972084408e-06, + "loss": 0.998, + "step": 21410 + }, + { + "epoch": 0.8471593268603295, + "grad_norm": 1.0030626165590641, + "learning_rate": 9.020492214718506e-06, + "loss": 1.0093, + "step": 21420 + }, + { + "epoch": 0.8475548260792185, + "grad_norm": 1.0146955727227316, + "learning_rate": 9.01912385712156e-06, + "loss": 0.9858, + "step": 21430 + }, + { + "epoch": 0.8479503252981075, 
+ "grad_norm": 0.9844640421958475, + "learning_rate": 9.017754648343037e-06, + "loss": 0.9998, + "step": 21440 + }, + { + "epoch": 0.8483458245169966, + "grad_norm": 1.0082705120528728, + "learning_rate": 9.016384588672913e-06, + "loss": 0.9915, + "step": 21450 + }, + { + "epoch": 0.8487413237358856, + "grad_norm": 1.0431960598191798, + "learning_rate": 9.015013678401342e-06, + "loss": 1.0083, + "step": 21460 + }, + { + "epoch": 0.8491368229547747, + "grad_norm": 1.0864357377053249, + "learning_rate": 9.01364191781866e-06, + "loss": 1.0011, + "step": 21470 + }, + { + "epoch": 0.8495323221736637, + "grad_norm": 1.1351534671556414, + "learning_rate": 9.012269307215383e-06, + "loss": 1.0043, + "step": 21480 + }, + { + "epoch": 0.8499278213925527, + "grad_norm": 1.1022353161351826, + "learning_rate": 9.010895846882206e-06, + "loss": 1.0193, + "step": 21490 + }, + { + "epoch": 0.8503233206114418, + "grad_norm": 0.9740184358260867, + "learning_rate": 9.009521537110002e-06, + "loss": 0.9981, + "step": 21500 + }, + { + "epoch": 0.8507188198303308, + "grad_norm": 1.131563506295198, + "learning_rate": 9.00814637818983e-06, + "loss": 1.005, + "step": 21510 + }, + { + "epoch": 0.8511143190492199, + "grad_norm": 1.1540534837572083, + "learning_rate": 9.006770370412923e-06, + "loss": 1.0032, + "step": 21520 + }, + { + "epoch": 0.8515098182681089, + "grad_norm": 1.0073112416678964, + "learning_rate": 9.005393514070697e-06, + "loss": 1.0033, + "step": 21530 + }, + { + "epoch": 0.8519053174869979, + "grad_norm": 1.0622763490606484, + "learning_rate": 9.004015809454745e-06, + "loss": 0.9957, + "step": 21540 + }, + { + "epoch": 0.852300816705887, + "grad_norm": 1.1715352590525332, + "learning_rate": 9.002637256856843e-06, + "loss": 0.9959, + "step": 21550 + }, + { + "epoch": 0.852696315924776, + "grad_norm": 1.05505835647216, + "learning_rate": 9.001257856568943e-06, + "loss": 0.9907, + "step": 21560 + }, + { + "epoch": 0.853091815143665, + "grad_norm": 0.9607283176448762, + "learning_rate": 8.99987760888318e-06, + "loss": 1.004, + "step": 21570 + }, + { + "epoch": 0.8534873143625541, + "grad_norm": 1.0528036250343198, + "learning_rate": 8.998496514091866e-06, + "loss": 1.0027, + "step": 21580 + }, + { + "epoch": 0.8538828135814431, + "grad_norm": 1.0896795131708021, + "learning_rate": 8.997114572487493e-06, + "loss": 0.9782, + "step": 21590 + }, + { + "epoch": 0.8542783128003322, + "grad_norm": 1.1829755608166352, + "learning_rate": 8.995731784362732e-06, + "loss": 0.9768, + "step": 21600 + }, + { + "epoch": 0.8546738120192212, + "grad_norm": 1.0271548528462164, + "learning_rate": 8.994348150010437e-06, + "loss": 0.9919, + "step": 21610 + }, + { + "epoch": 0.8550693112381103, + "grad_norm": 1.0570025898072521, + "learning_rate": 8.992963669723634e-06, + "loss": 0.9893, + "step": 21620 + }, + { + "epoch": 0.8554648104569994, + "grad_norm": 0.9828930706020425, + "learning_rate": 8.991578343795534e-06, + "loss": 1.0223, + "step": 21630 + }, + { + "epoch": 0.8558603096758884, + "grad_norm": 1.0433199204674395, + "learning_rate": 8.990192172519526e-06, + "loss": 1.0084, + "step": 21640 + }, + { + "epoch": 0.8562558088947775, + "grad_norm": 1.0427539307500984, + "learning_rate": 8.988805156189175e-06, + "loss": 1.0067, + "step": 21650 + }, + { + "epoch": 0.8566513081136665, + "grad_norm": 1.0433525035536786, + "learning_rate": 8.987417295098232e-06, + "loss": 0.9811, + "step": 21660 + }, + { + "epoch": 0.8570468073325556, + "grad_norm": 1.0916431735324656, + "learning_rate": 8.986028589540617e-06, + "loss": 
0.9871, + "step": 21670 + }, + { + "epoch": 0.8574423065514446, + "grad_norm": 1.0516037705700454, + "learning_rate": 8.984639039810434e-06, + "loss": 0.9926, + "step": 21680 + }, + { + "epoch": 0.8578378057703336, + "grad_norm": 1.2286205294296046, + "learning_rate": 8.983248646201971e-06, + "loss": 0.9908, + "step": 21690 + }, + { + "epoch": 0.8582333049892227, + "grad_norm": 0.9482057633397742, + "learning_rate": 8.981857409009686e-06, + "loss": 1.0063, + "step": 21700 + }, + { + "epoch": 0.8586288042081117, + "grad_norm": 1.0408234686934443, + "learning_rate": 8.98046532852822e-06, + "loss": 1.0007, + "step": 21710 + }, + { + "epoch": 0.8590243034270008, + "grad_norm": 1.1059918742404784, + "learning_rate": 8.97907240505239e-06, + "loss": 0.9865, + "step": 21720 + }, + { + "epoch": 0.8594198026458898, + "grad_norm": 0.9863986551725117, + "learning_rate": 8.977678638877196e-06, + "loss": 0.9989, + "step": 21730 + }, + { + "epoch": 0.8598153018647788, + "grad_norm": 1.0710026362571239, + "learning_rate": 8.976284030297813e-06, + "loss": 1.0047, + "step": 21740 + }, + { + "epoch": 0.8602108010836679, + "grad_norm": 0.9110605982412614, + "learning_rate": 8.974888579609594e-06, + "loss": 0.9844, + "step": 21750 + }, + { + "epoch": 0.8606063003025569, + "grad_norm": 1.1571559798879532, + "learning_rate": 8.973492287108071e-06, + "loss": 0.9954, + "step": 21760 + }, + { + "epoch": 0.861001799521446, + "grad_norm": 0.9501566723081118, + "learning_rate": 8.97209515308896e-06, + "loss": 1.001, + "step": 21770 + }, + { + "epoch": 0.861397298740335, + "grad_norm": 1.0770423546268713, + "learning_rate": 8.970697177848144e-06, + "loss": 1.0076, + "step": 21780 + }, + { + "epoch": 0.861792797959224, + "grad_norm": 1.0911217915233409, + "learning_rate": 8.969298361681693e-06, + "loss": 1.0011, + "step": 21790 + }, + { + "epoch": 0.8621882971781131, + "grad_norm": 1.124812243623449, + "learning_rate": 8.96789870488585e-06, + "loss": 0.9953, + "step": 21800 + }, + { + "epoch": 0.8625837963970021, + "grad_norm": 0.9903946628456698, + "learning_rate": 8.966498207757042e-06, + "loss": 0.9938, + "step": 21810 + }, + { + "epoch": 0.8629792956158912, + "grad_norm": 1.0302636106438623, + "learning_rate": 8.965096870591867e-06, + "loss": 0.9764, + "step": 21820 + }, + { + "epoch": 0.8633747948347802, + "grad_norm": 0.9598588924155196, + "learning_rate": 8.963694693687108e-06, + "loss": 0.9936, + "step": 21830 + }, + { + "epoch": 0.8637702940536692, + "grad_norm": 1.2027681313282046, + "learning_rate": 8.962291677339718e-06, + "loss": 0.9953, + "step": 21840 + }, + { + "epoch": 0.8641657932725583, + "grad_norm": 1.0398833695261345, + "learning_rate": 8.960887821846833e-06, + "loss": 1.0061, + "step": 21850 + }, + { + "epoch": 0.8645612924914473, + "grad_norm": 1.1869989079615797, + "learning_rate": 8.959483127505767e-06, + "loss": 0.9879, + "step": 21860 + }, + { + "epoch": 0.8649567917103363, + "grad_norm": 1.1522357731525883, + "learning_rate": 8.958077594614009e-06, + "loss": 0.9848, + "step": 21870 + }, + { + "epoch": 0.8653522909292254, + "grad_norm": 1.0823567076143104, + "learning_rate": 8.956671223469227e-06, + "loss": 0.996, + "step": 21880 + }, + { + "epoch": 0.8657477901481144, + "grad_norm": 1.0012564144573024, + "learning_rate": 8.955264014369265e-06, + "loss": 0.9962, + "step": 21890 + }, + { + "epoch": 0.8661432893670035, + "grad_norm": 1.0154924701018986, + "learning_rate": 8.953855967612149e-06, + "loss": 0.999, + "step": 21900 + }, + { + "epoch": 0.8665387885858925, + "grad_norm": 
1.008104276895344, + "learning_rate": 8.952447083496076e-06, + "loss": 0.9836, + "step": 21910 + }, + { + "epoch": 0.8669342878047815, + "grad_norm": 1.063103455610592, + "learning_rate": 8.951037362319426e-06, + "loss": 1.002, + "step": 21920 + }, + { + "epoch": 0.8673297870236706, + "grad_norm": 1.129804248194133, + "learning_rate": 8.949626804380752e-06, + "loss": 0.9851, + "step": 21930 + }, + { + "epoch": 0.8677252862425596, + "grad_norm": 0.9930342968555023, + "learning_rate": 8.948215409978786e-06, + "loss": 0.9921, + "step": 21940 + }, + { + "epoch": 0.8681207854614487, + "grad_norm": 1.1075912092647413, + "learning_rate": 8.946803179412438e-06, + "loss": 0.9981, + "step": 21950 + }, + { + "epoch": 0.8685162846803377, + "grad_norm": 1.1044526463396946, + "learning_rate": 8.945390112980793e-06, + "loss": 0.9946, + "step": 21960 + }, + { + "epoch": 0.8689117838992269, + "grad_norm": 1.0844903696763522, + "learning_rate": 8.943976210983116e-06, + "loss": 0.9831, + "step": 21970 + }, + { + "epoch": 0.8693072831181159, + "grad_norm": 1.0284321244525942, + "learning_rate": 8.942561473718849e-06, + "loss": 0.9993, + "step": 21980 + }, + { + "epoch": 0.8697027823370049, + "grad_norm": 1.0408457188164864, + "learning_rate": 8.941145901487604e-06, + "loss": 0.9863, + "step": 21990 + }, + { + "epoch": 0.870098281555894, + "grad_norm": 1.1083249765702519, + "learning_rate": 8.939729494589178e-06, + "loss": 0.9806, + "step": 22000 + }, + { + "epoch": 0.870493780774783, + "grad_norm": 0.9955913062759518, + "learning_rate": 8.93831225332354e-06, + "loss": 1.0049, + "step": 22010 + }, + { + "epoch": 0.870889279993672, + "grad_norm": 1.159117440647705, + "learning_rate": 8.93689417799084e-06, + "loss": 0.9933, + "step": 22020 + }, + { + "epoch": 0.8712847792125611, + "grad_norm": 1.121029602431043, + "learning_rate": 8.9354752688914e-06, + "loss": 1.0084, + "step": 22030 + }, + { + "epoch": 0.8716802784314501, + "grad_norm": 1.0368559365343235, + "learning_rate": 8.934055526325723e-06, + "loss": 0.9996, + "step": 22040 + }, + { + "epoch": 0.8720757776503392, + "grad_norm": 0.9378608607239615, + "learning_rate": 8.932634950594483e-06, + "loss": 0.9906, + "step": 22050 + }, + { + "epoch": 0.8724712768692282, + "grad_norm": 0.9926488457193317, + "learning_rate": 8.931213541998535e-06, + "loss": 0.987, + "step": 22060 + }, + { + "epoch": 0.8728667760881172, + "grad_norm": 0.9791296220617621, + "learning_rate": 8.92979130083891e-06, + "loss": 0.992, + "step": 22070 + }, + { + "epoch": 0.8732622753070063, + "grad_norm": 1.1667133074922789, + "learning_rate": 8.928368227416813e-06, + "loss": 0.9867, + "step": 22080 + }, + { + "epoch": 0.8736577745258953, + "grad_norm": 1.045978411357206, + "learning_rate": 8.926944322033627e-06, + "loss": 1.0047, + "step": 22090 + }, + { + "epoch": 0.8740532737447844, + "grad_norm": 1.141466349392, + "learning_rate": 8.925519584990911e-06, + "loss": 0.9993, + "step": 22100 + }, + { + "epoch": 0.8744487729636734, + "grad_norm": 1.0436743151153005, + "learning_rate": 8.924094016590399e-06, + "loss": 0.9906, + "step": 22110 + }, + { + "epoch": 0.8748442721825624, + "grad_norm": 0.9231897957742409, + "learning_rate": 8.922667617134004e-06, + "loss": 0.9928, + "step": 22120 + }, + { + "epoch": 0.8752397714014515, + "grad_norm": 1.0864962003267222, + "learning_rate": 8.921240386923811e-06, + "loss": 0.9847, + "step": 22130 + }, + { + "epoch": 0.8756352706203405, + "grad_norm": 1.1081670266964114, + "learning_rate": 8.919812326262084e-06, + "loss": 0.9852, + "step": 22140 + }, + 
{ + "epoch": 0.8760307698392296, + "grad_norm": 1.0863186277236239, + "learning_rate": 8.918383435451259e-06, + "loss": 1.0086, + "step": 22150 + }, + { + "epoch": 0.8764262690581186, + "grad_norm": 1.1158010931666167, + "learning_rate": 8.916953714793954e-06, + "loss": 0.9912, + "step": 22160 + }, + { + "epoch": 0.8768217682770076, + "grad_norm": 1.0135586503215224, + "learning_rate": 8.915523164592958e-06, + "loss": 0.9814, + "step": 22170 + }, + { + "epoch": 0.8772172674958967, + "grad_norm": 1.107413396930375, + "learning_rate": 8.914091785151238e-06, + "loss": 1.0031, + "step": 22180 + }, + { + "epoch": 0.8776127667147857, + "grad_norm": 1.0611835392923699, + "learning_rate": 8.912659576771935e-06, + "loss": 0.9865, + "step": 22190 + }, + { + "epoch": 0.8780082659336748, + "grad_norm": 0.943054047802915, + "learning_rate": 8.911226539758366e-06, + "loss": 0.9989, + "step": 22200 + }, + { + "epoch": 0.8784037651525638, + "grad_norm": 1.0351252352265388, + "learning_rate": 8.909792674414021e-06, + "loss": 0.9933, + "step": 22210 + }, + { + "epoch": 0.8787992643714528, + "grad_norm": 1.0626104462016321, + "learning_rate": 8.908357981042575e-06, + "loss": 1.003, + "step": 22220 + }, + { + "epoch": 0.8791947635903419, + "grad_norm": 1.0221346784061454, + "learning_rate": 8.906922459947865e-06, + "loss": 0.9885, + "step": 22230 + }, + { + "epoch": 0.8795902628092309, + "grad_norm": 1.0219969564048954, + "learning_rate": 8.905486111433913e-06, + "loss": 0.9972, + "step": 22240 + }, + { + "epoch": 0.87998576202812, + "grad_norm": 1.1497859667991182, + "learning_rate": 8.904048935804912e-06, + "loss": 0.9772, + "step": 22250 + }, + { + "epoch": 0.880381261247009, + "grad_norm": 1.0336302605879202, + "learning_rate": 8.90261093336523e-06, + "loss": 1.0016, + "step": 22260 + }, + { + "epoch": 0.880776760465898, + "grad_norm": 1.1374940246089795, + "learning_rate": 8.901172104419415e-06, + "loss": 0.9847, + "step": 22270 + }, + { + "epoch": 0.8811722596847871, + "grad_norm": 1.1175168071250212, + "learning_rate": 8.899732449272182e-06, + "loss": 1.0082, + "step": 22280 + }, + { + "epoch": 0.8815677589036761, + "grad_norm": 0.9234830732443703, + "learning_rate": 8.898291968228427e-06, + "loss": 1.0038, + "step": 22290 + }, + { + "epoch": 0.8819632581225653, + "grad_norm": 1.1343079009330828, + "learning_rate": 8.896850661593221e-06, + "loss": 0.9941, + "step": 22300 + }, + { + "epoch": 0.8823587573414543, + "grad_norm": 0.9966277801592895, + "learning_rate": 8.895408529671806e-06, + "loss": 0.9941, + "step": 22310 + }, + { + "epoch": 0.8827542565603433, + "grad_norm": 0.9822937140424421, + "learning_rate": 8.8939655727696e-06, + "loss": 0.9981, + "step": 22320 + }, + { + "epoch": 0.8831497557792324, + "grad_norm": 1.114945396356318, + "learning_rate": 8.8925217911922e-06, + "loss": 0.9664, + "step": 22330 + }, + { + "epoch": 0.8835452549981214, + "grad_norm": 1.0408984401113937, + "learning_rate": 8.89107718524537e-06, + "loss": 0.9946, + "step": 22340 + }, + { + "epoch": 0.8839407542170105, + "grad_norm": 1.0590325922700263, + "learning_rate": 8.889631755235056e-06, + "loss": 0.9922, + "step": 22350 + }, + { + "epoch": 0.8843362534358995, + "grad_norm": 1.0233741178165348, + "learning_rate": 8.888185501467372e-06, + "loss": 1.0061, + "step": 22360 + }, + { + "epoch": 0.8847317526547885, + "grad_norm": 1.139118241384216, + "learning_rate": 8.886738424248615e-06, + "loss": 1.006, + "step": 22370 + }, + { + "epoch": 0.8851272518736776, + "grad_norm": 1.0316951425657999, + "learning_rate": 
8.885290523885247e-06, + "loss": 0.9996, + "step": 22380 + }, + { + "epoch": 0.8855227510925666, + "grad_norm": 1.0691554404761705, + "learning_rate": 8.88384180068391e-06, + "loss": 1.0152, + "step": 22390 + }, + { + "epoch": 0.8859182503114557, + "grad_norm": 1.1243988288814266, + "learning_rate": 8.882392254951418e-06, + "loss": 0.9823, + "step": 22400 + }, + { + "epoch": 0.8863137495303447, + "grad_norm": 1.1401770918338507, + "learning_rate": 8.880941886994758e-06, + "loss": 1.0003, + "step": 22410 + }, + { + "epoch": 0.8867092487492337, + "grad_norm": 1.059276264537003, + "learning_rate": 8.879490697121098e-06, + "loss": 1.0022, + "step": 22420 + }, + { + "epoch": 0.8871047479681228, + "grad_norm": 0.9936029389159007, + "learning_rate": 8.878038685637773e-06, + "loss": 0.9994, + "step": 22430 + }, + { + "epoch": 0.8875002471870118, + "grad_norm": 1.0137848924227966, + "learning_rate": 8.87658585285229e-06, + "loss": 0.9885, + "step": 22440 + }, + { + "epoch": 0.8878957464059009, + "grad_norm": 1.128129618563724, + "learning_rate": 8.875132199072341e-06, + "loss": 0.9987, + "step": 22450 + }, + { + "epoch": 0.8882912456247899, + "grad_norm": 0.9934591210218678, + "learning_rate": 8.873677724605781e-06, + "loss": 0.9862, + "step": 22460 + }, + { + "epoch": 0.8886867448436789, + "grad_norm": 1.0868858665689693, + "learning_rate": 8.872222429760644e-06, + "loss": 1.0092, + "step": 22470 + }, + { + "epoch": 0.889082244062568, + "grad_norm": 1.0104139361635571, + "learning_rate": 8.870766314845137e-06, + "loss": 0.987, + "step": 22480 + }, + { + "epoch": 0.889477743281457, + "grad_norm": 1.0123800474935396, + "learning_rate": 8.869309380167636e-06, + "loss": 1.0126, + "step": 22490 + }, + { + "epoch": 0.889873242500346, + "grad_norm": 1.0547210523317003, + "learning_rate": 8.867851626036701e-06, + "loss": 1.0037, + "step": 22500 + }, + { + "epoch": 0.8902687417192351, + "grad_norm": 0.9969373469238735, + "learning_rate": 8.866393052761055e-06, + "loss": 1.0102, + "step": 22510 + }, + { + "epoch": 0.8906642409381241, + "grad_norm": 1.047988136282342, + "learning_rate": 8.864933660649599e-06, + "loss": 0.9899, + "step": 22520 + }, + { + "epoch": 0.8910597401570132, + "grad_norm": 1.0123715780703257, + "learning_rate": 8.86347345001141e-06, + "loss": 0.9838, + "step": 22530 + }, + { + "epoch": 0.8914552393759022, + "grad_norm": 0.9985895829139116, + "learning_rate": 8.862012421155734e-06, + "loss": 0.9931, + "step": 22540 + }, + { + "epoch": 0.8918507385947912, + "grad_norm": 1.1836257401850125, + "learning_rate": 8.860550574391992e-06, + "loss": 0.9973, + "step": 22550 + }, + { + "epoch": 0.8922462378136803, + "grad_norm": 1.0252176862779547, + "learning_rate": 8.859087910029778e-06, + "loss": 0.995, + "step": 22560 + }, + { + "epoch": 0.8926417370325693, + "grad_norm": 1.0694766092330494, + "learning_rate": 8.85762442837886e-06, + "loss": 0.9908, + "step": 22570 + }, + { + "epoch": 0.8930372362514584, + "grad_norm": 1.1292543282932064, + "learning_rate": 8.856160129749177e-06, + "loss": 0.9853, + "step": 22580 + }, + { + "epoch": 0.8934327354703474, + "grad_norm": 1.0992567489255145, + "learning_rate": 8.854695014450843e-06, + "loss": 1.0135, + "step": 22590 + }, + { + "epoch": 0.8938282346892364, + "grad_norm": 1.0435087190150094, + "learning_rate": 8.853229082794143e-06, + "loss": 0.9895, + "step": 22600 + }, + { + "epoch": 0.8942237339081255, + "grad_norm": 0.9770812795964554, + "learning_rate": 8.851762335089539e-06, + "loss": 0.9798, + "step": 22610 + }, + { + "epoch": 
0.8946192331270145, + "grad_norm": 1.1355912350823192, + "learning_rate": 8.850294771647661e-06, + "loss": 0.9941, + "step": 22620 + }, + { + "epoch": 0.8950147323459036, + "grad_norm": 0.9432287273154124, + "learning_rate": 8.848826392779315e-06, + "loss": 1.0076, + "step": 22630 + }, + { + "epoch": 0.8954102315647927, + "grad_norm": 1.0129628054599422, + "learning_rate": 8.847357198795477e-06, + "loss": 0.9795, + "step": 22640 + }, + { + "epoch": 0.8958057307836818, + "grad_norm": 1.1190507422850675, + "learning_rate": 8.845887190007299e-06, + "loss": 0.983, + "step": 22650 + }, + { + "epoch": 0.8962012300025708, + "grad_norm": 1.1013836601341722, + "learning_rate": 8.844416366726101e-06, + "loss": 1.0006, + "step": 22660 + }, + { + "epoch": 0.8965967292214598, + "grad_norm": 1.044186345233668, + "learning_rate": 8.84294472926338e-06, + "loss": 0.9907, + "step": 22670 + }, + { + "epoch": 0.8969922284403489, + "grad_norm": 1.0337971756226394, + "learning_rate": 8.841472277930805e-06, + "loss": 1.0174, + "step": 22680 + }, + { + "epoch": 0.8973877276592379, + "grad_norm": 1.1014760977075364, + "learning_rate": 8.839999013040212e-06, + "loss": 0.9955, + "step": 22690 + }, + { + "epoch": 0.897783226878127, + "grad_norm": 0.9774835640271742, + "learning_rate": 8.83852493490362e-06, + "loss": 0.9923, + "step": 22700 + }, + { + "epoch": 0.898178726097016, + "grad_norm": 0.95917912787041, + "learning_rate": 8.837050043833205e-06, + "loss": 0.989, + "step": 22710 + }, + { + "epoch": 0.898574225315905, + "grad_norm": 1.183673644318555, + "learning_rate": 8.835574340141328e-06, + "loss": 0.9855, + "step": 22720 + }, + { + "epoch": 0.8989697245347941, + "grad_norm": 0.9986675579473566, + "learning_rate": 8.834097824140519e-06, + "loss": 0.9927, + "step": 22730 + }, + { + "epoch": 0.8993652237536831, + "grad_norm": 1.0966403139465501, + "learning_rate": 8.832620496143476e-06, + "loss": 0.9846, + "step": 22740 + }, + { + "epoch": 0.8997607229725721, + "grad_norm": 1.1809364950312344, + "learning_rate": 8.831142356463075e-06, + "loss": 0.9883, + "step": 22750 + }, + { + "epoch": 0.9001562221914612, + "grad_norm": 1.0346068266663593, + "learning_rate": 8.829663405412356e-06, + "loss": 0.9945, + "step": 22760 + }, + { + "epoch": 0.9005517214103502, + "grad_norm": 1.167951301427672, + "learning_rate": 8.82818364330454e-06, + "loss": 0.9919, + "step": 22770 + }, + { + "epoch": 0.9009472206292393, + "grad_norm": 0.9233913719712421, + "learning_rate": 8.826703070453014e-06, + "loss": 0.9845, + "step": 22780 + }, + { + "epoch": 0.9013427198481283, + "grad_norm": 0.8991820237275926, + "learning_rate": 8.825221687171337e-06, + "loss": 1.0093, + "step": 22790 + }, + { + "epoch": 0.9017382190670173, + "grad_norm": 0.988284124934829, + "learning_rate": 8.823739493773242e-06, + "loss": 0.9918, + "step": 22800 + }, + { + "epoch": 0.9021337182859064, + "grad_norm": 1.2299112690149163, + "learning_rate": 8.82225649057263e-06, + "loss": 0.9755, + "step": 22810 + }, + { + "epoch": 0.9025292175047954, + "grad_norm": 1.170523119650909, + "learning_rate": 8.820772677883577e-06, + "loss": 0.9733, + "step": 22820 + }, + { + "epoch": 0.9029247167236845, + "grad_norm": 1.017746867765162, + "learning_rate": 8.819288056020329e-06, + "loss": 0.9919, + "step": 22830 + }, + { + "epoch": 0.9033202159425735, + "grad_norm": 1.0985751288824752, + "learning_rate": 8.817802625297304e-06, + "loss": 0.9822, + "step": 22840 + }, + { + "epoch": 0.9037157151614625, + "grad_norm": 1.0835501179096811, + "learning_rate": 8.816316386029089e-06, + 
"loss": 0.9795, + "step": 22850 + }, + { + "epoch": 0.9041112143803516, + "grad_norm": 0.9965732383709867, + "learning_rate": 8.814829338530446e-06, + "loss": 0.989, + "step": 22860 + }, + { + "epoch": 0.9045067135992406, + "grad_norm": 1.033560445468714, + "learning_rate": 8.813341483116307e-06, + "loss": 0.9715, + "step": 22870 + }, + { + "epoch": 0.9049022128181297, + "grad_norm": 0.94882157337787, + "learning_rate": 8.811852820101772e-06, + "loss": 0.9807, + "step": 22880 + }, + { + "epoch": 0.9052977120370187, + "grad_norm": 1.0828872992033372, + "learning_rate": 8.810363349802113e-06, + "loss": 0.9935, + "step": 22890 + }, + { + "epoch": 0.9056932112559077, + "grad_norm": 1.0020699385804437, + "learning_rate": 8.808873072532779e-06, + "loss": 0.9782, + "step": 22900 + }, + { + "epoch": 0.9060887104747968, + "grad_norm": 1.0293321370815471, + "learning_rate": 8.807381988609381e-06, + "loss": 0.9934, + "step": 22910 + }, + { + "epoch": 0.9064842096936858, + "grad_norm": 1.1112568202659678, + "learning_rate": 8.805890098347707e-06, + "loss": 0.9951, + "step": 22920 + }, + { + "epoch": 0.9068797089125749, + "grad_norm": 1.078258136675064, + "learning_rate": 8.804397402063714e-06, + "loss": 0.9854, + "step": 22930 + }, + { + "epoch": 0.9072752081314639, + "grad_norm": 1.064273334379174, + "learning_rate": 8.802903900073529e-06, + "loss": 0.9783, + "step": 22940 + }, + { + "epoch": 0.9076707073503529, + "grad_norm": 1.0606010733965434, + "learning_rate": 8.801409592693449e-06, + "loss": 1.0037, + "step": 22950 + }, + { + "epoch": 0.908066206569242, + "grad_norm": 1.0552623577993019, + "learning_rate": 8.799914480239944e-06, + "loss": 0.9915, + "step": 22960 + }, + { + "epoch": 0.908461705788131, + "grad_norm": 1.017323747809653, + "learning_rate": 8.798418563029654e-06, + "loss": 0.9785, + "step": 22970 + }, + { + "epoch": 0.9088572050070202, + "grad_norm": 1.0343661498631227, + "learning_rate": 8.796921841379386e-06, + "loss": 0.9997, + "step": 22980 + }, + { + "epoch": 0.9092527042259092, + "grad_norm": 1.0222293592540472, + "learning_rate": 8.795424315606122e-06, + "loss": 0.9866, + "step": 22990 + }, + { + "epoch": 0.9096482034447982, + "grad_norm": 1.075938189630379, + "learning_rate": 8.793925986027013e-06, + "loss": 1.0025, + "step": 23000 + }, + { + "epoch": 0.9100437026636873, + "grad_norm": 1.0456175519023878, + "learning_rate": 8.792426852959378e-06, + "loss": 0.9854, + "step": 23010 + }, + { + "epoch": 0.9104392018825763, + "grad_norm": 1.1417103357850213, + "learning_rate": 8.790926916720708e-06, + "loss": 0.9776, + "step": 23020 + }, + { + "epoch": 0.9108347011014654, + "grad_norm": 1.1437784834497344, + "learning_rate": 8.789426177628662e-06, + "loss": 0.9971, + "step": 23030 + }, + { + "epoch": 0.9112302003203544, + "grad_norm": 1.1907093030236078, + "learning_rate": 8.787924636001075e-06, + "loss": 0.9643, + "step": 23040 + }, + { + "epoch": 0.9116256995392434, + "grad_norm": 1.0252715750433767, + "learning_rate": 8.786422292155946e-06, + "loss": 1.0015, + "step": 23050 + }, + { + "epoch": 0.9120211987581325, + "grad_norm": 1.1575059298012786, + "learning_rate": 8.784919146411444e-06, + "loss": 0.9957, + "step": 23060 + }, + { + "epoch": 0.9124166979770215, + "grad_norm": 1.0400122077290705, + "learning_rate": 8.78341519908591e-06, + "loss": 0.98, + "step": 23070 + }, + { + "epoch": 0.9128121971959106, + "grad_norm": 1.149572413005648, + "learning_rate": 8.781910450497856e-06, + "loss": 0.9879, + "step": 23080 + }, + { + "epoch": 0.9132076964147996, + "grad_norm": 
1.036139748253578, + "learning_rate": 8.780404900965962e-06, + "loss": 0.9703, + "step": 23090 + }, + { + "epoch": 0.9136031956336886, + "grad_norm": 0.9964524838102091, + "learning_rate": 8.778898550809074e-06, + "loss": 0.9995, + "step": 23100 + }, + { + "epoch": 0.9139986948525777, + "grad_norm": 1.1437294182809956, + "learning_rate": 8.777391400346216e-06, + "loss": 0.9753, + "step": 23110 + }, + { + "epoch": 0.9143941940714667, + "grad_norm": 1.0205371380592758, + "learning_rate": 8.775883449896575e-06, + "loss": 0.975, + "step": 23120 + }, + { + "epoch": 0.9147896932903558, + "grad_norm": 1.0625168977073607, + "learning_rate": 8.774374699779506e-06, + "loss": 0.9739, + "step": 23130 + }, + { + "epoch": 0.9151851925092448, + "grad_norm": 1.0242426008012278, + "learning_rate": 8.772865150314541e-06, + "loss": 0.992, + "step": 23140 + }, + { + "epoch": 0.9155806917281338, + "grad_norm": 1.0816904413373665, + "learning_rate": 8.771354801821372e-06, + "loss": 0.9896, + "step": 23150 + }, + { + "epoch": 0.9159761909470229, + "grad_norm": 1.148176143444153, + "learning_rate": 8.76984365461987e-06, + "loss": 0.9872, + "step": 23160 + }, + { + "epoch": 0.9163716901659119, + "grad_norm": 1.012599377789182, + "learning_rate": 8.768331709030067e-06, + "loss": 0.9883, + "step": 23170 + }, + { + "epoch": 0.916767189384801, + "grad_norm": 1.1342241805843325, + "learning_rate": 8.766818965372167e-06, + "loss": 0.9872, + "step": 23180 + }, + { + "epoch": 0.91716268860369, + "grad_norm": 1.1393767369318468, + "learning_rate": 8.765305423966544e-06, + "loss": 0.9763, + "step": 23190 + }, + { + "epoch": 0.917558187822579, + "grad_norm": 1.1539912560718792, + "learning_rate": 8.763791085133741e-06, + "loss": 0.9845, + "step": 23200 + }, + { + "epoch": 0.9179536870414681, + "grad_norm": 1.0008074199913217, + "learning_rate": 8.762275949194468e-06, + "loss": 0.9932, + "step": 23210 + }, + { + "epoch": 0.9183491862603571, + "grad_norm": 1.1259556514957458, + "learning_rate": 8.760760016469605e-06, + "loss": 1.0121, + "step": 23220 + }, + { + "epoch": 0.9187446854792461, + "grad_norm": 1.1576577910927688, + "learning_rate": 8.759243287280201e-06, + "loss": 0.9682, + "step": 23230 + }, + { + "epoch": 0.9191401846981352, + "grad_norm": 1.1919388309296537, + "learning_rate": 8.75772576194747e-06, + "loss": 0.9736, + "step": 23240 + }, + { + "epoch": 0.9195356839170242, + "grad_norm": 1.0147416187741292, + "learning_rate": 8.756207440792805e-06, + "loss": 0.9872, + "step": 23250 + }, + { + "epoch": 0.9199311831359133, + "grad_norm": 1.2393222594099442, + "learning_rate": 8.754688324137754e-06, + "loss": 0.9885, + "step": 23260 + }, + { + "epoch": 0.9203266823548023, + "grad_norm": 1.2370378621450326, + "learning_rate": 8.753168412304041e-06, + "loss": 0.9827, + "step": 23270 + }, + { + "epoch": 0.9207221815736913, + "grad_norm": 1.020160393878756, + "learning_rate": 8.75164770561356e-06, + "loss": 0.9807, + "step": 23280 + }, + { + "epoch": 0.9211176807925804, + "grad_norm": 1.1519746009622465, + "learning_rate": 8.750126204388368e-06, + "loss": 0.9715, + "step": 23290 + }, + { + "epoch": 0.9215131800114694, + "grad_norm": 1.0942413536629974, + "learning_rate": 8.748603908950694e-06, + "loss": 1.004, + "step": 23300 + }, + { + "epoch": 0.9219086792303586, + "grad_norm": 0.9655570124129816, + "learning_rate": 8.747080819622932e-06, + "loss": 0.9854, + "step": 23310 + }, + { + "epoch": 0.9223041784492476, + "grad_norm": 1.0299210507179013, + "learning_rate": 8.74555693672765e-06, + "loss": 0.9746, + "step": 23320 
+ }, + { + "epoch": 0.9226996776681367, + "grad_norm": 0.9807506906193965, + "learning_rate": 8.744032260587581e-06, + "loss": 1.002, + "step": 23330 + }, + { + "epoch": 0.9230951768870257, + "grad_norm": 1.0181924212006077, + "learning_rate": 8.742506791525617e-06, + "loss": 0.989, + "step": 23340 + }, + { + "epoch": 0.9234906761059147, + "grad_norm": 1.1182118898424565, + "learning_rate": 8.740980529864837e-06, + "loss": 0.9671, + "step": 23350 + }, + { + "epoch": 0.9238861753248038, + "grad_norm": 0.9842730408912936, + "learning_rate": 8.73945347592847e-06, + "loss": 0.9998, + "step": 23360 + }, + { + "epoch": 0.9242816745436928, + "grad_norm": 1.0289373437877, + "learning_rate": 8.737925630039921e-06, + "loss": 0.9718, + "step": 23370 + }, + { + "epoch": 0.9246771737625818, + "grad_norm": 1.0897506857407533, + "learning_rate": 8.736396992522762e-06, + "loss": 0.9625, + "step": 23380 + }, + { + "epoch": 0.9250726729814709, + "grad_norm": 0.9733091800848298, + "learning_rate": 8.734867563700734e-06, + "loss": 0.9711, + "step": 23390 + }, + { + "epoch": 0.9254681722003599, + "grad_norm": 1.1750819878640861, + "learning_rate": 8.733337343897743e-06, + "loss": 0.9839, + "step": 23400 + }, + { + "epoch": 0.925863671419249, + "grad_norm": 0.9919572977820524, + "learning_rate": 8.731806333437862e-06, + "loss": 1.0007, + "step": 23410 + }, + { + "epoch": 0.926259170638138, + "grad_norm": 1.0891715728498546, + "learning_rate": 8.730274532645333e-06, + "loss": 0.986, + "step": 23420 + }, + { + "epoch": 0.926654669857027, + "grad_norm": 1.0431658941727613, + "learning_rate": 8.728741941844568e-06, + "loss": 0.9692, + "step": 23430 + }, + { + "epoch": 0.9270501690759161, + "grad_norm": 1.0720953935857238, + "learning_rate": 8.727208561360139e-06, + "loss": 1.0115, + "step": 23440 + }, + { + "epoch": 0.9274456682948051, + "grad_norm": 1.0509206784225917, + "learning_rate": 8.725674391516791e-06, + "loss": 0.9812, + "step": 23450 + }, + { + "epoch": 0.9278411675136942, + "grad_norm": 1.1325001148401703, + "learning_rate": 8.724139432639439e-06, + "loss": 0.9902, + "step": 23460 + }, + { + "epoch": 0.9282366667325832, + "grad_norm": 0.9422616361715284, + "learning_rate": 8.722603685053155e-06, + "loss": 0.9776, + "step": 23470 + }, + { + "epoch": 0.9286321659514722, + "grad_norm": 1.06866448296265, + "learning_rate": 8.72106714908319e-06, + "loss": 0.9776, + "step": 23480 + }, + { + "epoch": 0.9290276651703613, + "grad_norm": 0.9826613241611012, + "learning_rate": 8.719529825054949e-06, + "loss": 0.9803, + "step": 23490 + }, + { + "epoch": 0.9294231643892503, + "grad_norm": 1.0254324719318577, + "learning_rate": 8.717991713294016e-06, + "loss": 1.015, + "step": 23500 + }, + { + "epoch": 0.9298186636081394, + "grad_norm": 1.184924481312643, + "learning_rate": 8.716452814126138e-06, + "loss": 0.978, + "step": 23510 + }, + { + "epoch": 0.9302141628270284, + "grad_norm": 1.0873957863450652, + "learning_rate": 8.714913127877222e-06, + "loss": 0.9772, + "step": 23520 + }, + { + "epoch": 0.9306096620459174, + "grad_norm": 0.9526286358172754, + "learning_rate": 8.71337265487335e-06, + "loss": 0.9753, + "step": 23530 + }, + { + "epoch": 0.9310051612648065, + "grad_norm": 1.0317168449837404, + "learning_rate": 8.71183139544077e-06, + "loss": 0.9674, + "step": 23540 + }, + { + "epoch": 0.9314006604836955, + "grad_norm": 1.0917615948441148, + "learning_rate": 8.710289349905892e-06, + "loss": 0.9989, + "step": 23550 + }, + { + "epoch": 0.9317961597025846, + "grad_norm": 1.0454900947985744, + "learning_rate": 
8.708746518595293e-06, + "loss": 0.9726, + "step": 23560 + }, + { + "epoch": 0.9321916589214736, + "grad_norm": 1.0730227821194152, + "learning_rate": 8.707202901835722e-06, + "loss": 0.9848, + "step": 23570 + }, + { + "epoch": 0.9325871581403626, + "grad_norm": 1.0243397941306367, + "learning_rate": 8.705658499954088e-06, + "loss": 0.9718, + "step": 23580 + }, + { + "epoch": 0.9329826573592517, + "grad_norm": 1.0710727387135093, + "learning_rate": 8.70411331327747e-06, + "loss": 0.9802, + "step": 23590 + }, + { + "epoch": 0.9333781565781407, + "grad_norm": 1.0059987512029682, + "learning_rate": 8.702567342133114e-06, + "loss": 0.9802, + "step": 23600 + }, + { + "epoch": 0.9337736557970298, + "grad_norm": 1.076635607057341, + "learning_rate": 8.701020586848426e-06, + "loss": 1.0014, + "step": 23610 + }, + { + "epoch": 0.9341691550159188, + "grad_norm": 1.1075009135277165, + "learning_rate": 8.699473047750984e-06, + "loss": 0.9748, + "step": 23620 + }, + { + "epoch": 0.9345646542348078, + "grad_norm": 1.0445746467086114, + "learning_rate": 8.69792472516853e-06, + "loss": 0.9829, + "step": 23630 + }, + { + "epoch": 0.9349601534536969, + "grad_norm": 1.2571801568986372, + "learning_rate": 8.696375619428976e-06, + "loss": 0.9951, + "step": 23640 + }, + { + "epoch": 0.935355652672586, + "grad_norm": 1.0088603359783315, + "learning_rate": 8.69482573086039e-06, + "loss": 0.9786, + "step": 23650 + }, + { + "epoch": 0.9357511518914751, + "grad_norm": 1.0623255392530664, + "learning_rate": 8.693275059791016e-06, + "loss": 0.9814, + "step": 23660 + }, + { + "epoch": 0.9361466511103641, + "grad_norm": 0.959154552157258, + "learning_rate": 8.691723606549256e-06, + "loss": 0.9791, + "step": 23670 + }, + { + "epoch": 0.9365421503292531, + "grad_norm": 1.0706723029268512, + "learning_rate": 8.690171371463684e-06, + "loss": 0.9888, + "step": 23680 + }, + { + "epoch": 0.9369376495481422, + "grad_norm": 1.0728027680501875, + "learning_rate": 8.688618354863038e-06, + "loss": 0.9848, + "step": 23690 + }, + { + "epoch": 0.9373331487670312, + "grad_norm": 1.0981777666780366, + "learning_rate": 8.687064557076217e-06, + "loss": 0.9624, + "step": 23700 + }, + { + "epoch": 0.9377286479859203, + "grad_norm": 1.1610255849448627, + "learning_rate": 8.685509978432292e-06, + "loss": 0.9919, + "step": 23710 + }, + { + "epoch": 0.9381241472048093, + "grad_norm": 1.1192051968153376, + "learning_rate": 8.683954619260493e-06, + "loss": 1.0051, + "step": 23720 + }, + { + "epoch": 0.9385196464236983, + "grad_norm": 1.0934737964644337, + "learning_rate": 8.682398479890219e-06, + "loss": 0.9745, + "step": 23730 + }, + { + "epoch": 0.9389151456425874, + "grad_norm": 1.0796311371704077, + "learning_rate": 8.680841560651037e-06, + "loss": 0.9927, + "step": 23740 + }, + { + "epoch": 0.9393106448614764, + "grad_norm": 1.030331097033002, + "learning_rate": 8.679283861872672e-06, + "loss": 0.9906, + "step": 23750 + }, + { + "epoch": 0.9397061440803655, + "grad_norm": 1.070847869700182, + "learning_rate": 8.67772538388502e-06, + "loss": 0.9945, + "step": 23760 + }, + { + "epoch": 0.9401016432992545, + "grad_norm": 1.1692486255761738, + "learning_rate": 8.676166127018137e-06, + "loss": 0.9874, + "step": 23770 + }, + { + "epoch": 0.9404971425181435, + "grad_norm": 1.172331981822452, + "learning_rate": 8.67460609160225e-06, + "loss": 0.9917, + "step": 23780 + }, + { + "epoch": 0.9408926417370326, + "grad_norm": 1.0936880809301424, + "learning_rate": 8.67304527796775e-06, + "loss": 0.9775, + "step": 23790 + }, + { + "epoch": 
0.9412881409559216, + "grad_norm": 1.043218347758308, + "learning_rate": 8.671483686445184e-06, + "loss": 0.9932, + "step": 23800 + }, + { + "epoch": 0.9416836401748107, + "grad_norm": 1.0317258621196275, + "learning_rate": 8.669921317365274e-06, + "loss": 0.9591, + "step": 23810 + }, + { + "epoch": 0.9420791393936997, + "grad_norm": 1.1741181931341496, + "learning_rate": 8.668358171058903e-06, + "loss": 0.9728, + "step": 23820 + }, + { + "epoch": 0.9424746386125887, + "grad_norm": 1.1974144409006249, + "learning_rate": 8.66679424785712e-06, + "loss": 0.9783, + "step": 23830 + }, + { + "epoch": 0.9428701378314778, + "grad_norm": 1.0861471357077015, + "learning_rate": 8.665229548091134e-06, + "loss": 0.9693, + "step": 23840 + }, + { + "epoch": 0.9432656370503668, + "grad_norm": 0.9479709508096992, + "learning_rate": 8.663664072092324e-06, + "loss": 0.99, + "step": 23850 + }, + { + "epoch": 0.9436611362692559, + "grad_norm": 1.1996719805140812, + "learning_rate": 8.66209782019223e-06, + "loss": 0.9642, + "step": 23860 + }, + { + "epoch": 0.9440566354881449, + "grad_norm": 1.1358990074467126, + "learning_rate": 8.660530792722555e-06, + "loss": 0.9717, + "step": 23870 + }, + { + "epoch": 0.9444521347070339, + "grad_norm": 1.0318011536159686, + "learning_rate": 8.658962990015174e-06, + "loss": 0.9931, + "step": 23880 + }, + { + "epoch": 0.944847633925923, + "grad_norm": 0.9843461295627615, + "learning_rate": 8.657394412402115e-06, + "loss": 0.9715, + "step": 23890 + }, + { + "epoch": 0.945243133144812, + "grad_norm": 1.0198154769378152, + "learning_rate": 8.655825060215582e-06, + "loss": 0.9822, + "step": 23900 + }, + { + "epoch": 0.945638632363701, + "grad_norm": 1.1072586596436313, + "learning_rate": 8.65425493378793e-06, + "loss": 0.9825, + "step": 23910 + }, + { + "epoch": 0.9460341315825901, + "grad_norm": 1.0291072786851772, + "learning_rate": 8.652684033451693e-06, + "loss": 0.9724, + "step": 23920 + }, + { + "epoch": 0.9464296308014791, + "grad_norm": 1.0030405398765163, + "learning_rate": 8.651112359539554e-06, + "loss": 0.9692, + "step": 23930 + }, + { + "epoch": 0.9468251300203682, + "grad_norm": 0.9796792892649463, + "learning_rate": 8.649539912384367e-06, + "loss": 0.9793, + "step": 23940 + }, + { + "epoch": 0.9472206292392572, + "grad_norm": 1.0989814736494155, + "learning_rate": 8.647966692319152e-06, + "loss": 0.9675, + "step": 23950 + }, + { + "epoch": 0.9476161284581462, + "grad_norm": 1.043570875984775, + "learning_rate": 8.646392699677089e-06, + "loss": 0.9921, + "step": 23960 + }, + { + "epoch": 0.9480116276770353, + "grad_norm": 0.9667944501064308, + "learning_rate": 8.644817934791526e-06, + "loss": 0.9578, + "step": 23970 + }, + { + "epoch": 0.9484071268959244, + "grad_norm": 1.2399687314982235, + "learning_rate": 8.643242397995964e-06, + "loss": 0.9816, + "step": 23980 + }, + { + "epoch": 0.9488026261148135, + "grad_norm": 1.0944353925235228, + "learning_rate": 8.641666089624081e-06, + "loss": 0.9823, + "step": 23990 + }, + { + "epoch": 0.9491981253337025, + "grad_norm": 1.0536797595473253, + "learning_rate": 8.640089010009709e-06, + "loss": 0.9935, + "step": 24000 + }, + { + "epoch": 0.9495936245525916, + "grad_norm": 1.0801568145664902, + "learning_rate": 8.638511159486848e-06, + "loss": 0.9915, + "step": 24010 + }, + { + "epoch": 0.9499891237714806, + "grad_norm": 1.0780547137127836, + "learning_rate": 8.63693253838966e-06, + "loss": 0.9813, + "step": 24020 + }, + { + "epoch": 0.9503846229903696, + "grad_norm": 1.1720263913753735, + "learning_rate": 
8.635353147052467e-06, + "loss": 0.9906, + "step": 24030 + }, + { + "epoch": 0.9507801222092587, + "grad_norm": 0.9704723456162168, + "learning_rate": 8.63377298580976e-06, + "loss": 0.9928, + "step": 24040 + }, + { + "epoch": 0.9511756214281477, + "grad_norm": 1.0667577133791284, + "learning_rate": 8.632192054996189e-06, + "loss": 0.9732, + "step": 24050 + }, + { + "epoch": 0.9515711206470368, + "grad_norm": 1.0962143236501385, + "learning_rate": 8.630610354946569e-06, + "loss": 0.9815, + "step": 24060 + }, + { + "epoch": 0.9519666198659258, + "grad_norm": 1.1299046191477977, + "learning_rate": 8.629027885995874e-06, + "loss": 0.9832, + "step": 24070 + }, + { + "epoch": 0.9523621190848148, + "grad_norm": 1.176366568083045, + "learning_rate": 8.627444648479248e-06, + "loss": 0.9848, + "step": 24080 + }, + { + "epoch": 0.9527576183037039, + "grad_norm": 1.063162096802908, + "learning_rate": 8.62586064273199e-06, + "loss": 0.9888, + "step": 24090 + }, + { + "epoch": 0.9531531175225929, + "grad_norm": 0.9984923004865964, + "learning_rate": 8.624275869089568e-06, + "loss": 0.994, + "step": 24100 + }, + { + "epoch": 0.953548616741482, + "grad_norm": 1.0155433138591725, + "learning_rate": 8.622690327887608e-06, + "loss": 0.9878, + "step": 24110 + }, + { + "epoch": 0.953944115960371, + "grad_norm": 1.083293640402409, + "learning_rate": 8.6211040194619e-06, + "loss": 0.9767, + "step": 24120 + }, + { + "epoch": 0.95433961517926, + "grad_norm": 1.0392009909154236, + "learning_rate": 8.6195169441484e-06, + "loss": 0.9729, + "step": 24130 + }, + { + "epoch": 0.9547351143981491, + "grad_norm": 1.2102955927932995, + "learning_rate": 8.617929102283222e-06, + "loss": 1.0018, + "step": 24140 + }, + { + "epoch": 0.9551306136170381, + "grad_norm": 0.9991388656786047, + "learning_rate": 8.616340494202642e-06, + "loss": 0.9835, + "step": 24150 + }, + { + "epoch": 0.9555261128359271, + "grad_norm": 1.0697285322514845, + "learning_rate": 8.614751120243102e-06, + "loss": 0.9757, + "step": 24160 + }, + { + "epoch": 0.9559216120548162, + "grad_norm": 0.9300497076805125, + "learning_rate": 8.613160980741202e-06, + "loss": 0.9572, + "step": 24170 + }, + { + "epoch": 0.9563171112737052, + "grad_norm": 1.153186051041131, + "learning_rate": 8.611570076033708e-06, + "loss": 0.9805, + "step": 24180 + }, + { + "epoch": 0.9567126104925943, + "grad_norm": 1.0006879664570016, + "learning_rate": 8.609978406457547e-06, + "loss": 0.9948, + "step": 24190 + }, + { + "epoch": 0.9571081097114833, + "grad_norm": 1.0861062914435131, + "learning_rate": 8.608385972349806e-06, + "loss": 0.9758, + "step": 24200 + }, + { + "epoch": 0.9575036089303723, + "grad_norm": 1.167037693779475, + "learning_rate": 8.606792774047735e-06, + "loss": 0.9654, + "step": 24210 + }, + { + "epoch": 0.9578991081492614, + "grad_norm": 1.149283586498159, + "learning_rate": 8.605198811888747e-06, + "loss": 0.9926, + "step": 24220 + }, + { + "epoch": 0.9582946073681504, + "grad_norm": 0.9599030301682894, + "learning_rate": 8.603604086210415e-06, + "loss": 0.9914, + "step": 24230 + }, + { + "epoch": 0.9586901065870395, + "grad_norm": 1.0951540139118099, + "learning_rate": 8.602008597350477e-06, + "loss": 0.9833, + "step": 24240 + }, + { + "epoch": 0.9590856058059285, + "grad_norm": 1.1108657195436034, + "learning_rate": 8.600412345646827e-06, + "loss": 0.9634, + "step": 24250 + }, + { + "epoch": 0.9594811050248175, + "grad_norm": 1.0971766303144515, + "learning_rate": 8.598815331437525e-06, + "loss": 0.9856, + "step": 24260 + }, + { + "epoch": 0.9598766042437066, + 
"grad_norm": 1.124539805228262, + "learning_rate": 8.597217555060791e-06, + "loss": 0.9611, + "step": 24270 + }, + { + "epoch": 0.9602721034625956, + "grad_norm": 1.0741751479171762, + "learning_rate": 8.595619016855008e-06, + "loss": 0.986, + "step": 24280 + }, + { + "epoch": 0.9606676026814847, + "grad_norm": 1.001879046646722, + "learning_rate": 8.594019717158718e-06, + "loss": 0.9743, + "step": 24290 + }, + { + "epoch": 0.9610631019003737, + "grad_norm": 1.1423274618036203, + "learning_rate": 8.592419656310626e-06, + "loss": 0.9738, + "step": 24300 + }, + { + "epoch": 0.9614586011192627, + "grad_norm": 1.0571012396736137, + "learning_rate": 8.590818834649595e-06, + "loss": 0.9812, + "step": 24310 + }, + { + "epoch": 0.9618541003381519, + "grad_norm": 1.183819711378045, + "learning_rate": 8.589217252514654e-06, + "loss": 0.9813, + "step": 24320 + }, + { + "epoch": 0.9622495995570409, + "grad_norm": 1.1489117380124345, + "learning_rate": 8.58761491024499e-06, + "loss": 0.9657, + "step": 24330 + }, + { + "epoch": 0.96264509877593, + "grad_norm": 1.0566611151640706, + "learning_rate": 8.586011808179953e-06, + "loss": 0.9893, + "step": 24340 + }, + { + "epoch": 0.963040597994819, + "grad_norm": 1.14223900394507, + "learning_rate": 8.58440794665905e-06, + "loss": 0.9692, + "step": 24350 + }, + { + "epoch": 0.963436097213708, + "grad_norm": 1.0985227241314044, + "learning_rate": 8.582803326021953e-06, + "loss": 0.9698, + "step": 24360 + }, + { + "epoch": 0.9638315964325971, + "grad_norm": 1.0039282774950415, + "learning_rate": 8.581197946608492e-06, + "loss": 0.9597, + "step": 24370 + }, + { + "epoch": 0.9642270956514861, + "grad_norm": 1.1665420747024093, + "learning_rate": 8.579591808758661e-06, + "loss": 0.9658, + "step": 24380 + }, + { + "epoch": 0.9646225948703752, + "grad_norm": 1.119336610269564, + "learning_rate": 8.57798491281261e-06, + "loss": 0.9712, + "step": 24390 + }, + { + "epoch": 0.9650180940892642, + "grad_norm": 1.0942144775036178, + "learning_rate": 8.576377259110655e-06, + "loss": 0.9634, + "step": 24400 + }, + { + "epoch": 0.9654135933081532, + "grad_norm": 1.2251357057638173, + "learning_rate": 8.574768847993266e-06, + "loss": 0.9811, + "step": 24410 + }, + { + "epoch": 0.9658090925270423, + "grad_norm": 1.0900044430802518, + "learning_rate": 8.57315967980108e-06, + "loss": 0.9531, + "step": 24420 + }, + { + "epoch": 0.9662045917459313, + "grad_norm": 1.0441010310095726, + "learning_rate": 8.571549754874888e-06, + "loss": 0.9768, + "step": 24430 + }, + { + "epoch": 0.9666000909648204, + "grad_norm": 1.1353781094471103, + "learning_rate": 8.569939073555649e-06, + "loss": 0.9868, + "step": 24440 + }, + { + "epoch": 0.9669955901837094, + "grad_norm": 1.1748225953794131, + "learning_rate": 8.568327636184472e-06, + "loss": 0.961, + "step": 24450 + }, + { + "epoch": 0.9673910894025984, + "grad_norm": 1.0340001923572182, + "learning_rate": 8.566715443102638e-06, + "loss": 0.9923, + "step": 24460 + }, + { + "epoch": 0.9677865886214875, + "grad_norm": 1.15050439192543, + "learning_rate": 8.565102494651575e-06, + "loss": 0.9606, + "step": 24470 + }, + { + "epoch": 0.9681820878403765, + "grad_norm": 1.0292956727015647, + "learning_rate": 8.563488791172885e-06, + "loss": 0.9568, + "step": 24480 + }, + { + "epoch": 0.9685775870592656, + "grad_norm": 1.0981091313247309, + "learning_rate": 8.561874333008317e-06, + "loss": 0.9501, + "step": 24490 + }, + { + "epoch": 0.9689730862781546, + "grad_norm": 1.0658403921725377, + "learning_rate": 8.56025912049979e-06, + "loss": 0.9939, + 
"step": 24500 + }, + { + "epoch": 0.9693685854970436, + "grad_norm": 1.0464554939391117, + "learning_rate": 8.558643153989376e-06, + "loss": 0.9719, + "step": 24510 + }, + { + "epoch": 0.9697640847159327, + "grad_norm": 0.9584950091854825, + "learning_rate": 8.557026433819309e-06, + "loss": 0.9841, + "step": 24520 + }, + { + "epoch": 0.9701595839348217, + "grad_norm": 1.0516021723383688, + "learning_rate": 8.555408960331984e-06, + "loss": 0.9923, + "step": 24530 + }, + { + "epoch": 0.9705550831537108, + "grad_norm": 0.9971277263320242, + "learning_rate": 8.55379073386995e-06, + "loss": 0.9893, + "step": 24540 + }, + { + "epoch": 0.9709505823725998, + "grad_norm": 0.9255117927107546, + "learning_rate": 8.552171754775926e-06, + "loss": 0.9746, + "step": 24550 + }, + { + "epoch": 0.9713460815914888, + "grad_norm": 1.2434192573745426, + "learning_rate": 8.55055202339278e-06, + "loss": 0.973, + "step": 24560 + }, + { + "epoch": 0.9717415808103779, + "grad_norm": 1.0887798133883928, + "learning_rate": 8.548931540063544e-06, + "loss": 0.9661, + "step": 24570 + }, + { + "epoch": 0.9721370800292669, + "grad_norm": 1.0779137810621702, + "learning_rate": 8.547310305131408e-06, + "loss": 0.9641, + "step": 24580 + }, + { + "epoch": 0.972532579248156, + "grad_norm": 1.172646876455584, + "learning_rate": 8.545688318939722e-06, + "loss": 0.9662, + "step": 24590 + }, + { + "epoch": 0.972928078467045, + "grad_norm": 0.9646992412708512, + "learning_rate": 8.544065581831998e-06, + "loss": 0.9756, + "step": 24600 + }, + { + "epoch": 0.973323577685934, + "grad_norm": 1.003010231878538, + "learning_rate": 8.5424420941519e-06, + "loss": 0.9759, + "step": 24610 + }, + { + "epoch": 0.9737190769048231, + "grad_norm": 1.073681049884641, + "learning_rate": 8.540817856243256e-06, + "loss": 0.9859, + "step": 24620 + }, + { + "epoch": 0.9741145761237121, + "grad_norm": 1.0940896551155155, + "learning_rate": 8.539192868450051e-06, + "loss": 0.9715, + "step": 24630 + }, + { + "epoch": 0.9745100753426011, + "grad_norm": 1.1119498644474028, + "learning_rate": 8.537567131116432e-06, + "loss": 0.9851, + "step": 24640 + }, + { + "epoch": 0.9749055745614903, + "grad_norm": 1.2037715625381082, + "learning_rate": 8.5359406445867e-06, + "loss": 0.9765, + "step": 24650 + }, + { + "epoch": 0.9753010737803793, + "grad_norm": 1.1158633096753356, + "learning_rate": 8.53431340920532e-06, + "loss": 0.9851, + "step": 24660 + }, + { + "epoch": 0.9756965729992684, + "grad_norm": 0.9851811030875438, + "learning_rate": 8.53268542531691e-06, + "loss": 0.9832, + "step": 24670 + }, + { + "epoch": 0.9760920722181574, + "grad_norm": 1.0082758887117738, + "learning_rate": 8.531056693266247e-06, + "loss": 0.9912, + "step": 24680 + }, + { + "epoch": 0.9764875714370465, + "grad_norm": 1.0469525366784656, + "learning_rate": 8.529427213398273e-06, + "loss": 0.9874, + "step": 24690 + }, + { + "epoch": 0.9768830706559355, + "grad_norm": 1.0626592652464433, + "learning_rate": 8.527796986058082e-06, + "loss": 0.9743, + "step": 24700 + }, + { + "epoch": 0.9772785698748245, + "grad_norm": 0.9594083953456605, + "learning_rate": 8.526166011590926e-06, + "loss": 0.9688, + "step": 24710 + }, + { + "epoch": 0.9776740690937136, + "grad_norm": 1.0130924977436224, + "learning_rate": 8.524534290342224e-06, + "loss": 0.9663, + "step": 24720 + }, + { + "epoch": 0.9780695683126026, + "grad_norm": 1.256564726703701, + "learning_rate": 8.522901822657538e-06, + "loss": 0.9711, + "step": 24730 + }, + { + "epoch": 0.9784650675314917, + "grad_norm": 0.9845687923933368, + 
"learning_rate": 8.521268608882602e-06, + "loss": 0.9935, + "step": 24740 + }, + { + "epoch": 0.9788605667503807, + "grad_norm": 1.0044882677272104, + "learning_rate": 8.519634649363305e-06, + "loss": 0.9772, + "step": 24750 + }, + { + "epoch": 0.9792560659692697, + "grad_norm": 0.9990586224870897, + "learning_rate": 8.517999944445684e-06, + "loss": 0.9783, + "step": 24760 + }, + { + "epoch": 0.9796515651881588, + "grad_norm": 1.0824122734800394, + "learning_rate": 8.516364494475947e-06, + "loss": 0.9742, + "step": 24770 + }, + { + "epoch": 0.9800470644070478, + "grad_norm": 1.0743659396923315, + "learning_rate": 8.514728299800456e-06, + "loss": 0.9874, + "step": 24780 + }, + { + "epoch": 0.9804425636259368, + "grad_norm": 1.1588839163241136, + "learning_rate": 8.513091360765724e-06, + "loss": 0.9568, + "step": 24790 + }, + { + "epoch": 0.9808380628448259, + "grad_norm": 1.0525018969474194, + "learning_rate": 8.511453677718428e-06, + "loss": 0.9588, + "step": 24800 + }, + { + "epoch": 0.9812335620637149, + "grad_norm": 1.122657093279453, + "learning_rate": 8.509815251005402e-06, + "loss": 0.9544, + "step": 24810 + }, + { + "epoch": 0.981629061282604, + "grad_norm": 1.0003572985318947, + "learning_rate": 8.508176080973636e-06, + "loss": 0.9693, + "step": 24820 + }, + { + "epoch": 0.982024560501493, + "grad_norm": 1.0647895252488284, + "learning_rate": 8.506536167970282e-06, + "loss": 0.9867, + "step": 24830 + }, + { + "epoch": 0.982420059720382, + "grad_norm": 1.0339369710015192, + "learning_rate": 8.504895512342639e-06, + "loss": 0.9844, + "step": 24840 + }, + { + "epoch": 0.9828155589392711, + "grad_norm": 1.0970746775487668, + "learning_rate": 8.503254114438176e-06, + "loss": 0.9542, + "step": 24850 + }, + { + "epoch": 0.9832110581581601, + "grad_norm": 1.166215584521932, + "learning_rate": 8.501611974604507e-06, + "loss": 0.9581, + "step": 24860 + }, + { + "epoch": 0.9836065573770492, + "grad_norm": 0.9409443143951971, + "learning_rate": 8.499969093189413e-06, + "loss": 0.9698, + "step": 24870 + }, + { + "epoch": 0.9840020565959382, + "grad_norm": 0.9956033294425854, + "learning_rate": 8.498325470540829e-06, + "loss": 0.9549, + "step": 24880 + }, + { + "epoch": 0.9843975558148272, + "grad_norm": 1.1131349280853753, + "learning_rate": 8.49668110700684e-06, + "loss": 0.9797, + "step": 24890 + }, + { + "epoch": 0.9847930550337163, + "grad_norm": 0.9820467687229171, + "learning_rate": 8.495036002935704e-06, + "loss": 0.9807, + "step": 24900 + }, + { + "epoch": 0.9851885542526053, + "grad_norm": 1.0392446418209116, + "learning_rate": 8.493390158675815e-06, + "loss": 0.9869, + "step": 24910 + }, + { + "epoch": 0.9855840534714944, + "grad_norm": 1.0887242468295657, + "learning_rate": 8.491743574575743e-06, + "loss": 0.9764, + "step": 24920 + }, + { + "epoch": 0.9859795526903834, + "grad_norm": 0.9735300480010712, + "learning_rate": 8.490096250984203e-06, + "loss": 0.9906, + "step": 24930 + }, + { + "epoch": 0.9863750519092724, + "grad_norm": 1.1269026560005961, + "learning_rate": 8.488448188250068e-06, + "loss": 0.9725, + "step": 24940 + }, + { + "epoch": 0.9867705511281615, + "grad_norm": 1.2244448423161338, + "learning_rate": 8.486799386722372e-06, + "loss": 0.9834, + "step": 24950 + }, + { + "epoch": 0.9871660503470505, + "grad_norm": 1.1517443881734524, + "learning_rate": 8.485149846750304e-06, + "loss": 0.995, + "step": 24960 + }, + { + "epoch": 0.9875615495659396, + "grad_norm": 0.9742346488245709, + "learning_rate": 8.483499568683206e-06, + "loss": 0.9619, + "step": 24970 + }, + { + 
"epoch": 0.9879570487848286, + "grad_norm": 1.116800309731723, + "learning_rate": 8.48184855287058e-06, + "loss": 0.9678, + "step": 24980 + }, + { + "epoch": 0.9883525480037177, + "grad_norm": 1.2221451131700947, + "learning_rate": 8.480196799662082e-06, + "loss": 0.9789, + "step": 24990 + }, + { + "epoch": 0.9887480472226068, + "grad_norm": 1.1721500740274573, + "learning_rate": 8.478544309407524e-06, + "loss": 0.9934, + "step": 25000 + }, + { + "epoch": 0.9891435464414958, + "grad_norm": 1.0531231798318392, + "learning_rate": 8.476891082456877e-06, + "loss": 0.9677, + "step": 25010 + }, + { + "epoch": 0.9895390456603849, + "grad_norm": 1.0300403205195796, + "learning_rate": 8.475237119160267e-06, + "loss": 0.9681, + "step": 25020 + }, + { + "epoch": 0.9899345448792739, + "grad_norm": 1.083815401921792, + "learning_rate": 8.473582419867971e-06, + "loss": 0.9817, + "step": 25030 + }, + { + "epoch": 0.9903300440981629, + "grad_norm": 1.098402138660783, + "learning_rate": 8.47192698493043e-06, + "loss": 0.9798, + "step": 25040 + }, + { + "epoch": 0.990725543317052, + "grad_norm": 1.2358762454533718, + "learning_rate": 8.470270814698234e-06, + "loss": 0.9798, + "step": 25050 + }, + { + "epoch": 0.991121042535941, + "grad_norm": 1.0047991855222802, + "learning_rate": 8.468613909522135e-06, + "loss": 0.9715, + "step": 25060 + }, + { + "epoch": 0.9915165417548301, + "grad_norm": 1.1436589937445054, + "learning_rate": 8.466956269753033e-06, + "loss": 0.9625, + "step": 25070 + }, + { + "epoch": 0.9919120409737191, + "grad_norm": 1.135032816869507, + "learning_rate": 8.465297895741989e-06, + "loss": 0.9725, + "step": 25080 + }, + { + "epoch": 0.9923075401926081, + "grad_norm": 1.2465170306999038, + "learning_rate": 8.46363878784022e-06, + "loss": 0.9682, + "step": 25090 + }, + { + "epoch": 0.9927030394114972, + "grad_norm": 0.9622293138397663, + "learning_rate": 8.461978946399097e-06, + "loss": 0.9597, + "step": 25100 + }, + { + "epoch": 0.9930985386303862, + "grad_norm": 0.9983130861800154, + "learning_rate": 8.460318371770142e-06, + "loss": 0.9671, + "step": 25110 + }, + { + "epoch": 0.9934940378492753, + "grad_norm": 1.100857497232352, + "learning_rate": 8.45865706430504e-06, + "loss": 0.9736, + "step": 25120 + }, + { + "epoch": 0.9938895370681643, + "grad_norm": 1.0431628846721765, + "learning_rate": 8.456995024355626e-06, + "loss": 0.9754, + "step": 25130 + }, + { + "epoch": 0.9942850362870533, + "grad_norm": 1.1679608148087055, + "learning_rate": 8.45533225227389e-06, + "loss": 0.9899, + "step": 25140 + }, + { + "epoch": 0.9946805355059424, + "grad_norm": 1.2493484344888548, + "learning_rate": 8.453668748411982e-06, + "loss": 0.9602, + "step": 25150 + }, + { + "epoch": 0.9950760347248314, + "grad_norm": 1.0460490783985266, + "learning_rate": 8.452004513122203e-06, + "loss": 0.9846, + "step": 25160 + }, + { + "epoch": 0.9954715339437205, + "grad_norm": 1.1157475160160253, + "learning_rate": 8.450339546757007e-06, + "loss": 0.978, + "step": 25170 + }, + { + "epoch": 0.9958670331626095, + "grad_norm": 1.047670583168386, + "learning_rate": 8.448673849669007e-06, + "loss": 0.9714, + "step": 25180 + }, + { + "epoch": 0.9962625323814985, + "grad_norm": 1.0795638452773237, + "learning_rate": 8.447007422210969e-06, + "loss": 0.9727, + "step": 25190 + }, + { + "epoch": 0.9966580316003876, + "grad_norm": 1.182375761433743, + "learning_rate": 8.445340264735816e-06, + "loss": 0.9726, + "step": 25200 + }, + { + "epoch": 0.9970535308192766, + "grad_norm": 1.1092415996793215, + "learning_rate": 
8.443672377596619e-06, + "loss": 0.9676, + "step": 25210 + }, + { + "epoch": 0.9974490300381657, + "grad_norm": 1.0483620430525118, + "learning_rate": 8.442003761146608e-06, + "loss": 0.9462, + "step": 25220 + }, + { + "epoch": 0.9978445292570547, + "grad_norm": 1.0521489228971246, + "learning_rate": 8.440334415739174e-06, + "loss": 0.9664, + "step": 25230 + }, + { + "epoch": 0.9982400284759437, + "grad_norm": 1.0559306373608404, + "learning_rate": 8.438664341727847e-06, + "loss": 0.9725, + "step": 25240 + }, + { + "epoch": 0.9986355276948328, + "grad_norm": 1.294081154449968, + "learning_rate": 8.436993539466327e-06, + "loss": 0.9673, + "step": 25250 + }, + { + "epoch": 0.9990310269137218, + "grad_norm": 1.285149072661124, + "learning_rate": 8.435322009308457e-06, + "loss": 0.9432, + "step": 25260 + }, + { + "epoch": 0.9994265261326108, + "grad_norm": 1.0958739312464687, + "learning_rate": 8.433649751608242e-06, + "loss": 0.9786, + "step": 25270 + }, + { + "epoch": 0.9998220253514999, + "grad_norm": 1.0560425402901619, + "learning_rate": 8.431976766719834e-06, + "loss": 0.9726, + "step": 25280 + }, + { + "epoch": 1.000217524570389, + "grad_norm": 1.1122063131120121, + "learning_rate": 8.430303054997544e-06, + "loss": 0.9292, + "step": 25290 + }, + { + "epoch": 1.000613023789278, + "grad_norm": 1.0965049679869467, + "learning_rate": 8.428628616795835e-06, + "loss": 0.906, + "step": 25300 + }, + { + "epoch": 1.0010085230081671, + "grad_norm": 1.1176026427279113, + "learning_rate": 8.426953452469326e-06, + "loss": 0.9058, + "step": 25310 + }, + { + "epoch": 1.0014040222270562, + "grad_norm": 1.2102590984394073, + "learning_rate": 8.425277562372786e-06, + "loss": 0.9061, + "step": 25320 + }, + { + "epoch": 1.0017995214459452, + "grad_norm": 1.044705965380849, + "learning_rate": 8.423600946861144e-06, + "loss": 0.892, + "step": 25330 + }, + { + "epoch": 1.0021950206648342, + "grad_norm": 1.1302544296221577, + "learning_rate": 8.421923606289473e-06, + "loss": 0.8991, + "step": 25340 + }, + { + "epoch": 1.0025905198837233, + "grad_norm": 1.0912614317007048, + "learning_rate": 8.420245541013006e-06, + "loss": 0.9054, + "step": 25350 + }, + { + "epoch": 1.0029860191026123, + "grad_norm": 1.0532253533864584, + "learning_rate": 8.41856675138713e-06, + "loss": 0.915, + "step": 25360 + }, + { + "epoch": 1.0033815183215014, + "grad_norm": 1.1055684310333207, + "learning_rate": 8.416887237767385e-06, + "loss": 0.8978, + "step": 25370 + }, + { + "epoch": 1.0037770175403904, + "grad_norm": 1.108742767866052, + "learning_rate": 8.415207000509461e-06, + "loss": 0.9115, + "step": 25380 + }, + { + "epoch": 1.0041725167592794, + "grad_norm": 1.1769804228345317, + "learning_rate": 8.413526039969204e-06, + "loss": 0.8913, + "step": 25390 + }, + { + "epoch": 1.0045680159781685, + "grad_norm": 1.0650812806565177, + "learning_rate": 8.411844356502615e-06, + "loss": 0.8955, + "step": 25400 + }, + { + "epoch": 1.0049635151970575, + "grad_norm": 1.2571698043878294, + "learning_rate": 8.41016195046584e-06, + "loss": 0.8945, + "step": 25410 + }, + { + "epoch": 1.0053590144159466, + "grad_norm": 1.1928729441937398, + "learning_rate": 8.408478822215191e-06, + "loss": 0.8842, + "step": 25420 + }, + { + "epoch": 1.0057545136348356, + "grad_norm": 1.1196771377639678, + "learning_rate": 8.406794972107119e-06, + "loss": 0.9332, + "step": 25430 + }, + { + "epoch": 1.0061500128537246, + "grad_norm": 1.0632532577603537, + "learning_rate": 8.405110400498239e-06, + "loss": 0.8966, + "step": 25440 + }, + { + "epoch": 
1.0065455120726137, + "grad_norm": 0.9924201140796625, + "learning_rate": 8.403425107745315e-06, + "loss": 0.9148, + "step": 25450 + }, + { + "epoch": 1.0069410112915027, + "grad_norm": 1.124075974689088, + "learning_rate": 8.401739094205259e-06, + "loss": 0.9151, + "step": 25460 + }, + { + "epoch": 1.0073365105103917, + "grad_norm": 1.2155242175758245, + "learning_rate": 8.400052360235143e-06, + "loss": 0.9092, + "step": 25470 + }, + { + "epoch": 1.0077320097292808, + "grad_norm": 1.130226587808288, + "learning_rate": 8.398364906192189e-06, + "loss": 0.9222, + "step": 25480 + }, + { + "epoch": 1.0081275089481698, + "grad_norm": 1.029056255979358, + "learning_rate": 8.396676732433767e-06, + "loss": 0.9224, + "step": 25490 + }, + { + "epoch": 1.0085230081670589, + "grad_norm": 1.142710985123221, + "learning_rate": 8.394987839317405e-06, + "loss": 0.8994, + "step": 25500 + }, + { + "epoch": 1.008918507385948, + "grad_norm": 1.2459966870815842, + "learning_rate": 8.393298227200783e-06, + "loss": 0.9037, + "step": 25510 + }, + { + "epoch": 1.009314006604837, + "grad_norm": 1.1217583580700818, + "learning_rate": 8.391607896441733e-06, + "loss": 0.9044, + "step": 25520 + }, + { + "epoch": 1.009709505823726, + "grad_norm": 1.1583672071361588, + "learning_rate": 8.389916847398235e-06, + "loss": 0.8974, + "step": 25530 + }, + { + "epoch": 1.010105005042615, + "grad_norm": 1.0629845412090806, + "learning_rate": 8.388225080428425e-06, + "loss": 0.9019, + "step": 25540 + }, + { + "epoch": 1.010500504261504, + "grad_norm": 1.1274331366613304, + "learning_rate": 8.38653259589059e-06, + "loss": 0.9138, + "step": 25550 + }, + { + "epoch": 1.010896003480393, + "grad_norm": 1.1008028318047414, + "learning_rate": 8.38483939414317e-06, + "loss": 0.8925, + "step": 25560 + }, + { + "epoch": 1.0112915026992821, + "grad_norm": 1.1759268056993244, + "learning_rate": 8.383145475544757e-06, + "loss": 0.9015, + "step": 25570 + }, + { + "epoch": 1.0116870019181712, + "grad_norm": 1.1414444035725138, + "learning_rate": 8.381450840454092e-06, + "loss": 0.9018, + "step": 25580 + }, + { + "epoch": 1.0120825011370602, + "grad_norm": 1.0825001927420355, + "learning_rate": 8.379755489230073e-06, + "loss": 0.9242, + "step": 25590 + }, + { + "epoch": 1.0124780003559493, + "grad_norm": 1.2755566730808183, + "learning_rate": 8.378059422231741e-06, + "loss": 0.8869, + "step": 25600 + }, + { + "epoch": 1.0128734995748383, + "grad_norm": 1.0901785165508477, + "learning_rate": 8.376362639818298e-06, + "loss": 0.9001, + "step": 25610 + }, + { + "epoch": 1.0132689987937273, + "grad_norm": 1.1192391210756936, + "learning_rate": 8.374665142349095e-06, + "loss": 0.9208, + "step": 25620 + }, + { + "epoch": 1.0136644980126164, + "grad_norm": 1.1452051245208987, + "learning_rate": 8.372966930183625e-06, + "loss": 0.8938, + "step": 25630 + }, + { + "epoch": 1.0140599972315054, + "grad_norm": 0.9900050914642096, + "learning_rate": 8.371268003681549e-06, + "loss": 0.8953, + "step": 25640 + }, + { + "epoch": 1.0144554964503945, + "grad_norm": 1.0762038225120603, + "learning_rate": 8.369568363202667e-06, + "loss": 0.8986, + "step": 25650 + }, + { + "epoch": 1.0148509956692835, + "grad_norm": 1.1200986578948064, + "learning_rate": 8.367868009106935e-06, + "loss": 0.9106, + "step": 25660 + }, + { + "epoch": 1.0152464948881725, + "grad_norm": 1.0539551484285923, + "learning_rate": 8.366166941754455e-06, + "loss": 0.9012, + "step": 25670 + }, + { + "epoch": 1.0156419941070616, + "grad_norm": 1.1051340982271542, + "learning_rate": 
8.364465161505487e-06, + "loss": 0.8996, + "step": 25680 + }, + { + "epoch": 1.0160374933259506, + "grad_norm": 1.2476102193040963, + "learning_rate": 8.362762668720438e-06, + "loss": 0.9111, + "step": 25690 + }, + { + "epoch": 1.0164329925448397, + "grad_norm": 1.127198128985044, + "learning_rate": 8.36105946375987e-06, + "loss": 0.9054, + "step": 25700 + }, + { + "epoch": 1.0168284917637287, + "grad_norm": 1.1633383076226893, + "learning_rate": 8.359355546984487e-06, + "loss": 0.9122, + "step": 25710 + }, + { + "epoch": 1.0172239909826177, + "grad_norm": 1.0110877777537237, + "learning_rate": 8.357650918755153e-06, + "loss": 0.8986, + "step": 25720 + }, + { + "epoch": 1.0176194902015068, + "grad_norm": 1.0071923202531416, + "learning_rate": 8.355945579432878e-06, + "loss": 0.8927, + "step": 25730 + }, + { + "epoch": 1.0180149894203958, + "grad_norm": 1.163110831712327, + "learning_rate": 8.354239529378825e-06, + "loss": 0.8991, + "step": 25740 + }, + { + "epoch": 1.0184104886392848, + "grad_norm": 1.1454397337178799, + "learning_rate": 8.352532768954305e-06, + "loss": 0.9082, + "step": 25750 + }, + { + "epoch": 1.018805987858174, + "grad_norm": 1.0282578570312622, + "learning_rate": 8.35082529852078e-06, + "loss": 0.9113, + "step": 25760 + }, + { + "epoch": 1.019201487077063, + "grad_norm": 1.1644705857999138, + "learning_rate": 8.349117118439864e-06, + "loss": 0.9056, + "step": 25770 + }, + { + "epoch": 1.019596986295952, + "grad_norm": 1.0553504386081243, + "learning_rate": 8.347408229073321e-06, + "loss": 0.9207, + "step": 25780 + }, + { + "epoch": 1.019992485514841, + "grad_norm": 1.049931458978996, + "learning_rate": 8.345698630783062e-06, + "loss": 0.9034, + "step": 25790 + }, + { + "epoch": 1.02038798473373, + "grad_norm": 1.0646536273269438, + "learning_rate": 8.343988323931155e-06, + "loss": 0.8817, + "step": 25800 + }, + { + "epoch": 1.020783483952619, + "grad_norm": 1.1382528685128661, + "learning_rate": 8.342277308879807e-06, + "loss": 0.8863, + "step": 25810 + }, + { + "epoch": 1.0211789831715081, + "grad_norm": 1.082035748051446, + "learning_rate": 8.340565585991386e-06, + "loss": 0.9063, + "step": 25820 + }, + { + "epoch": 1.0215744823903974, + "grad_norm": 1.0543652850417833, + "learning_rate": 8.338853155628408e-06, + "loss": 0.8878, + "step": 25830 + }, + { + "epoch": 1.0219699816092864, + "grad_norm": 1.2583980088512, + "learning_rate": 8.337140018153532e-06, + "loss": 0.9034, + "step": 25840 + }, + { + "epoch": 1.0223654808281755, + "grad_norm": 1.1363987916154439, + "learning_rate": 8.33542617392957e-06, + "loss": 0.9007, + "step": 25850 + }, + { + "epoch": 1.0227609800470645, + "grad_norm": 1.1322430291594914, + "learning_rate": 8.333711623319492e-06, + "loss": 0.9015, + "step": 25860 + }, + { + "epoch": 1.0231564792659535, + "grad_norm": 1.2583851440179512, + "learning_rate": 8.331996366686404e-06, + "loss": 0.9121, + "step": 25870 + }, + { + "epoch": 1.0235519784848426, + "grad_norm": 1.1672767543728206, + "learning_rate": 8.330280404393569e-06, + "loss": 0.9036, + "step": 25880 + }, + { + "epoch": 1.0239474777037316, + "grad_norm": 1.0233772454658783, + "learning_rate": 8.3285637368044e-06, + "loss": 0.9091, + "step": 25890 + }, + { + "epoch": 1.0243429769226207, + "grad_norm": 1.11875252850936, + "learning_rate": 8.326846364282457e-06, + "loss": 0.8915, + "step": 25900 + }, + { + "epoch": 1.0247384761415097, + "grad_norm": 1.2000842182902896, + "learning_rate": 8.325128287191451e-06, + "loss": 0.902, + "step": 25910 + }, + { + "epoch": 1.0251339753603987, + 
"grad_norm": 1.1814098772590507, + "learning_rate": 8.32340950589524e-06, + "loss": 0.9104, + "step": 25920 + }, + { + "epoch": 1.0255294745792878, + "grad_norm": 1.1477897706659193, + "learning_rate": 8.321690020757833e-06, + "loss": 0.924, + "step": 25930 + }, + { + "epoch": 1.0259249737981768, + "grad_norm": 1.0810906223097685, + "learning_rate": 8.319969832143389e-06, + "loss": 0.8895, + "step": 25940 + }, + { + "epoch": 1.0263204730170659, + "grad_norm": 1.038721590468152, + "learning_rate": 8.31824894041621e-06, + "loss": 0.9234, + "step": 25950 + }, + { + "epoch": 1.026715972235955, + "grad_norm": 1.1338856682034621, + "learning_rate": 8.316527345940754e-06, + "loss": 0.9097, + "step": 25960 + }, + { + "epoch": 1.027111471454844, + "grad_norm": 1.0908569789747333, + "learning_rate": 8.31480504908163e-06, + "loss": 0.9036, + "step": 25970 + }, + { + "epoch": 1.027506970673733, + "grad_norm": 1.1858118165224072, + "learning_rate": 8.313082050203581e-06, + "loss": 0.888, + "step": 25980 + }, + { + "epoch": 1.027902469892622, + "grad_norm": 1.0937115220729168, + "learning_rate": 8.311358349671516e-06, + "loss": 0.8987, + "step": 25990 + }, + { + "epoch": 1.028297969111511, + "grad_norm": 1.1735515640484673, + "learning_rate": 8.309633947850486e-06, + "loss": 0.9019, + "step": 26000 + }, + { + "epoch": 1.0286934683304, + "grad_norm": 1.0710227994306132, + "learning_rate": 8.307908845105685e-06, + "loss": 0.9147, + "step": 26010 + }, + { + "epoch": 1.0290889675492891, + "grad_norm": 1.1215359325055878, + "learning_rate": 8.306183041802462e-06, + "loss": 0.9011, + "step": 26020 + }, + { + "epoch": 1.0294844667681782, + "grad_norm": 1.0669939726035618, + "learning_rate": 8.304456538306314e-06, + "loss": 0.8788, + "step": 26030 + }, + { + "epoch": 1.0298799659870672, + "grad_norm": 1.2722986858375807, + "learning_rate": 8.302729334982883e-06, + "loss": 0.9052, + "step": 26040 + }, + { + "epoch": 1.0302754652059563, + "grad_norm": 1.0800191926325546, + "learning_rate": 8.301001432197962e-06, + "loss": 0.919, + "step": 26050 + }, + { + "epoch": 1.0306709644248453, + "grad_norm": 1.042226298377939, + "learning_rate": 8.299272830317491e-06, + "loss": 0.9213, + "step": 26060 + }, + { + "epoch": 1.0310664636437343, + "grad_norm": 1.1974991756960707, + "learning_rate": 8.297543529707558e-06, + "loss": 0.9123, + "step": 26070 + }, + { + "epoch": 1.0314619628626234, + "grad_norm": 1.2329459284704791, + "learning_rate": 8.2958135307344e-06, + "loss": 0.914, + "step": 26080 + }, + { + "epoch": 1.0318574620815124, + "grad_norm": 1.103551851670795, + "learning_rate": 8.294082833764401e-06, + "loss": 0.9046, + "step": 26090 + }, + { + "epoch": 1.0322529613004015, + "grad_norm": 1.142944376839717, + "learning_rate": 8.29235143916409e-06, + "loss": 0.9165, + "step": 26100 + }, + { + "epoch": 1.0326484605192905, + "grad_norm": 1.0294926939216598, + "learning_rate": 8.290619347300153e-06, + "loss": 0.9108, + "step": 26110 + }, + { + "epoch": 1.0330439597381795, + "grad_norm": 1.0870483219904943, + "learning_rate": 8.288886558539414e-06, + "loss": 0.9048, + "step": 26120 + }, + { + "epoch": 1.0334394589570686, + "grad_norm": 1.1548781079038755, + "learning_rate": 8.287153073248845e-06, + "loss": 0.9127, + "step": 26130 + }, + { + "epoch": 1.0338349581759576, + "grad_norm": 1.0016720999598157, + "learning_rate": 8.285418891795572e-06, + "loss": 0.9009, + "step": 26140 + }, + { + "epoch": 1.0342304573948466, + "grad_norm": 1.1700391722248944, + "learning_rate": 8.283684014546864e-06, + "loss": 0.8815, + "step": 
26150 + }, + { + "epoch": 1.0346259566137357, + "grad_norm": 1.1628013271247277, + "learning_rate": 8.281948441870138e-06, + "loss": 0.8992, + "step": 26160 + }, + { + "epoch": 1.0350214558326247, + "grad_norm": 1.1572643485272085, + "learning_rate": 8.28021217413296e-06, + "loss": 0.9151, + "step": 26170 + }, + { + "epoch": 1.0354169550515138, + "grad_norm": 1.2056700413286618, + "learning_rate": 8.278475211703041e-06, + "loss": 0.8934, + "step": 26180 + }, + { + "epoch": 1.0358124542704028, + "grad_norm": 1.0951288087858853, + "learning_rate": 8.27673755494824e-06, + "loss": 0.9179, + "step": 26190 + }, + { + "epoch": 1.0362079534892918, + "grad_norm": 1.1243189476912894, + "learning_rate": 8.274999204236562e-06, + "loss": 0.924, + "step": 26200 + }, + { + "epoch": 1.0366034527081809, + "grad_norm": 1.2070529113877762, + "learning_rate": 8.27326015993616e-06, + "loss": 0.9066, + "step": 26210 + }, + { + "epoch": 1.03699895192707, + "grad_norm": 1.1319268547196213, + "learning_rate": 8.271520422415333e-06, + "loss": 0.9044, + "step": 26220 + }, + { + "epoch": 1.037394451145959, + "grad_norm": 1.226502204292547, + "learning_rate": 8.26977999204253e-06, + "loss": 0.9174, + "step": 26230 + }, + { + "epoch": 1.037789950364848, + "grad_norm": 1.1405096597113105, + "learning_rate": 8.268038869186345e-06, + "loss": 0.9003, + "step": 26240 + }, + { + "epoch": 1.038185449583737, + "grad_norm": 1.316048883514623, + "learning_rate": 8.266297054215515e-06, + "loss": 0.9029, + "step": 26250 + }, + { + "epoch": 1.038580948802626, + "grad_norm": 1.2984908387966145, + "learning_rate": 8.264554547498927e-06, + "loss": 0.8923, + "step": 26260 + }, + { + "epoch": 1.0389764480215151, + "grad_norm": 1.1693070516574628, + "learning_rate": 8.262811349405616e-06, + "loss": 0.888, + "step": 26270 + }, + { + "epoch": 1.0393719472404042, + "grad_norm": 1.1580902130950566, + "learning_rate": 8.261067460304759e-06, + "loss": 0.905, + "step": 26280 + }, + { + "epoch": 1.0397674464592932, + "grad_norm": 1.2112393609040144, + "learning_rate": 8.259322880565683e-06, + "loss": 0.9152, + "step": 26290 + }, + { + "epoch": 1.0401629456781822, + "grad_norm": 1.0918316136989545, + "learning_rate": 8.257577610557861e-06, + "loss": 0.9063, + "step": 26300 + }, + { + "epoch": 1.0405584448970713, + "grad_norm": 1.188777233244895, + "learning_rate": 8.25583165065091e-06, + "loss": 0.9069, + "step": 26310 + }, + { + "epoch": 1.0409539441159603, + "grad_norm": 1.2420338199548104, + "learning_rate": 8.254085001214596e-06, + "loss": 0.9133, + "step": 26320 + }, + { + "epoch": 1.0413494433348494, + "grad_norm": 1.3712209946739753, + "learning_rate": 8.252337662618826e-06, + "loss": 0.9187, + "step": 26330 + }, + { + "epoch": 1.0417449425537384, + "grad_norm": 1.1484534682367094, + "learning_rate": 8.250589635233662e-06, + "loss": 0.9072, + "step": 26340 + }, + { + "epoch": 1.0421404417726274, + "grad_norm": 1.1937789648256916, + "learning_rate": 8.248840919429301e-06, + "loss": 0.8974, + "step": 26350 + }, + { + "epoch": 1.0425359409915165, + "grad_norm": 1.0002180706825603, + "learning_rate": 8.247091515576093e-06, + "loss": 0.9027, + "step": 26360 + }, + { + "epoch": 1.0429314402104055, + "grad_norm": 1.2674330472529245, + "learning_rate": 8.245341424044532e-06, + "loss": 0.8807, + "step": 26370 + }, + { + "epoch": 1.0433269394292946, + "grad_norm": 1.154459992349424, + "learning_rate": 8.243590645205256e-06, + "loss": 0.8949, + "step": 26380 + }, + { + "epoch": 1.0437224386481836, + "grad_norm": 1.0375522349338302, + "learning_rate": 
8.241839179429054e-06, + "loss": 0.8975, + "step": 26390 + }, + { + "epoch": 1.0441179378670726, + "grad_norm": 1.0463465357097628, + "learning_rate": 8.240087027086852e-06, + "loss": 0.914, + "step": 26400 + }, + { + "epoch": 1.0445134370859617, + "grad_norm": 1.3768072345518934, + "learning_rate": 8.238334188549727e-06, + "loss": 0.9035, + "step": 26410 + }, + { + "epoch": 1.0449089363048507, + "grad_norm": 1.1738597367096681, + "learning_rate": 8.2365806641889e-06, + "loss": 0.8897, + "step": 26420 + }, + { + "epoch": 1.0453044355237398, + "grad_norm": 1.0828780802961187, + "learning_rate": 8.234826454375741e-06, + "loss": 0.9035, + "step": 26430 + }, + { + "epoch": 1.0456999347426288, + "grad_norm": 1.0407314262519847, + "learning_rate": 8.233071559481755e-06, + "loss": 0.9062, + "step": 26440 + }, + { + "epoch": 1.0460954339615178, + "grad_norm": 1.1602763104533675, + "learning_rate": 8.231315979878604e-06, + "loss": 0.8907, + "step": 26450 + }, + { + "epoch": 1.0464909331804069, + "grad_norm": 1.0824471121944337, + "learning_rate": 8.22955971593809e-06, + "loss": 0.8885, + "step": 26460 + }, + { + "epoch": 1.046886432399296, + "grad_norm": 1.3661926987365036, + "learning_rate": 8.227802768032154e-06, + "loss": 0.906, + "step": 26470 + }, + { + "epoch": 1.047281931618185, + "grad_norm": 1.0936388123559422, + "learning_rate": 8.226045136532894e-06, + "loss": 0.9039, + "step": 26480 + }, + { + "epoch": 1.0476774308370742, + "grad_norm": 1.1445401938325779, + "learning_rate": 8.224286821812541e-06, + "loss": 0.8909, + "step": 26490 + }, + { + "epoch": 1.0480729300559632, + "grad_norm": 1.3011384987255774, + "learning_rate": 8.22252782424348e-06, + "loss": 0.9089, + "step": 26500 + }, + { + "epoch": 1.0484684292748523, + "grad_norm": 1.1488955314782079, + "learning_rate": 8.220768144198235e-06, + "loss": 0.9083, + "step": 26510 + }, + { + "epoch": 1.0488639284937413, + "grad_norm": 1.1573743425888623, + "learning_rate": 8.219007782049474e-06, + "loss": 0.9198, + "step": 26520 + }, + { + "epoch": 1.0492594277126304, + "grad_norm": 1.1255650197311504, + "learning_rate": 8.217246738170014e-06, + "loss": 0.8934, + "step": 26530 + }, + { + "epoch": 1.0496549269315194, + "grad_norm": 1.0875887225361511, + "learning_rate": 8.215485012932813e-06, + "loss": 0.8849, + "step": 26540 + }, + { + "epoch": 1.0500504261504084, + "grad_norm": 1.0776725283215887, + "learning_rate": 8.213722606710975e-06, + "loss": 0.9023, + "step": 26550 + }, + { + "epoch": 1.0504459253692975, + "grad_norm": 1.094791239471887, + "learning_rate": 8.211959519877749e-06, + "loss": 0.8915, + "step": 26560 + }, + { + "epoch": 1.0508414245881865, + "grad_norm": 1.2152553885248114, + "learning_rate": 8.210195752806523e-06, + "loss": 0.8836, + "step": 26570 + }, + { + "epoch": 1.0512369238070756, + "grad_norm": 1.251656420660928, + "learning_rate": 8.208431305870832e-06, + "loss": 0.9095, + "step": 26580 + }, + { + "epoch": 1.0516324230259646, + "grad_norm": 1.1012829413081515, + "learning_rate": 8.206666179444361e-06, + "loss": 0.9029, + "step": 26590 + }, + { + "epoch": 1.0520279222448536, + "grad_norm": 1.1199261024941962, + "learning_rate": 8.204900373900928e-06, + "loss": 0.8948, + "step": 26600 + }, + { + "epoch": 1.0524234214637427, + "grad_norm": 1.161610127231663, + "learning_rate": 8.203133889614504e-06, + "loss": 0.8763, + "step": 26610 + }, + { + "epoch": 1.0528189206826317, + "grad_norm": 1.0866075528266157, + "learning_rate": 8.2013667269592e-06, + "loss": 0.9017, + "step": 26620 + }, + { + "epoch": 
1.0532144199015208, + "grad_norm": 1.0359004203083304, + "learning_rate": 8.199598886309268e-06, + "loss": 0.8823, + "step": 26630 + }, + { + "epoch": 1.0536099191204098, + "grad_norm": 1.333290592930516, + "learning_rate": 8.197830368039109e-06, + "loss": 0.9019, + "step": 26640 + }, + { + "epoch": 1.0540054183392988, + "grad_norm": 1.2852055663424773, + "learning_rate": 8.196061172523263e-06, + "loss": 0.8986, + "step": 26650 + }, + { + "epoch": 1.0544009175581879, + "grad_norm": 1.2210669835170829, + "learning_rate": 8.194291300136417e-06, + "loss": 0.9148, + "step": 26660 + }, + { + "epoch": 1.054796416777077, + "grad_norm": 1.1571294959315643, + "learning_rate": 8.192520751253399e-06, + "loss": 0.8965, + "step": 26670 + }, + { + "epoch": 1.055191915995966, + "grad_norm": 1.190394692001253, + "learning_rate": 8.19074952624918e-06, + "loss": 0.8978, + "step": 26680 + }, + { + "epoch": 1.055587415214855, + "grad_norm": 1.038681535401097, + "learning_rate": 8.188977625498876e-06, + "loss": 0.9082, + "step": 26690 + }, + { + "epoch": 1.055982914433744, + "grad_norm": 1.026335290544012, + "learning_rate": 8.187205049377746e-06, + "loss": 0.9192, + "step": 26700 + }, + { + "epoch": 1.056378413652633, + "grad_norm": 1.1822187498552572, + "learning_rate": 8.18543179826119e-06, + "loss": 0.8938, + "step": 26710 + }, + { + "epoch": 1.0567739128715221, + "grad_norm": 1.1552711851772264, + "learning_rate": 8.183657872524751e-06, + "loss": 0.9199, + "step": 26720 + }, + { + "epoch": 1.0571694120904112, + "grad_norm": 1.2539855798373403, + "learning_rate": 8.181883272544119e-06, + "loss": 0.8907, + "step": 26730 + }, + { + "epoch": 1.0575649113093002, + "grad_norm": 1.1759232523817262, + "learning_rate": 8.180107998695122e-06, + "loss": 0.9169, + "step": 26740 + }, + { + "epoch": 1.0579604105281892, + "grad_norm": 1.2925226259078264, + "learning_rate": 8.178332051353734e-06, + "loss": 0.9003, + "step": 26750 + }, + { + "epoch": 1.0583559097470783, + "grad_norm": 1.058633064738671, + "learning_rate": 8.176555430896068e-06, + "loss": 0.895, + "step": 26760 + }, + { + "epoch": 1.0587514089659673, + "grad_norm": 1.2178054428790546, + "learning_rate": 8.174778137698384e-06, + "loss": 0.9016, + "step": 26770 + }, + { + "epoch": 1.0591469081848564, + "grad_norm": 1.0291043090941907, + "learning_rate": 8.17300017213708e-06, + "loss": 0.8949, + "step": 26780 + }, + { + "epoch": 1.0595424074037454, + "grad_norm": 1.149950623218104, + "learning_rate": 8.171221534588702e-06, + "loss": 0.896, + "step": 26790 + }, + { + "epoch": 1.0599379066226344, + "grad_norm": 1.214800080595436, + "learning_rate": 8.169442225429931e-06, + "loss": 0.8848, + "step": 26800 + }, + { + "epoch": 1.0603334058415235, + "grad_norm": 1.0834996282959277, + "learning_rate": 8.167662245037598e-06, + "loss": 0.927, + "step": 26810 + }, + { + "epoch": 1.0607289050604125, + "grad_norm": 1.084241228228272, + "learning_rate": 8.165881593788669e-06, + "loss": 0.9023, + "step": 26820 + }, + { + "epoch": 1.0611244042793015, + "grad_norm": 1.184474320344194, + "learning_rate": 8.164100272060258e-06, + "loss": 0.9039, + "step": 26830 + }, + { + "epoch": 1.0615199034981906, + "grad_norm": 1.2346490645276604, + "learning_rate": 8.162318280229618e-06, + "loss": 0.9239, + "step": 26840 + }, + { + "epoch": 1.0619154027170796, + "grad_norm": 1.1511896245603048, + "learning_rate": 8.160535618674142e-06, + "loss": 0.896, + "step": 26850 + }, + { + "epoch": 1.0623109019359687, + "grad_norm": 1.519220099755732, + "learning_rate": 8.158752287771369e-06, + 
"loss": 0.8957, + "step": 26860 + }, + { + "epoch": 1.0627064011548577, + "grad_norm": 1.0775623234113063, + "learning_rate": 8.156968287898978e-06, + "loss": 0.8811, + "step": 26870 + }, + { + "epoch": 1.0631019003737467, + "grad_norm": 1.163800431730651, + "learning_rate": 8.15518361943479e-06, + "loss": 0.909, + "step": 26880 + }, + { + "epoch": 1.0634973995926358, + "grad_norm": 1.062163936008161, + "learning_rate": 8.153398282756766e-06, + "loss": 0.9041, + "step": 26890 + }, + { + "epoch": 1.0638928988115248, + "grad_norm": 1.3120477412719362, + "learning_rate": 8.151612278243011e-06, + "loss": 0.9005, + "step": 26900 + }, + { + "epoch": 1.0642883980304139, + "grad_norm": 1.1731405590931228, + "learning_rate": 8.149825606271768e-06, + "loss": 0.9022, + "step": 26910 + }, + { + "epoch": 1.064683897249303, + "grad_norm": 1.2209189179323687, + "learning_rate": 8.148038267221427e-06, + "loss": 0.889, + "step": 26920 + }, + { + "epoch": 1.065079396468192, + "grad_norm": 1.036859905224877, + "learning_rate": 8.146250261470513e-06, + "loss": 0.9275, + "step": 26930 + }, + { + "epoch": 1.065474895687081, + "grad_norm": 1.0441229722824374, + "learning_rate": 8.144461589397695e-06, + "loss": 0.8953, + "step": 26940 + }, + { + "epoch": 1.06587039490597, + "grad_norm": 1.0483457702917496, + "learning_rate": 8.142672251381785e-06, + "loss": 0.8969, + "step": 26950 + }, + { + "epoch": 1.066265894124859, + "grad_norm": 1.1660382719042404, + "learning_rate": 8.140882247801731e-06, + "loss": 0.8956, + "step": 26960 + }, + { + "epoch": 1.066661393343748, + "grad_norm": 1.1728388099187352, + "learning_rate": 8.139091579036629e-06, + "loss": 0.9167, + "step": 26970 + }, + { + "epoch": 1.0670568925626371, + "grad_norm": 1.0583233252609623, + "learning_rate": 8.13730024546571e-06, + "loss": 0.9089, + "step": 26980 + }, + { + "epoch": 1.0674523917815262, + "grad_norm": 1.112016547589213, + "learning_rate": 8.135508247468348e-06, + "loss": 0.902, + "step": 26990 + }, + { + "epoch": 1.0678478910004152, + "grad_norm": 1.1472180772779197, + "learning_rate": 8.133715585424058e-06, + "loss": 0.9044, + "step": 27000 + }, + { + "epoch": 1.0682433902193043, + "grad_norm": 1.1128413884288322, + "learning_rate": 8.131922259712493e-06, + "loss": 0.8854, + "step": 27010 + }, + { + "epoch": 1.0686388894381933, + "grad_norm": 1.1240627783948445, + "learning_rate": 8.130128270713448e-06, + "loss": 0.915, + "step": 27020 + }, + { + "epoch": 1.0690343886570823, + "grad_norm": 1.1794780876630457, + "learning_rate": 8.128333618806865e-06, + "loss": 0.8876, + "step": 27030 + }, + { + "epoch": 1.0694298878759714, + "grad_norm": 1.1786219108311415, + "learning_rate": 8.126538304372816e-06, + "loss": 0.8907, + "step": 27040 + }, + { + "epoch": 1.0698253870948604, + "grad_norm": 1.0892197450126473, + "learning_rate": 8.124742327791517e-06, + "loss": 0.903, + "step": 27050 + }, + { + "epoch": 1.0702208863137495, + "grad_norm": 1.0006132720474732, + "learning_rate": 8.122945689443328e-06, + "loss": 0.8997, + "step": 27060 + }, + { + "epoch": 1.0706163855326385, + "grad_norm": 1.1778965773414205, + "learning_rate": 8.121148389708745e-06, + "loss": 0.9006, + "step": 27070 + }, + { + "epoch": 1.0710118847515275, + "grad_norm": 1.1244966274064, + "learning_rate": 8.119350428968403e-06, + "loss": 0.8954, + "step": 27080 + }, + { + "epoch": 1.0714073839704166, + "grad_norm": 1.0197631932308509, + "learning_rate": 8.117551807603083e-06, + "loss": 0.905, + "step": 27090 + }, + { + "epoch": 1.0718028831893056, + "grad_norm": 
1.115615567866468, + "learning_rate": 8.115752525993701e-06, + "loss": 0.9203, + "step": 27100 + }, + { + "epoch": 1.0721983824081947, + "grad_norm": 1.0765239213424813, + "learning_rate": 8.113952584521314e-06, + "loss": 0.8965, + "step": 27110 + }, + { + "epoch": 1.0725938816270837, + "grad_norm": 1.0198095409900247, + "learning_rate": 8.112151983567117e-06, + "loss": 0.9106, + "step": 27120 + }, + { + "epoch": 1.0729893808459727, + "grad_norm": 1.0933626872102498, + "learning_rate": 8.110350723512448e-06, + "loss": 0.8972, + "step": 27130 + }, + { + "epoch": 1.0733848800648618, + "grad_norm": 1.0905159498447563, + "learning_rate": 8.108548804738783e-06, + "loss": 0.8983, + "step": 27140 + }, + { + "epoch": 1.0737803792837508, + "grad_norm": 1.0356050178244327, + "learning_rate": 8.106746227627739e-06, + "loss": 0.8876, + "step": 27150 + }, + { + "epoch": 1.0741758785026398, + "grad_norm": 1.1396098722692873, + "learning_rate": 8.104942992561067e-06, + "loss": 0.9057, + "step": 27160 + }, + { + "epoch": 1.0745713777215289, + "grad_norm": 1.2743302032628894, + "learning_rate": 8.103139099920666e-06, + "loss": 0.9179, + "step": 27170 + }, + { + "epoch": 1.074966876940418, + "grad_norm": 1.2623677737547447, + "learning_rate": 8.101334550088566e-06, + "loss": 0.9033, + "step": 27180 + }, + { + "epoch": 1.0753623761593072, + "grad_norm": 1.1729772587339842, + "learning_rate": 8.09952934344694e-06, + "loss": 0.892, + "step": 27190 + }, + { + "epoch": 1.0757578753781962, + "grad_norm": 1.1321572329731517, + "learning_rate": 8.097723480378102e-06, + "loss": 0.9014, + "step": 27200 + }, + { + "epoch": 1.0761533745970853, + "grad_norm": 1.0344941903869767, + "learning_rate": 8.095916961264502e-06, + "loss": 0.8866, + "step": 27210 + }, + { + "epoch": 1.0765488738159743, + "grad_norm": 0.9920099445325548, + "learning_rate": 8.094109786488729e-06, + "loss": 0.8986, + "step": 27220 + }, + { + "epoch": 1.0769443730348633, + "grad_norm": 1.0791226397179134, + "learning_rate": 8.092301956433512e-06, + "loss": 0.883, + "step": 27230 + }, + { + "epoch": 1.0773398722537524, + "grad_norm": 1.3703968053746718, + "learning_rate": 8.090493471481717e-06, + "loss": 0.8921, + "step": 27240 + }, + { + "epoch": 1.0777353714726414, + "grad_norm": 1.1420306684436488, + "learning_rate": 8.088684332016355e-06, + "loss": 0.9122, + "step": 27250 + }, + { + "epoch": 1.0781308706915305, + "grad_norm": 1.020377065531343, + "learning_rate": 8.086874538420563e-06, + "loss": 0.9131, + "step": 27260 + }, + { + "epoch": 1.0785263699104195, + "grad_norm": 1.175087832980096, + "learning_rate": 8.085064091077632e-06, + "loss": 0.8809, + "step": 27270 + }, + { + "epoch": 1.0789218691293085, + "grad_norm": 1.2445735045852733, + "learning_rate": 8.08325299037098e-06, + "loss": 0.885, + "step": 27280 + }, + { + "epoch": 1.0793173683481976, + "grad_norm": 1.2369625712158248, + "learning_rate": 8.08144123668417e-06, + "loss": 0.8906, + "step": 27290 + }, + { + "epoch": 1.0797128675670866, + "grad_norm": 1.0882454123715677, + "learning_rate": 8.079628830400893e-06, + "loss": 0.8837, + "step": 27300 + }, + { + "epoch": 1.0801083667859757, + "grad_norm": 1.1970886907228255, + "learning_rate": 8.077815771904993e-06, + "loss": 0.9011, + "step": 27310 + }, + { + "epoch": 1.0805038660048647, + "grad_norm": 1.1355522024792915, + "learning_rate": 8.076002061580441e-06, + "loss": 0.8942, + "step": 27320 + }, + { + "epoch": 1.0808993652237537, + "grad_norm": 1.0345932697649929, + "learning_rate": 8.074187699811351e-06, + "loss": 0.9065, + "step": 
27330 + }, + { + "epoch": 1.0812948644426428, + "grad_norm": 1.2290030983193927, + "learning_rate": 8.072372686981975e-06, + "loss": 0.8976, + "step": 27340 + }, + { + "epoch": 1.0816903636615318, + "grad_norm": 1.1470371833594348, + "learning_rate": 8.0705570234767e-06, + "loss": 0.8984, + "step": 27350 + }, + { + "epoch": 1.0820858628804209, + "grad_norm": 1.1687524580356325, + "learning_rate": 8.068740709680054e-06, + "loss": 0.8858, + "step": 27360 + }, + { + "epoch": 1.08248136209931, + "grad_norm": 1.1309495748275602, + "learning_rate": 8.066923745976697e-06, + "loss": 0.8902, + "step": 27370 + }, + { + "epoch": 1.082876861318199, + "grad_norm": 1.167742351543067, + "learning_rate": 8.065106132751437e-06, + "loss": 0.8929, + "step": 27380 + }, + { + "epoch": 1.083272360537088, + "grad_norm": 1.2127522748180066, + "learning_rate": 8.063287870389207e-06, + "loss": 0.8898, + "step": 27390 + }, + { + "epoch": 1.083667859755977, + "grad_norm": 1.0811128616461532, + "learning_rate": 8.061468959275089e-06, + "loss": 0.9061, + "step": 27400 + }, + { + "epoch": 1.084063358974866, + "grad_norm": 1.1253260949092212, + "learning_rate": 8.059649399794295e-06, + "loss": 0.889, + "step": 27410 + }, + { + "epoch": 1.084458858193755, + "grad_norm": 1.3547996478343813, + "learning_rate": 8.057829192332177e-06, + "loss": 0.8925, + "step": 27420 + }, + { + "epoch": 1.0848543574126441, + "grad_norm": 1.102503719772114, + "learning_rate": 8.05600833727422e-06, + "loss": 0.9062, + "step": 27430 + }, + { + "epoch": 1.0852498566315332, + "grad_norm": 1.0900852983570823, + "learning_rate": 8.054186835006057e-06, + "loss": 0.8957, + "step": 27440 + }, + { + "epoch": 1.0856453558504222, + "grad_norm": 1.1085798877125128, + "learning_rate": 8.052364685913444e-06, + "loss": 0.8727, + "step": 27450 + }, + { + "epoch": 1.0860408550693113, + "grad_norm": 1.150757146133895, + "learning_rate": 8.050541890382287e-06, + "loss": 0.9008, + "step": 27460 + }, + { + "epoch": 1.0864363542882003, + "grad_norm": 1.244177478165845, + "learning_rate": 8.04871844879862e-06, + "loss": 0.8903, + "step": 27470 + }, + { + "epoch": 1.0868318535070893, + "grad_norm": 1.136418139174831, + "learning_rate": 8.046894361548617e-06, + "loss": 0.8917, + "step": 27480 + }, + { + "epoch": 1.0872273527259784, + "grad_norm": 1.0458905330421155, + "learning_rate": 8.045069629018585e-06, + "loss": 0.8867, + "step": 27490 + }, + { + "epoch": 1.0876228519448674, + "grad_norm": 1.287540629833461, + "learning_rate": 8.043244251594977e-06, + "loss": 0.8868, + "step": 27500 + }, + { + "epoch": 1.0880183511637564, + "grad_norm": 1.0983028229890337, + "learning_rate": 8.041418229664373e-06, + "loss": 0.8849, + "step": 27510 + }, + { + "epoch": 1.0884138503826455, + "grad_norm": 1.0820605708080853, + "learning_rate": 8.039591563613494e-06, + "loss": 0.8976, + "step": 27520 + }, + { + "epoch": 1.0888093496015345, + "grad_norm": 1.1308754257809712, + "learning_rate": 8.037764253829197e-06, + "loss": 0.8987, + "step": 27530 + }, + { + "epoch": 1.0892048488204236, + "grad_norm": 1.0928748560251753, + "learning_rate": 8.035936300698477e-06, + "loss": 0.885, + "step": 27540 + }, + { + "epoch": 1.0896003480393126, + "grad_norm": 1.2211412651393077, + "learning_rate": 8.034107704608458e-06, + "loss": 0.8886, + "step": 27550 + }, + { + "epoch": 1.0899958472582016, + "grad_norm": 1.07612843278846, + "learning_rate": 8.032278465946408e-06, + "loss": 0.9108, + "step": 27560 + }, + { + "epoch": 1.0903913464770907, + "grad_norm": 1.2401455742095315, + "learning_rate": 
8.03044858509973e-06, + "loss": 0.8917, + "step": 27570 + }, + { + "epoch": 1.0907868456959797, + "grad_norm": 1.1072522064369923, + "learning_rate": 8.028618062455958e-06, + "loss": 0.8908, + "step": 27580 + }, + { + "epoch": 1.0911823449148688, + "grad_norm": 1.088237639846867, + "learning_rate": 8.026786898402769e-06, + "loss": 0.8972, + "step": 27590 + }, + { + "epoch": 1.0915778441337578, + "grad_norm": 1.1707833118213482, + "learning_rate": 8.02495509332797e-06, + "loss": 0.902, + "step": 27600 + }, + { + "epoch": 1.0919733433526468, + "grad_norm": 1.1923207004273388, + "learning_rate": 8.023122647619505e-06, + "loss": 0.8709, + "step": 27610 + }, + { + "epoch": 1.0923688425715359, + "grad_norm": 1.0218484845622284, + "learning_rate": 8.021289561665457e-06, + "loss": 0.8982, + "step": 27620 + }, + { + "epoch": 1.092764341790425, + "grad_norm": 1.1324848871719058, + "learning_rate": 8.01945583585404e-06, + "loss": 0.89, + "step": 27630 + }, + { + "epoch": 1.093159841009314, + "grad_norm": 1.2474788688766536, + "learning_rate": 8.01762147057361e-06, + "loss": 0.8642, + "step": 27640 + }, + { + "epoch": 1.093555340228203, + "grad_norm": 1.0842033907627324, + "learning_rate": 8.015786466212647e-06, + "loss": 0.8964, + "step": 27650 + }, + { + "epoch": 1.093950839447092, + "grad_norm": 1.3133258445448526, + "learning_rate": 8.01395082315978e-06, + "loss": 0.8841, + "step": 27660 + }, + { + "epoch": 1.094346338665981, + "grad_norm": 1.0946303898282324, + "learning_rate": 8.012114541803763e-06, + "loss": 0.9099, + "step": 27670 + }, + { + "epoch": 1.0947418378848701, + "grad_norm": 1.0855375575273427, + "learning_rate": 8.01027762253349e-06, + "loss": 0.9095, + "step": 27680 + }, + { + "epoch": 1.0951373371037592, + "grad_norm": 1.24721771882688, + "learning_rate": 8.008440065737992e-06, + "loss": 0.8805, + "step": 27690 + }, + { + "epoch": 1.0955328363226482, + "grad_norm": 1.2598897594091798, + "learning_rate": 8.006601871806426e-06, + "loss": 0.8962, + "step": 27700 + }, + { + "epoch": 1.0959283355415372, + "grad_norm": 1.0726932236345916, + "learning_rate": 8.004763041128095e-06, + "loss": 0.9019, + "step": 27710 + }, + { + "epoch": 1.0963238347604263, + "grad_norm": 1.110410541762521, + "learning_rate": 8.00292357409243e-06, + "loss": 0.9011, + "step": 27720 + }, + { + "epoch": 1.0967193339793153, + "grad_norm": 1.0898460633379807, + "learning_rate": 8.001083471088997e-06, + "loss": 0.8902, + "step": 27730 + }, + { + "epoch": 1.0971148331982044, + "grad_norm": 1.1139950496030946, + "learning_rate": 7.999242732507502e-06, + "loss": 0.8864, + "step": 27740 + }, + { + "epoch": 1.0975103324170934, + "grad_norm": 1.1467439016036507, + "learning_rate": 7.99740135873778e-06, + "loss": 0.9145, + "step": 27750 + }, + { + "epoch": 1.0979058316359824, + "grad_norm": 1.053929566157083, + "learning_rate": 7.9955593501698e-06, + "loss": 0.8905, + "step": 27760 + }, + { + "epoch": 1.0983013308548715, + "grad_norm": 1.082939633714128, + "learning_rate": 7.99371670719367e-06, + "loss": 0.8977, + "step": 27770 + }, + { + "epoch": 1.0986968300737605, + "grad_norm": 1.215191991670252, + "learning_rate": 7.991873430199633e-06, + "loss": 0.896, + "step": 27780 + }, + { + "epoch": 1.0990923292926496, + "grad_norm": 1.2287437959550631, + "learning_rate": 7.990029519578059e-06, + "loss": 0.9071, + "step": 27790 + }, + { + "epoch": 1.0994878285115386, + "grad_norm": 1.2068989822249285, + "learning_rate": 7.98818497571946e-06, + "loss": 0.88, + "step": 27800 + }, + { + "epoch": 1.0998833277304276, + "grad_norm": 
1.184431580757121, + "learning_rate": 7.986339799014475e-06, + "loss": 0.8965, + "step": 27810 + }, + { + "epoch": 1.100278826949317, + "grad_norm": 1.136249035505473, + "learning_rate": 7.984493989853885e-06, + "loss": 0.8928, + "step": 27820 + }, + { + "epoch": 1.100674326168206, + "grad_norm": 1.1327519715150345, + "learning_rate": 7.982647548628599e-06, + "loss": 0.9078, + "step": 27830 + }, + { + "epoch": 1.101069825387095, + "grad_norm": 1.0809721166048105, + "learning_rate": 7.980800475729661e-06, + "loss": 0.8936, + "step": 27840 + }, + { + "epoch": 1.101465324605984, + "grad_norm": 1.2635711312312279, + "learning_rate": 7.978952771548249e-06, + "loss": 0.8951, + "step": 27850 + }, + { + "epoch": 1.101860823824873, + "grad_norm": 1.2618975031616506, + "learning_rate": 7.977104436475677e-06, + "loss": 0.9035, + "step": 27860 + }, + { + "epoch": 1.102256323043762, + "grad_norm": 1.1993118624109151, + "learning_rate": 7.97525547090339e-06, + "loss": 0.9063, + "step": 27870 + }, + { + "epoch": 1.1026518222626511, + "grad_norm": 1.0339672543401843, + "learning_rate": 7.973405875222965e-06, + "loss": 0.907, + "step": 27880 + }, + { + "epoch": 1.1030473214815402, + "grad_norm": 1.106009705517437, + "learning_rate": 7.971555649826117e-06, + "loss": 0.8772, + "step": 27890 + }, + { + "epoch": 1.1034428207004292, + "grad_norm": 0.9632336538697404, + "learning_rate": 7.969704795104693e-06, + "loss": 0.9139, + "step": 27900 + }, + { + "epoch": 1.1038383199193182, + "grad_norm": 1.3731543656345955, + "learning_rate": 7.96785331145067e-06, + "loss": 0.8678, + "step": 27910 + }, + { + "epoch": 1.1042338191382073, + "grad_norm": 1.18598360057372, + "learning_rate": 7.966001199256163e-06, + "loss": 0.8879, + "step": 27920 + }, + { + "epoch": 1.1046293183570963, + "grad_norm": 1.1972679924889749, + "learning_rate": 7.964148458913415e-06, + "loss": 0.8845, + "step": 27930 + }, + { + "epoch": 1.1050248175759854, + "grad_norm": 1.107406935089363, + "learning_rate": 7.962295090814805e-06, + "loss": 0.8932, + "step": 27940 + }, + { + "epoch": 1.1054203167948744, + "grad_norm": 1.371837033995282, + "learning_rate": 7.960441095352847e-06, + "loss": 0.8671, + "step": 27950 + }, + { + "epoch": 1.1058158160137634, + "grad_norm": 1.1529148339030442, + "learning_rate": 7.958586472920182e-06, + "loss": 0.875, + "step": 27960 + }, + { + "epoch": 1.1062113152326525, + "grad_norm": 1.0878175808190371, + "learning_rate": 7.956731223909591e-06, + "loss": 0.8979, + "step": 27970 + }, + { + "epoch": 1.1066068144515415, + "grad_norm": 1.0477759942991176, + "learning_rate": 7.95487534871398e-06, + "loss": 0.9099, + "step": 27980 + }, + { + "epoch": 1.1070023136704306, + "grad_norm": 1.1033663742505546, + "learning_rate": 7.953018847726395e-06, + "loss": 0.8816, + "step": 27990 + }, + { + "epoch": 1.1073978128893196, + "grad_norm": 1.319971764574536, + "learning_rate": 7.951161721340008e-06, + "loss": 0.8945, + "step": 28000 + }, + { + "epoch": 1.1077933121082086, + "grad_norm": 1.055882496634333, + "learning_rate": 7.94930396994813e-06, + "loss": 0.8976, + "step": 28010 + }, + { + "epoch": 1.1081888113270977, + "grad_norm": 1.0542913295818592, + "learning_rate": 7.947445593944198e-06, + "loss": 0.8872, + "step": 28020 + }, + { + "epoch": 1.1085843105459867, + "grad_norm": 1.2368089090227496, + "learning_rate": 7.945586593721789e-06, + "loss": 0.8888, + "step": 28030 + }, + { + "epoch": 1.1089798097648758, + "grad_norm": 1.20012700679628, + "learning_rate": 7.9437269696746e-06, + "loss": 0.8849, + "step": 28040 + }, + { + 
"epoch": 1.1093753089837648, + "grad_norm": 1.2653919292169655, + "learning_rate": 7.941866722196472e-06, + "loss": 0.8938, + "step": 28050 + }, + { + "epoch": 1.1097708082026538, + "grad_norm": 1.1033828876989404, + "learning_rate": 7.940005851681373e-06, + "loss": 0.9086, + "step": 28060 + }, + { + "epoch": 1.1101663074215429, + "grad_norm": 1.3354989216914552, + "learning_rate": 7.938144358523403e-06, + "loss": 0.8937, + "step": 28070 + }, + { + "epoch": 1.110561806640432, + "grad_norm": 1.1861421270295682, + "learning_rate": 7.936282243116795e-06, + "loss": 0.8811, + "step": 28080 + }, + { + "epoch": 1.110957305859321, + "grad_norm": 1.242405241883624, + "learning_rate": 7.934419505855915e-06, + "loss": 0.8817, + "step": 28090 + }, + { + "epoch": 1.11135280507821, + "grad_norm": 1.2286916772434766, + "learning_rate": 7.932556147135255e-06, + "loss": 0.8692, + "step": 28100 + }, + { + "epoch": 1.111748304297099, + "grad_norm": 1.1713755802561983, + "learning_rate": 7.930692167349443e-06, + "loss": 0.9094, + "step": 28110 + }, + { + "epoch": 1.112143803515988, + "grad_norm": 1.1797864180729225, + "learning_rate": 7.92882756689324e-06, + "loss": 0.9074, + "step": 28120 + }, + { + "epoch": 1.1125393027348771, + "grad_norm": 1.11371342424478, + "learning_rate": 7.926962346161535e-06, + "loss": 0.8806, + "step": 28130 + }, + { + "epoch": 1.1129348019537662, + "grad_norm": 1.0435891701912137, + "learning_rate": 7.925096505549352e-06, + "loss": 0.8938, + "step": 28140 + }, + { + "epoch": 1.1133303011726552, + "grad_norm": 1.1939501224670819, + "learning_rate": 7.923230045451842e-06, + "loss": 0.8754, + "step": 28150 + }, + { + "epoch": 1.1137258003915442, + "grad_norm": 1.169466698231923, + "learning_rate": 7.921362966264288e-06, + "loss": 0.8884, + "step": 28160 + }, + { + "epoch": 1.1141212996104333, + "grad_norm": 1.2350179638145247, + "learning_rate": 7.919495268382109e-06, + "loss": 0.898, + "step": 28170 + }, + { + "epoch": 1.1145167988293223, + "grad_norm": 1.2020950165796456, + "learning_rate": 7.917626952200849e-06, + "loss": 0.8985, + "step": 28180 + }, + { + "epoch": 1.1149122980482113, + "grad_norm": 1.2931599298173728, + "learning_rate": 7.915758018116185e-06, + "loss": 0.891, + "step": 28190 + }, + { + "epoch": 1.1153077972671004, + "grad_norm": 1.1819632414691104, + "learning_rate": 7.913888466523927e-06, + "loss": 0.8911, + "step": 28200 + }, + { + "epoch": 1.1157032964859894, + "grad_norm": 1.2165968019246507, + "learning_rate": 7.912018297820012e-06, + "loss": 0.8753, + "step": 28210 + }, + { + "epoch": 1.1160987957048785, + "grad_norm": 1.2093269983020627, + "learning_rate": 7.910147512400512e-06, + "loss": 0.9058, + "step": 28220 + }, + { + "epoch": 1.1164942949237675, + "grad_norm": 1.0302717910667079, + "learning_rate": 7.908276110661625e-06, + "loss": 0.9122, + "step": 28230 + }, + { + "epoch": 1.1168897941426565, + "grad_norm": 1.1168190634051882, + "learning_rate": 7.906404092999685e-06, + "loss": 0.8766, + "step": 28240 + }, + { + "epoch": 1.1172852933615456, + "grad_norm": 1.0163801403206851, + "learning_rate": 7.90453145981115e-06, + "loss": 0.9016, + "step": 28250 + }, + { + "epoch": 1.1176807925804346, + "grad_norm": 1.3533553884499652, + "learning_rate": 7.90265821149261e-06, + "loss": 0.8635, + "step": 28260 + }, + { + "epoch": 1.1180762917993237, + "grad_norm": 1.0967128254769198, + "learning_rate": 7.900784348440793e-06, + "loss": 0.9015, + "step": 28270 + }, + { + "epoch": 1.1184717910182127, + "grad_norm": 1.0520290096044187, + "learning_rate": 
7.898909871052546e-06, + "loss": 0.8688, + "step": 28280 + }, + { + "epoch": 1.1188672902371017, + "grad_norm": 1.1827580731163367, + "learning_rate": 7.897034779724855e-06, + "loss": 0.8922, + "step": 28290 + }, + { + "epoch": 1.1192627894559908, + "grad_norm": 1.1140478824864055, + "learning_rate": 7.89515907485483e-06, + "loss": 0.8718, + "step": 28300 + }, + { + "epoch": 1.1196582886748798, + "grad_norm": 1.1980447825166638, + "learning_rate": 7.893282756839712e-06, + "loss": 0.869, + "step": 28310 + }, + { + "epoch": 1.1200537878937689, + "grad_norm": 1.1222536065585487, + "learning_rate": 7.891405826076875e-06, + "loss": 0.8894, + "step": 28320 + }, + { + "epoch": 1.120449287112658, + "grad_norm": 1.1394294249921872, + "learning_rate": 7.88952828296382e-06, + "loss": 0.9016, + "step": 28330 + }, + { + "epoch": 1.120844786331547, + "grad_norm": 1.1259107628540133, + "learning_rate": 7.88765012789818e-06, + "loss": 0.9094, + "step": 28340 + }, + { + "epoch": 1.121240285550436, + "grad_norm": 1.1498681421065784, + "learning_rate": 7.885771361277711e-06, + "loss": 0.8941, + "step": 28350 + }, + { + "epoch": 1.121635784769325, + "grad_norm": 1.1342837845882838, + "learning_rate": 7.883891983500309e-06, + "loss": 0.889, + "step": 28360 + }, + { + "epoch": 1.122031283988214, + "grad_norm": 1.1788708909394887, + "learning_rate": 7.882011994963994e-06, + "loss": 0.8766, + "step": 28370 + }, + { + "epoch": 1.122426783207103, + "grad_norm": 1.167966682234611, + "learning_rate": 7.880131396066913e-06, + "loss": 0.901, + "step": 28380 + }, + { + "epoch": 1.1228222824259921, + "grad_norm": 1.0482050306868889, + "learning_rate": 7.878250187207343e-06, + "loss": 0.8904, + "step": 28390 + }, + { + "epoch": 1.1232177816448812, + "grad_norm": 1.1262802081477237, + "learning_rate": 7.876368368783697e-06, + "loss": 0.8637, + "step": 28400 + }, + { + "epoch": 1.1236132808637702, + "grad_norm": 1.129571065163481, + "learning_rate": 7.874485941194508e-06, + "loss": 0.8939, + "step": 28410 + }, + { + "epoch": 1.1240087800826593, + "grad_norm": 1.1523677586553804, + "learning_rate": 7.872602904838442e-06, + "loss": 0.8835, + "step": 28420 + }, + { + "epoch": 1.1244042793015483, + "grad_norm": 1.1657147671994192, + "learning_rate": 7.870719260114295e-06, + "loss": 0.9079, + "step": 28430 + }, + { + "epoch": 1.1247997785204373, + "grad_norm": 1.2634129221404926, + "learning_rate": 7.868835007420992e-06, + "loss": 0.8806, + "step": 28440 + }, + { + "epoch": 1.1251952777393264, + "grad_norm": 1.1446032633226912, + "learning_rate": 7.866950147157584e-06, + "loss": 0.8812, + "step": 28450 + }, + { + "epoch": 1.1255907769582154, + "grad_norm": 1.24644506001461, + "learning_rate": 7.865064679723249e-06, + "loss": 0.8667, + "step": 28460 + }, + { + "epoch": 1.1259862761771045, + "grad_norm": 1.1820806072019319, + "learning_rate": 7.8631786055173e-06, + "loss": 0.8878, + "step": 28470 + }, + { + "epoch": 1.1263817753959935, + "grad_norm": 1.2198342469076937, + "learning_rate": 7.861291924939178e-06, + "loss": 0.8784, + "step": 28480 + }, + { + "epoch": 1.1267772746148825, + "grad_norm": 1.217815972872604, + "learning_rate": 7.859404638388443e-06, + "loss": 0.8882, + "step": 28490 + }, + { + "epoch": 1.1271727738337716, + "grad_norm": 1.2234378854603238, + "learning_rate": 7.857516746264798e-06, + "loss": 0.8568, + "step": 28500 + }, + { + "epoch": 1.1275682730526606, + "grad_norm": 1.0343819781590669, + "learning_rate": 7.855628248968057e-06, + "loss": 0.8823, + "step": 28510 + }, + { + "epoch": 1.1279637722715496, + 
"grad_norm": 1.1324461551860587, + "learning_rate": 7.853739146898179e-06, + "loss": 0.9039, + "step": 28520 + }, + { + "epoch": 1.1283592714904387, + "grad_norm": 1.1757742157096973, + "learning_rate": 7.85184944045524e-06, + "loss": 0.8713, + "step": 28530 + }, + { + "epoch": 1.1287547707093277, + "grad_norm": 1.0611857464814267, + "learning_rate": 7.849959130039446e-06, + "loss": 0.8885, + "step": 28540 + }, + { + "epoch": 1.129150269928217, + "grad_norm": 1.121404948439143, + "learning_rate": 7.848068216051135e-06, + "loss": 0.9004, + "step": 28550 + }, + { + "epoch": 1.129545769147106, + "grad_norm": 1.1360113325190624, + "learning_rate": 7.84617669889077e-06, + "loss": 0.8821, + "step": 28560 + }, + { + "epoch": 1.129941268365995, + "grad_norm": 1.161957591203711, + "learning_rate": 7.844284578958942e-06, + "loss": 0.8713, + "step": 28570 + }, + { + "epoch": 1.130336767584884, + "grad_norm": 1.1955714388665608, + "learning_rate": 7.842391856656368e-06, + "loss": 0.8754, + "step": 28580 + }, + { + "epoch": 1.1307322668037731, + "grad_norm": 1.237353010753038, + "learning_rate": 7.840498532383896e-06, + "loss": 0.8821, + "step": 28590 + }, + { + "epoch": 1.1311277660226622, + "grad_norm": 1.326735377756867, + "learning_rate": 7.838604606542498e-06, + "loss": 0.8754, + "step": 28600 + }, + { + "epoch": 1.1315232652415512, + "grad_norm": 1.1523039103824624, + "learning_rate": 7.836710079533276e-06, + "loss": 0.8901, + "step": 28610 + }, + { + "epoch": 1.1319187644604403, + "grad_norm": 1.2357558713593966, + "learning_rate": 7.83481495175746e-06, + "loss": 0.8719, + "step": 28620 + }, + { + "epoch": 1.1323142636793293, + "grad_norm": 1.078923022820184, + "learning_rate": 7.832919223616403e-06, + "loss": 0.8866, + "step": 28630 + }, + { + "epoch": 1.1327097628982183, + "grad_norm": 1.0768958958249693, + "learning_rate": 7.831022895511586e-06, + "loss": 0.8934, + "step": 28640 + }, + { + "epoch": 1.1331052621171074, + "grad_norm": 1.1672828453130446, + "learning_rate": 7.829125967844624e-06, + "loss": 0.8814, + "step": 28650 + }, + { + "epoch": 1.1335007613359964, + "grad_norm": 1.3018651463727677, + "learning_rate": 7.82722844101725e-06, + "loss": 0.8864, + "step": 28660 + }, + { + "epoch": 1.1338962605548855, + "grad_norm": 1.2506297134258788, + "learning_rate": 7.825330315431329e-06, + "loss": 0.8837, + "step": 28670 + }, + { + "epoch": 1.1342917597737745, + "grad_norm": 1.2187640150041792, + "learning_rate": 7.82343159148885e-06, + "loss": 0.8995, + "step": 28680 + }, + { + "epoch": 1.1346872589926635, + "grad_norm": 1.211016132170073, + "learning_rate": 7.821532269591934e-06, + "loss": 0.8844, + "step": 28690 + }, + { + "epoch": 1.1350827582115526, + "grad_norm": 1.2530868365689445, + "learning_rate": 7.819632350142822e-06, + "loss": 0.8807, + "step": 28700 + }, + { + "epoch": 1.1354782574304416, + "grad_norm": 1.193921393475841, + "learning_rate": 7.817731833543883e-06, + "loss": 0.8785, + "step": 28710 + }, + { + "epoch": 1.1358737566493307, + "grad_norm": 0.9978430926114077, + "learning_rate": 7.815830720197616e-06, + "loss": 0.9022, + "step": 28720 + }, + { + "epoch": 1.1362692558682197, + "grad_norm": 1.1946625840984992, + "learning_rate": 7.813929010506645e-06, + "loss": 0.8762, + "step": 28730 + }, + { + "epoch": 1.1366647550871087, + "grad_norm": 1.2591354732718374, + "learning_rate": 7.812026704873717e-06, + "loss": 0.8984, + "step": 28740 + }, + { + "epoch": 1.1370602543059978, + "grad_norm": 1.0347969077844694, + "learning_rate": 7.810123803701711e-06, + "loss": 0.8868, + 
"step": 28750 + }, + { + "epoch": 1.1374557535248868, + "grad_norm": 1.2015888244758948, + "learning_rate": 7.808220307393626e-06, + "loss": 0.8701, + "step": 28760 + }, + { + "epoch": 1.1378512527437759, + "grad_norm": 1.041837969689692, + "learning_rate": 7.80631621635259e-06, + "loss": 0.9248, + "step": 28770 + }, + { + "epoch": 1.138246751962665, + "grad_norm": 1.1292194797397903, + "learning_rate": 7.804411530981857e-06, + "loss": 0.8856, + "step": 28780 + }, + { + "epoch": 1.138642251181554, + "grad_norm": 1.0841712144383036, + "learning_rate": 7.802506251684809e-06, + "loss": 0.8935, + "step": 28790 + }, + { + "epoch": 1.139037750400443, + "grad_norm": 1.1216971304097392, + "learning_rate": 7.80060037886495e-06, + "loss": 0.8865, + "step": 28800 + }, + { + "epoch": 1.139433249619332, + "grad_norm": 1.0757648210359316, + "learning_rate": 7.79869391292591e-06, + "loss": 0.8871, + "step": 28810 + }, + { + "epoch": 1.139828748838221, + "grad_norm": 1.2629266635777194, + "learning_rate": 7.79678685427145e-06, + "loss": 0.8857, + "step": 28820 + }, + { + "epoch": 1.14022424805711, + "grad_norm": 1.137588931108426, + "learning_rate": 7.794879203305446e-06, + "loss": 0.9159, + "step": 28830 + }, + { + "epoch": 1.1406197472759991, + "grad_norm": 1.1231830208980025, + "learning_rate": 7.79297096043191e-06, + "loss": 0.8845, + "step": 28840 + }, + { + "epoch": 1.1410152464948882, + "grad_norm": 1.1633748835851536, + "learning_rate": 7.791062126054974e-06, + "loss": 0.8981, + "step": 28850 + }, + { + "epoch": 1.1414107457137772, + "grad_norm": 1.2635872037555984, + "learning_rate": 7.789152700578898e-06, + "loss": 0.8712, + "step": 28860 + }, + { + "epoch": 1.1418062449326662, + "grad_norm": 1.2331340283482086, + "learning_rate": 7.787242684408063e-06, + "loss": 0.8787, + "step": 28870 + }, + { + "epoch": 1.1422017441515553, + "grad_norm": 1.0370422569322812, + "learning_rate": 7.785332077946981e-06, + "loss": 0.8963, + "step": 28880 + }, + { + "epoch": 1.1425972433704443, + "grad_norm": 1.0289576023061546, + "learning_rate": 7.783420881600283e-06, + "loss": 0.8992, + "step": 28890 + }, + { + "epoch": 1.1429927425893334, + "grad_norm": 1.246364477870542, + "learning_rate": 7.781509095772727e-06, + "loss": 0.873, + "step": 28900 + }, + { + "epoch": 1.1433882418082224, + "grad_norm": 1.0686637257279392, + "learning_rate": 7.779596720869197e-06, + "loss": 0.8894, + "step": 28910 + }, + { + "epoch": 1.1437837410271114, + "grad_norm": 1.200909050117641, + "learning_rate": 7.777683757294704e-06, + "loss": 0.8828, + "step": 28920 + }, + { + "epoch": 1.1441792402460005, + "grad_norm": 1.1426696375182521, + "learning_rate": 7.77577020545438e-06, + "loss": 0.8781, + "step": 28930 + }, + { + "epoch": 1.1445747394648895, + "grad_norm": 1.0435177796640491, + "learning_rate": 7.77385606575348e-06, + "loss": 0.8898, + "step": 28940 + }, + { + "epoch": 1.1449702386837786, + "grad_norm": 1.155909040323496, + "learning_rate": 7.771941338597387e-06, + "loss": 0.8848, + "step": 28950 + }, + { + "epoch": 1.1453657379026676, + "grad_norm": 1.0991162110656718, + "learning_rate": 7.77002602439161e-06, + "loss": 0.8809, + "step": 28960 + }, + { + "epoch": 1.1457612371215566, + "grad_norm": 1.3654436389159246, + "learning_rate": 7.768110123541775e-06, + "loss": 0.8796, + "step": 28970 + }, + { + "epoch": 1.1461567363404457, + "grad_norm": 1.1537587675900443, + "learning_rate": 7.76619363645364e-06, + "loss": 0.909, + "step": 28980 + }, + { + "epoch": 1.1465522355593347, + "grad_norm": 1.0697272033235352, + 
"learning_rate": 7.764276563533087e-06, + "loss": 0.8892, + "step": 28990 + }, + { + "epoch": 1.1469477347782238, + "grad_norm": 1.3159768122619557, + "learning_rate": 7.762358905186112e-06, + "loss": 0.8651, + "step": 29000 + }, + { + "epoch": 1.1473432339971128, + "grad_norm": 1.3905139079102626, + "learning_rate": 7.760440661818848e-06, + "loss": 0.8786, + "step": 29010 + }, + { + "epoch": 1.1477387332160018, + "grad_norm": 1.0774056655973534, + "learning_rate": 7.758521833837544e-06, + "loss": 0.9042, + "step": 29020 + }, + { + "epoch": 1.1481342324348909, + "grad_norm": 1.1019975024925495, + "learning_rate": 7.756602421648576e-06, + "loss": 0.887, + "step": 29030 + }, + { + "epoch": 1.14852973165378, + "grad_norm": 1.147271902231054, + "learning_rate": 7.75468242565844e-06, + "loss": 0.8909, + "step": 29040 + }, + { + "epoch": 1.148925230872669, + "grad_norm": 1.0757767265055174, + "learning_rate": 7.75276184627376e-06, + "loss": 0.8947, + "step": 29050 + }, + { + "epoch": 1.149320730091558, + "grad_norm": 1.3263647051483423, + "learning_rate": 7.750840683901284e-06, + "loss": 0.8726, + "step": 29060 + }, + { + "epoch": 1.149716229310447, + "grad_norm": 1.208365905342983, + "learning_rate": 7.748918938947878e-06, + "loss": 0.8914, + "step": 29070 + }, + { + "epoch": 1.150111728529336, + "grad_norm": 1.1085705403269703, + "learning_rate": 7.746996611820534e-06, + "loss": 0.8851, + "step": 29080 + }, + { + "epoch": 1.1505072277482251, + "grad_norm": 1.1023262182528024, + "learning_rate": 7.74507370292637e-06, + "loss": 0.8884, + "step": 29090 + }, + { + "epoch": 1.1509027269671142, + "grad_norm": 1.2404080016517403, + "learning_rate": 7.743150212672628e-06, + "loss": 0.8761, + "step": 29100 + }, + { + "epoch": 1.1512982261860032, + "grad_norm": 1.2438165887873858, + "learning_rate": 7.741226141466665e-06, + "loss": 0.9016, + "step": 29110 + }, + { + "epoch": 1.1516937254048922, + "grad_norm": 1.1189188385441424, + "learning_rate": 7.739301489715968e-06, + "loss": 0.8896, + "step": 29120 + }, + { + "epoch": 1.1520892246237813, + "grad_norm": 1.1027902423933102, + "learning_rate": 7.737376257828146e-06, + "loss": 0.8788, + "step": 29130 + }, + { + "epoch": 1.1524847238426705, + "grad_norm": 1.3113043837539398, + "learning_rate": 7.73545044621093e-06, + "loss": 0.8927, + "step": 29140 + }, + { + "epoch": 1.1528802230615596, + "grad_norm": 1.211483238559461, + "learning_rate": 7.733524055272173e-06, + "loss": 0.8883, + "step": 29150 + }, + { + "epoch": 1.1532757222804486, + "grad_norm": 1.1649328856987111, + "learning_rate": 7.731597085419853e-06, + "loss": 0.8893, + "step": 29160 + }, + { + "epoch": 1.1536712214993377, + "grad_norm": 1.157832606910769, + "learning_rate": 7.729669537062069e-06, + "loss": 0.8873, + "step": 29170 + }, + { + "epoch": 1.1540667207182267, + "grad_norm": 1.1741372092507605, + "learning_rate": 7.727741410607042e-06, + "loss": 0.8723, + "step": 29180 + }, + { + "epoch": 1.1544622199371157, + "grad_norm": 1.1977631063198806, + "learning_rate": 7.725812706463116e-06, + "loss": 0.9027, + "step": 29190 + }, + { + "epoch": 1.1548577191560048, + "grad_norm": 1.1654335010269374, + "learning_rate": 7.723883425038759e-06, + "loss": 0.8615, + "step": 29200 + }, + { + "epoch": 1.1552532183748938, + "grad_norm": 1.0905358408224222, + "learning_rate": 7.721953566742558e-06, + "loss": 0.8721, + "step": 29210 + }, + { + "epoch": 1.1556487175937828, + "grad_norm": 1.0874304542981303, + "learning_rate": 7.720023131983224e-06, + "loss": 0.8811, + "step": 29220 + }, + { + "epoch": 
1.1560442168126719, + "grad_norm": 1.3797745965288783, + "learning_rate": 7.71809212116959e-06, + "loss": 0.8823, + "step": 29230 + }, + { + "epoch": 1.156439716031561, + "grad_norm": 1.4178610596746921, + "learning_rate": 7.716160534710613e-06, + "loss": 0.8949, + "step": 29240 + }, + { + "epoch": 1.15683521525045, + "grad_norm": 1.0866428931632974, + "learning_rate": 7.71422837301537e-06, + "loss": 0.8733, + "step": 29250 + }, + { + "epoch": 1.157230714469339, + "grad_norm": 1.0956032993001137, + "learning_rate": 7.712295636493058e-06, + "loss": 0.8892, + "step": 29260 + }, + { + "epoch": 1.157626213688228, + "grad_norm": 1.334593375690845, + "learning_rate": 7.710362325552994e-06, + "loss": 0.8722, + "step": 29270 + }, + { + "epoch": 1.158021712907117, + "grad_norm": 1.3022906615402798, + "learning_rate": 7.708428440604627e-06, + "loss": 0.8712, + "step": 29280 + }, + { + "epoch": 1.1584172121260061, + "grad_norm": 1.3478174949321362, + "learning_rate": 7.706493982057516e-06, + "loss": 0.8932, + "step": 29290 + }, + { + "epoch": 1.1588127113448952, + "grad_norm": 1.324967160585542, + "learning_rate": 7.704558950321348e-06, + "loss": 0.8824, + "step": 29300 + }, + { + "epoch": 1.1592082105637842, + "grad_norm": 1.1943042609215033, + "learning_rate": 7.702623345805932e-06, + "loss": 0.8937, + "step": 29310 + }, + { + "epoch": 1.1596037097826732, + "grad_norm": 1.1677868388561077, + "learning_rate": 7.700687168921189e-06, + "loss": 0.8812, + "step": 29320 + }, + { + "epoch": 1.1599992090015623, + "grad_norm": 1.1426773226082723, + "learning_rate": 7.698750420077174e-06, + "loss": 0.892, + "step": 29330 + }, + { + "epoch": 1.1603947082204513, + "grad_norm": 1.1001815850326977, + "learning_rate": 7.696813099684056e-06, + "loss": 0.8754, + "step": 29340 + }, + { + "epoch": 1.1607902074393404, + "grad_norm": 1.256356849971641, + "learning_rate": 7.694875208152126e-06, + "loss": 0.8961, + "step": 29350 + }, + { + "epoch": 1.1611857066582294, + "grad_norm": 1.1715949183296233, + "learning_rate": 7.692936745891796e-06, + "loss": 0.8982, + "step": 29360 + }, + { + "epoch": 1.1615812058771184, + "grad_norm": 1.1536921404387677, + "learning_rate": 7.690997713313599e-06, + "loss": 0.8625, + "step": 29370 + }, + { + "epoch": 1.1619767050960075, + "grad_norm": 1.0931891575222479, + "learning_rate": 7.689058110828189e-06, + "loss": 0.8882, + "step": 29380 + }, + { + "epoch": 1.1623722043148965, + "grad_norm": 1.0880714299836853, + "learning_rate": 7.68711793884634e-06, + "loss": 0.8923, + "step": 29390 + }, + { + "epoch": 1.1627677035337856, + "grad_norm": 1.0809193901746335, + "learning_rate": 7.685177197778948e-06, + "loss": 0.8988, + "step": 29400 + }, + { + "epoch": 1.1631632027526746, + "grad_norm": 1.2882994424366716, + "learning_rate": 7.683235888037028e-06, + "loss": 0.9088, + "step": 29410 + }, + { + "epoch": 1.1635587019715636, + "grad_norm": 1.261155261186555, + "learning_rate": 7.681294010031719e-06, + "loss": 0.8872, + "step": 29420 + }, + { + "epoch": 1.1639542011904527, + "grad_norm": 1.104916235432575, + "learning_rate": 7.679351564174273e-06, + "loss": 0.8725, + "step": 29430 + }, + { + "epoch": 1.1643497004093417, + "grad_norm": 1.2184303141927162, + "learning_rate": 7.677408550876069e-06, + "loss": 0.9017, + "step": 29440 + }, + { + "epoch": 1.1647451996282308, + "grad_norm": 1.3714527644520074, + "learning_rate": 7.675464970548604e-06, + "loss": 0.895, + "step": 29450 + }, + { + "epoch": 1.1651406988471198, + "grad_norm": 1.057714246125476, + "learning_rate": 7.673520823603496e-06, + 
"loss": 0.8847, + "step": 29460 + }, + { + "epoch": 1.1655361980660088, + "grad_norm": 1.1613973437659404, + "learning_rate": 7.671576110452479e-06, + "loss": 0.8913, + "step": 29470 + }, + { + "epoch": 1.1659316972848979, + "grad_norm": 1.1202199557851293, + "learning_rate": 7.669630831507412e-06, + "loss": 0.8834, + "step": 29480 + }, + { + "epoch": 1.166327196503787, + "grad_norm": 1.181775828898434, + "learning_rate": 7.66768498718027e-06, + "loss": 0.8645, + "step": 29490 + }, + { + "epoch": 1.166722695722676, + "grad_norm": 1.2438773133442504, + "learning_rate": 7.665738577883155e-06, + "loss": 0.8844, + "step": 29500 + }, + { + "epoch": 1.167118194941565, + "grad_norm": 1.1461310396287698, + "learning_rate": 7.663791604028276e-06, + "loss": 0.8891, + "step": 29510 + }, + { + "epoch": 1.167513694160454, + "grad_norm": 1.11276964052036, + "learning_rate": 7.661844066027974e-06, + "loss": 0.9014, + "step": 29520 + }, + { + "epoch": 1.167909193379343, + "grad_norm": 1.187928335957745, + "learning_rate": 7.6598959642947e-06, + "loss": 0.8891, + "step": 29530 + }, + { + "epoch": 1.168304692598232, + "grad_norm": 1.1000482762167085, + "learning_rate": 7.657947299241031e-06, + "loss": 0.8972, + "step": 29540 + }, + { + "epoch": 1.1687001918171211, + "grad_norm": 1.3865771008026182, + "learning_rate": 7.655998071279663e-06, + "loss": 0.8526, + "step": 29550 + }, + { + "epoch": 1.1690956910360102, + "grad_norm": 1.2635972530918929, + "learning_rate": 7.654048280823404e-06, + "loss": 0.8805, + "step": 29560 + }, + { + "epoch": 1.1694911902548992, + "grad_norm": 1.1103108456563064, + "learning_rate": 7.652097928285188e-06, + "loss": 0.8936, + "step": 29570 + }, + { + "epoch": 1.1698866894737883, + "grad_norm": 1.2117154659725748, + "learning_rate": 7.650147014078069e-06, + "loss": 0.8735, + "step": 29580 + }, + { + "epoch": 1.1702821886926773, + "grad_norm": 1.257496495192249, + "learning_rate": 7.648195538615216e-06, + "loss": 0.8857, + "step": 29590 + }, + { + "epoch": 1.1706776879115663, + "grad_norm": 1.0930912157977482, + "learning_rate": 7.646243502309915e-06, + "loss": 0.8593, + "step": 29600 + }, + { + "epoch": 1.1710731871304554, + "grad_norm": 1.1981145172298138, + "learning_rate": 7.644290905575577e-06, + "loss": 0.8628, + "step": 29610 + }, + { + "epoch": 1.1714686863493444, + "grad_norm": 1.1603747325966454, + "learning_rate": 7.642337748825729e-06, + "loss": 0.8786, + "step": 29620 + }, + { + "epoch": 1.1718641855682335, + "grad_norm": 1.188747653562277, + "learning_rate": 7.640384032474013e-06, + "loss": 0.8858, + "step": 29630 + }, + { + "epoch": 1.1722596847871225, + "grad_norm": 1.2975011549925548, + "learning_rate": 7.638429756934196e-06, + "loss": 0.8735, + "step": 29640 + }, + { + "epoch": 1.1726551840060115, + "grad_norm": 1.0971934138813961, + "learning_rate": 7.636474922620156e-06, + "loss": 0.8812, + "step": 29650 + }, + { + "epoch": 1.1730506832249006, + "grad_norm": 1.1882480484422593, + "learning_rate": 7.634519529945899e-06, + "loss": 0.8619, + "step": 29660 + }, + { + "epoch": 1.1734461824437896, + "grad_norm": 1.216072478683306, + "learning_rate": 7.632563579325537e-06, + "loss": 0.895, + "step": 29670 + }, + { + "epoch": 1.1738416816626787, + "grad_norm": 1.174018572091714, + "learning_rate": 7.630607071173314e-06, + "loss": 0.8705, + "step": 29680 + }, + { + "epoch": 1.1742371808815677, + "grad_norm": 1.0417398323565927, + "learning_rate": 7.628650005903582e-06, + "loss": 0.8782, + "step": 29690 + }, + { + "epoch": 1.1746326801004567, + "grad_norm": 
1.0115777617178112, + "learning_rate": 7.626692383930811e-06, + "loss": 0.8937, + "step": 29700 + }, + { + "epoch": 1.1750281793193458, + "grad_norm": 1.0298194108286844, + "learning_rate": 7.624734205669594e-06, + "loss": 0.8763, + "step": 29710 + }, + { + "epoch": 1.1754236785382348, + "grad_norm": 1.2451098686417932, + "learning_rate": 7.62277547153464e-06, + "loss": 0.8565, + "step": 29720 + }, + { + "epoch": 1.1758191777571239, + "grad_norm": 1.2874579115129148, + "learning_rate": 7.620816181940776e-06, + "loss": 0.8745, + "step": 29730 + }, + { + "epoch": 1.176214676976013, + "grad_norm": 1.4712033002626335, + "learning_rate": 7.618856337302944e-06, + "loss": 0.8836, + "step": 29740 + }, + { + "epoch": 1.176610176194902, + "grad_norm": 1.388935780961097, + "learning_rate": 7.616895938036207e-06, + "loss": 0.8719, + "step": 29750 + }, + { + "epoch": 1.177005675413791, + "grad_norm": 1.1775684299699603, + "learning_rate": 7.614934984555742e-06, + "loss": 0.8866, + "step": 29760 + }, + { + "epoch": 1.17740117463268, + "grad_norm": 1.1639626874243392, + "learning_rate": 7.6129734772768485e-06, + "loss": 0.8914, + "step": 29770 + }, + { + "epoch": 1.177796673851569, + "grad_norm": 1.0948925950309525, + "learning_rate": 7.611011416614937e-06, + "loss": 0.8813, + "step": 29780 + }, + { + "epoch": 1.178192173070458, + "grad_norm": 1.1279932390244016, + "learning_rate": 7.609048802985542e-06, + "loss": 0.8908, + "step": 29790 + }, + { + "epoch": 1.1785876722893471, + "grad_norm": 1.379998269514362, + "learning_rate": 7.607085636804308e-06, + "loss": 0.8873, + "step": 29800 + }, + { + "epoch": 1.1789831715082362, + "grad_norm": 1.1807386421380142, + "learning_rate": 7.605121918487002e-06, + "loss": 0.8842, + "step": 29810 + }, + { + "epoch": 1.1793786707271252, + "grad_norm": 1.144539320674775, + "learning_rate": 7.603157648449503e-06, + "loss": 0.8798, + "step": 29820 + }, + { + "epoch": 1.1797741699460143, + "grad_norm": 1.4020064432673762, + "learning_rate": 7.601192827107814e-06, + "loss": 0.8571, + "step": 29830 + }, + { + "epoch": 1.1801696691649033, + "grad_norm": 1.0939681126918015, + "learning_rate": 7.599227454878048e-06, + "loss": 0.879, + "step": 29840 + }, + { + "epoch": 1.1805651683837923, + "grad_norm": 1.155615890244559, + "learning_rate": 7.597261532176437e-06, + "loss": 0.8907, + "step": 29850 + }, + { + "epoch": 1.1809606676026814, + "grad_norm": 1.1912816217492002, + "learning_rate": 7.5952950594193295e-06, + "loss": 0.8865, + "step": 29860 + }, + { + "epoch": 1.1813561668215704, + "grad_norm": 1.1103606289751944, + "learning_rate": 7.593328037023193e-06, + "loss": 0.8893, + "step": 29870 + }, + { + "epoch": 1.1817516660404594, + "grad_norm": 1.183606053747966, + "learning_rate": 7.591360465404607e-06, + "loss": 0.8831, + "step": 29880 + }, + { + "epoch": 1.1821471652593487, + "grad_norm": 1.2617969120463344, + "learning_rate": 7.589392344980269e-06, + "loss": 0.8826, + "step": 29890 + }, + { + "epoch": 1.1825426644782377, + "grad_norm": 1.2071476689982257, + "learning_rate": 7.587423676166996e-06, + "loss": 0.88, + "step": 29900 + }, + { + "epoch": 1.1829381636971268, + "grad_norm": 1.1883887689888764, + "learning_rate": 7.585454459381716e-06, + "loss": 0.8799, + "step": 29910 + }, + { + "epoch": 1.1833336629160158, + "grad_norm": 1.241758646047502, + "learning_rate": 7.583484695041476e-06, + "loss": 0.8843, + "step": 29920 + }, + { + "epoch": 1.1837291621349049, + "grad_norm": 1.052165823201099, + "learning_rate": 7.581514383563438e-06, + "loss": 0.8853, + "step": 29930 + 
}, + { + "epoch": 1.184124661353794, + "grad_norm": 1.3103002800711077, + "learning_rate": 7.579543525364881e-06, + "loss": 0.8687, + "step": 29940 + }, + { + "epoch": 1.184520160572683, + "grad_norm": 1.1200602792263559, + "learning_rate": 7.577572120863199e-06, + "loss": 0.8808, + "step": 29950 + }, + { + "epoch": 1.184915659791572, + "grad_norm": 1.2705716109868155, + "learning_rate": 7.575600170475901e-06, + "loss": 0.8754, + "step": 29960 + }, + { + "epoch": 1.185311159010461, + "grad_norm": 1.1949716115294393, + "learning_rate": 7.573627674620612e-06, + "loss": 0.874, + "step": 29970 + }, + { + "epoch": 1.18570665822935, + "grad_norm": 1.0711905403840896, + "learning_rate": 7.571654633715073e-06, + "loss": 0.8919, + "step": 29980 + }, + { + "epoch": 1.186102157448239, + "grad_norm": 1.2587671858435558, + "learning_rate": 7.569681048177142e-06, + "loss": 0.8738, + "step": 29990 + }, + { + "epoch": 1.1864976566671281, + "grad_norm": 1.2775112551314405, + "learning_rate": 7.567706918424789e-06, + "loss": 0.8725, + "step": 30000 + }, + { + "epoch": 1.1868931558860172, + "grad_norm": 1.1885472933999128, + "learning_rate": 7.5657322448761e-06, + "loss": 0.8788, + "step": 30010 + }, + { + "epoch": 1.1872886551049062, + "grad_norm": 1.1186515377138577, + "learning_rate": 7.563757027949279e-06, + "loss": 0.8818, + "step": 30020 + }, + { + "epoch": 1.1876841543237953, + "grad_norm": 1.2993810336876137, + "learning_rate": 7.561781268062641e-06, + "loss": 0.8709, + "step": 30030 + }, + { + "epoch": 1.1880796535426843, + "grad_norm": 1.153115171529422, + "learning_rate": 7.559804965634621e-06, + "loss": 0.8681, + "step": 30040 + }, + { + "epoch": 1.1884751527615733, + "grad_norm": 1.2102730861638553, + "learning_rate": 7.557828121083764e-06, + "loss": 0.8748, + "step": 30050 + }, + { + "epoch": 1.1888706519804624, + "grad_norm": 1.5041728367497254, + "learning_rate": 7.555850734828732e-06, + "loss": 0.8663, + "step": 30060 + }, + { + "epoch": 1.1892661511993514, + "grad_norm": 1.2767644170458703, + "learning_rate": 7.553872807288303e-06, + "loss": 0.8635, + "step": 30070 + }, + { + "epoch": 1.1896616504182405, + "grad_norm": 1.1683450132636235, + "learning_rate": 7.551894338881365e-06, + "loss": 0.8552, + "step": 30080 + }, + { + "epoch": 1.1900571496371295, + "grad_norm": 1.2379071855299737, + "learning_rate": 7.5499153300269245e-06, + "loss": 0.8873, + "step": 30090 + }, + { + "epoch": 1.1904526488560185, + "grad_norm": 1.1647630688321888, + "learning_rate": 7.547935781144104e-06, + "loss": 0.8505, + "step": 30100 + }, + { + "epoch": 1.1908481480749076, + "grad_norm": 1.2765442384447536, + "learning_rate": 7.545955692652138e-06, + "loss": 0.8724, + "step": 30110 + }, + { + "epoch": 1.1912436472937966, + "grad_norm": 1.297724651854536, + "learning_rate": 7.543975064970374e-06, + "loss": 0.8742, + "step": 30120 + }, + { + "epoch": 1.1916391465126857, + "grad_norm": 1.1240698859136409, + "learning_rate": 7.541993898518274e-06, + "loss": 0.8984, + "step": 30130 + }, + { + "epoch": 1.1920346457315747, + "grad_norm": 1.3470036819026818, + "learning_rate": 7.540012193715416e-06, + "loss": 0.8799, + "step": 30140 + }, + { + "epoch": 1.1924301449504637, + "grad_norm": 1.2314084608642535, + "learning_rate": 7.538029950981491e-06, + "loss": 0.8791, + "step": 30150 + }, + { + "epoch": 1.1928256441693528, + "grad_norm": 1.1784692893783266, + "learning_rate": 7.536047170736305e-06, + "loss": 0.8854, + "step": 30160 + }, + { + "epoch": 1.1932211433882418, + "grad_norm": 1.14420482272867, + "learning_rate": 
7.534063853399778e-06, + "loss": 0.8971, + "step": 30170 + }, + { + "epoch": 1.1936166426071309, + "grad_norm": 1.4487798439281692, + "learning_rate": 7.532079999391939e-06, + "loss": 0.8622, + "step": 30180 + }, + { + "epoch": 1.19401214182602, + "grad_norm": 1.0877524904221965, + "learning_rate": 7.530095609132936e-06, + "loss": 0.88, + "step": 30190 + }, + { + "epoch": 1.194407641044909, + "grad_norm": 1.234983092565341, + "learning_rate": 7.528110683043029e-06, + "loss": 0.8485, + "step": 30200 + }, + { + "epoch": 1.194803140263798, + "grad_norm": 1.1251153792094335, + "learning_rate": 7.526125221542593e-06, + "loss": 0.8829, + "step": 30210 + }, + { + "epoch": 1.195198639482687, + "grad_norm": 1.2267855953598872, + "learning_rate": 7.524139225052112e-06, + "loss": 0.8819, + "step": 30220 + }, + { + "epoch": 1.195594138701576, + "grad_norm": 1.5108971884967024, + "learning_rate": 7.522152693992187e-06, + "loss": 0.8712, + "step": 30230 + }, + { + "epoch": 1.195989637920465, + "grad_norm": 1.2659442593945363, + "learning_rate": 7.520165628783532e-06, + "loss": 0.878, + "step": 30240 + }, + { + "epoch": 1.1963851371393541, + "grad_norm": 1.1497900474574254, + "learning_rate": 7.518178029846972e-06, + "loss": 0.9073, + "step": 30250 + }, + { + "epoch": 1.1967806363582432, + "grad_norm": 1.2919209547468042, + "learning_rate": 7.516189897603448e-06, + "loss": 0.8426, + "step": 30260 + }, + { + "epoch": 1.1971761355771322, + "grad_norm": 1.2807626231224034, + "learning_rate": 7.514201232474012e-06, + "loss": 0.896, + "step": 30270 + }, + { + "epoch": 1.1975716347960212, + "grad_norm": 1.1555291983064853, + "learning_rate": 7.512212034879827e-06, + "loss": 0.8928, + "step": 30280 + }, + { + "epoch": 1.1979671340149103, + "grad_norm": 1.2648593824664756, + "learning_rate": 7.510222305242174e-06, + "loss": 0.8781, + "step": 30290 + }, + { + "epoch": 1.1983626332337993, + "grad_norm": 0.990387571846563, + "learning_rate": 7.508232043982443e-06, + "loss": 0.8943, + "step": 30300 + }, + { + "epoch": 1.1987581324526884, + "grad_norm": 1.2381759611804162, + "learning_rate": 7.506241251522135e-06, + "loss": 0.8692, + "step": 30310 + }, + { + "epoch": 1.1991536316715774, + "grad_norm": 1.3334554431564103, + "learning_rate": 7.5042499282828674e-06, + "loss": 0.8772, + "step": 30320 + }, + { + "epoch": 1.1995491308904664, + "grad_norm": 1.3819279434705443, + "learning_rate": 7.50225807468637e-06, + "loss": 0.8744, + "step": 30330 + }, + { + "epoch": 1.1999446301093555, + "grad_norm": 1.0502563676895056, + "learning_rate": 7.5002656911544795e-06, + "loss": 0.8953, + "step": 30340 + }, + { + "epoch": 1.2003401293282445, + "grad_norm": 1.4035638473126562, + "learning_rate": 7.498272778109152e-06, + "loss": 0.8698, + "step": 30350 + }, + { + "epoch": 1.2007356285471336, + "grad_norm": 1.3338299481943479, + "learning_rate": 7.49627933597245e-06, + "loss": 0.8816, + "step": 30360 + }, + { + "epoch": 1.2011311277660226, + "grad_norm": 1.2140920358452105, + "learning_rate": 7.494285365166552e-06, + "loss": 0.8594, + "step": 30370 + }, + { + "epoch": 1.2015266269849116, + "grad_norm": 1.2231030904066684, + "learning_rate": 7.492290866113746e-06, + "loss": 0.8767, + "step": 30380 + }, + { + "epoch": 1.2019221262038007, + "grad_norm": 1.2316438852221772, + "learning_rate": 7.490295839236432e-06, + "loss": 0.8808, + "step": 30390 + }, + { + "epoch": 1.2023176254226897, + "grad_norm": 1.1362333437213394, + "learning_rate": 7.488300284957125e-06, + "loss": 0.867, + "step": 30400 + }, + { + "epoch": 
1.2027131246415788, + "grad_norm": 1.1500453213525457, + "learning_rate": 7.486304203698448e-06, + "loss": 0.8866, + "step": 30410 + }, + { + "epoch": 1.2031086238604678, + "grad_norm": 1.1506962740643867, + "learning_rate": 7.484307595883135e-06, + "loss": 0.8886, + "step": 30420 + }, + { + "epoch": 1.2035041230793568, + "grad_norm": 1.201408204583174, + "learning_rate": 7.482310461934036e-06, + "loss": 0.8845, + "step": 30430 + }, + { + "epoch": 1.2038996222982459, + "grad_norm": 1.2820270144495614, + "learning_rate": 7.480312802274108e-06, + "loss": 0.8769, + "step": 30440 + }, + { + "epoch": 1.204295121517135, + "grad_norm": 1.2680078696978063, + "learning_rate": 7.478314617326421e-06, + "loss": 0.8921, + "step": 30450 + }, + { + "epoch": 1.204690620736024, + "grad_norm": 1.394901389157302, + "learning_rate": 7.4763159075141576e-06, + "loss": 0.8772, + "step": 30460 + }, + { + "epoch": 1.205086119954913, + "grad_norm": 1.2219993598695291, + "learning_rate": 7.474316673260611e-06, + "loss": 0.8799, + "step": 30470 + }, + { + "epoch": 1.2054816191738023, + "grad_norm": 1.2392409578102421, + "learning_rate": 7.472316914989182e-06, + "loss": 0.8882, + "step": 30480 + }, + { + "epoch": 1.2058771183926913, + "grad_norm": 1.2441364063396931, + "learning_rate": 7.470316633123386e-06, + "loss": 0.861, + "step": 30490 + }, + { + "epoch": 1.2062726176115803, + "grad_norm": 1.1590666017279956, + "learning_rate": 7.468315828086849e-06, + "loss": 0.8659, + "step": 30500 + }, + { + "epoch": 1.2066681168304694, + "grad_norm": 1.2702387855919597, + "learning_rate": 7.46631450030331e-06, + "loss": 0.8672, + "step": 30510 + }, + { + "epoch": 1.2070636160493584, + "grad_norm": 1.209849087076642, + "learning_rate": 7.464312650196611e-06, + "loss": 0.8761, + "step": 30520 + }, + { + "epoch": 1.2074591152682475, + "grad_norm": 1.1434322955661456, + "learning_rate": 7.462310278190712e-06, + "loss": 0.8924, + "step": 30530 + }, + { + "epoch": 1.2078546144871365, + "grad_norm": 1.2656374759466265, + "learning_rate": 7.4603073847096815e-06, + "loss": 0.8682, + "step": 30540 + }, + { + "epoch": 1.2082501137060255, + "grad_norm": 1.4873812869617682, + "learning_rate": 7.458303970177697e-06, + "loss": 0.8901, + "step": 30550 + }, + { + "epoch": 1.2086456129249146, + "grad_norm": 1.0741380642659981, + "learning_rate": 7.456300035019048e-06, + "loss": 0.8773, + "step": 30560 + }, + { + "epoch": 1.2090411121438036, + "grad_norm": 1.1483662240291839, + "learning_rate": 7.454295579658133e-06, + "loss": 0.8761, + "step": 30570 + }, + { + "epoch": 1.2094366113626926, + "grad_norm": 1.2671918665761617, + "learning_rate": 7.452290604519461e-06, + "loss": 0.8786, + "step": 30580 + }, + { + "epoch": 1.2098321105815817, + "grad_norm": 1.0809221742041761, + "learning_rate": 7.450285110027653e-06, + "loss": 0.8728, + "step": 30590 + }, + { + "epoch": 1.2102276098004707, + "grad_norm": 1.187613536189826, + "learning_rate": 7.448279096607438e-06, + "loss": 0.8752, + "step": 30600 + }, + { + "epoch": 1.2106231090193598, + "grad_norm": 1.2879164023944698, + "learning_rate": 7.446272564683653e-06, + "loss": 0.8831, + "step": 30610 + }, + { + "epoch": 1.2110186082382488, + "grad_norm": 1.1623723799959365, + "learning_rate": 7.44426551468125e-06, + "loss": 0.867, + "step": 30620 + }, + { + "epoch": 1.2114141074571378, + "grad_norm": 1.3425602614978056, + "learning_rate": 7.442257947025286e-06, + "loss": 0.8804, + "step": 30630 + }, + { + "epoch": 1.2118096066760269, + "grad_norm": 1.1601354914649742, + "learning_rate": 
7.44024986214093e-06, + "loss": 0.857, + "step": 30640 + }, + { + "epoch": 1.212205105894916, + "grad_norm": 1.225937988692282, + "learning_rate": 7.43824126045346e-06, + "loss": 0.883, + "step": 30650 + }, + { + "epoch": 1.212600605113805, + "grad_norm": 1.3907266664149571, + "learning_rate": 7.4362321423882655e-06, + "loss": 0.8629, + "step": 30660 + }, + { + "epoch": 1.212996104332694, + "grad_norm": 1.2297971837030044, + "learning_rate": 7.4342225083708385e-06, + "loss": 0.863, + "step": 30670 + }, + { + "epoch": 1.213391603551583, + "grad_norm": 1.1037882143071152, + "learning_rate": 7.432212358826789e-06, + "loss": 0.849, + "step": 30680 + }, + { + "epoch": 1.213787102770472, + "grad_norm": 1.1672533533360685, + "learning_rate": 7.430201694181831e-06, + "loss": 0.8918, + "step": 30690 + }, + { + "epoch": 1.2141826019893611, + "grad_norm": 1.1882231387364985, + "learning_rate": 7.428190514861789e-06, + "loss": 0.8719, + "step": 30700 + }, + { + "epoch": 1.2145781012082502, + "grad_norm": 1.1269240213938998, + "learning_rate": 7.426178821292596e-06, + "loss": 0.8918, + "step": 30710 + }, + { + "epoch": 1.2149736004271392, + "grad_norm": 1.1447869970913642, + "learning_rate": 7.424166613900294e-06, + "loss": 0.8776, + "step": 30720 + }, + { + "epoch": 1.2153690996460282, + "grad_norm": 1.1551866874166905, + "learning_rate": 7.422153893111035e-06, + "loss": 0.8933, + "step": 30730 + }, + { + "epoch": 1.2157645988649173, + "grad_norm": 1.2770598590349849, + "learning_rate": 7.420140659351078e-06, + "loss": 0.8532, + "step": 30740 + }, + { + "epoch": 1.2161600980838063, + "grad_norm": 1.2679285696559792, + "learning_rate": 7.4181269130467925e-06, + "loss": 0.8682, + "step": 30750 + }, + { + "epoch": 1.2165555973026954, + "grad_norm": 1.3037050216813681, + "learning_rate": 7.416112654624653e-06, + "loss": 0.8673, + "step": 30760 + }, + { + "epoch": 1.2169510965215844, + "grad_norm": 1.1891946357108039, + "learning_rate": 7.414097884511247e-06, + "loss": 0.8855, + "step": 30770 + }, + { + "epoch": 1.2173465957404734, + "grad_norm": 1.1199983984461903, + "learning_rate": 7.412082603133269e-06, + "loss": 0.8768, + "step": 30780 + }, + { + "epoch": 1.2177420949593625, + "grad_norm": 1.2732492102802262, + "learning_rate": 7.41006681091752e-06, + "loss": 0.8824, + "step": 30790 + }, + { + "epoch": 1.2181375941782515, + "grad_norm": 1.284205533910894, + "learning_rate": 7.408050508290908e-06, + "loss": 0.8613, + "step": 30800 + }, + { + "epoch": 1.2185330933971406, + "grad_norm": 1.1873043450084546, + "learning_rate": 7.4060336956804544e-06, + "loss": 0.8675, + "step": 30810 + }, + { + "epoch": 1.2189285926160296, + "grad_norm": 1.10807913536078, + "learning_rate": 7.404016373513286e-06, + "loss": 0.8849, + "step": 30820 + }, + { + "epoch": 1.2193240918349186, + "grad_norm": 1.1444073467795979, + "learning_rate": 7.401998542216634e-06, + "loss": 0.883, + "step": 30830 + }, + { + "epoch": 1.2197195910538077, + "grad_norm": 1.2277546753098685, + "learning_rate": 7.3999802022178444e-06, + "loss": 0.884, + "step": 30840 + }, + { + "epoch": 1.2201150902726967, + "grad_norm": 1.3438082688363076, + "learning_rate": 7.397961353944363e-06, + "loss": 0.8595, + "step": 30850 + }, + { + "epoch": 1.2205105894915858, + "grad_norm": 1.2793345614982596, + "learning_rate": 7.39594199782375e-06, + "loss": 0.858, + "step": 30860 + }, + { + "epoch": 1.2209060887104748, + "grad_norm": 1.047414218746642, + "learning_rate": 7.3939221342836685e-06, + "loss": 0.8765, + "step": 30870 + }, + { + "epoch": 1.2213015879293638, 
+ "grad_norm": 0.9868602981324371, + "learning_rate": 7.391901763751893e-06, + "loss": 0.8717, + "step": 30880 + }, + { + "epoch": 1.2216970871482529, + "grad_norm": 1.0432859996866466, + "learning_rate": 7.389880886656302e-06, + "loss": 0.8845, + "step": 30890 + }, + { + "epoch": 1.222092586367142, + "grad_norm": 1.3761920878144458, + "learning_rate": 7.387859503424885e-06, + "loss": 0.863, + "step": 30900 + }, + { + "epoch": 1.222488085586031, + "grad_norm": 1.3523825898214374, + "learning_rate": 7.385837614485733e-06, + "loss": 0.87, + "step": 30910 + }, + { + "epoch": 1.22288358480492, + "grad_norm": 1.1706124295931761, + "learning_rate": 7.3838152202670475e-06, + "loss": 0.8789, + "step": 30920 + }, + { + "epoch": 1.223279084023809, + "grad_norm": 1.112920698040595, + "learning_rate": 7.38179232119714e-06, + "loss": 0.8622, + "step": 30930 + }, + { + "epoch": 1.223674583242698, + "grad_norm": 1.139550207736678, + "learning_rate": 7.379768917704423e-06, + "loss": 0.8849, + "step": 30940 + }, + { + "epoch": 1.224070082461587, + "grad_norm": 1.2182267428090179, + "learning_rate": 7.377745010217422e-06, + "loss": 0.8751, + "step": 30950 + }, + { + "epoch": 1.2244655816804761, + "grad_norm": 1.112034364641804, + "learning_rate": 7.375720599164762e-06, + "loss": 0.8563, + "step": 30960 + }, + { + "epoch": 1.2248610808993652, + "grad_norm": 1.2264328357299235, + "learning_rate": 7.373695684975181e-06, + "loss": 0.8519, + "step": 30970 + }, + { + "epoch": 1.2252565801182542, + "grad_norm": 1.249742937772663, + "learning_rate": 7.371670268077521e-06, + "loss": 0.8842, + "step": 30980 + }, + { + "epoch": 1.2256520793371433, + "grad_norm": 1.0666822579524449, + "learning_rate": 7.369644348900728e-06, + "loss": 0.8781, + "step": 30990 + }, + { + "epoch": 1.2260475785560323, + "grad_norm": 1.349618199259138, + "learning_rate": 7.367617927873861e-06, + "loss": 0.8565, + "step": 31000 + }, + { + "epoch": 1.2264430777749213, + "grad_norm": 1.3947666751637184, + "learning_rate": 7.365591005426079e-06, + "loss": 0.882, + "step": 31010 + }, + { + "epoch": 1.2268385769938104, + "grad_norm": 1.1658629143014931, + "learning_rate": 7.36356358198665e-06, + "loss": 0.8768, + "step": 31020 + }, + { + "epoch": 1.2272340762126994, + "grad_norm": 1.1997589208803603, + "learning_rate": 7.361535657984948e-06, + "loss": 0.8972, + "step": 31030 + }, + { + "epoch": 1.2276295754315885, + "grad_norm": 1.1546975370856547, + "learning_rate": 7.3595072338504515e-06, + "loss": 0.8625, + "step": 31040 + }, + { + "epoch": 1.2280250746504775, + "grad_norm": 1.2293308281607909, + "learning_rate": 7.357478310012744e-06, + "loss": 0.8672, + "step": 31050 + }, + { + "epoch": 1.2284205738693665, + "grad_norm": 1.247945308541937, + "learning_rate": 7.355448886901521e-06, + "loss": 0.868, + "step": 31060 + }, + { + "epoch": 1.2288160730882556, + "grad_norm": 1.3806279092443983, + "learning_rate": 7.353418964946579e-06, + "loss": 0.8608, + "step": 31070 + }, + { + "epoch": 1.2292115723071446, + "grad_norm": 1.2590724085519411, + "learning_rate": 7.3513885445778175e-06, + "loss": 0.8616, + "step": 31080 + }, + { + "epoch": 1.2296070715260337, + "grad_norm": 1.1164248425888081, + "learning_rate": 7.349357626225249e-06, + "loss": 0.8874, + "step": 31090 + }, + { + "epoch": 1.2300025707449227, + "grad_norm": 1.2414045650432723, + "learning_rate": 7.347326210318983e-06, + "loss": 0.8669, + "step": 31100 + }, + { + "epoch": 1.2303980699638117, + "grad_norm": 1.1907321293095392, + "learning_rate": 7.34529429728924e-06, + "loss": 0.8629, + 
"step": 31110 + }, + { + "epoch": 1.2307935691827008, + "grad_norm": 1.3220774501477472, + "learning_rate": 7.3432618875663465e-06, + "loss": 0.8654, + "step": 31120 + }, + { + "epoch": 1.2311890684015898, + "grad_norm": 1.1760025782629897, + "learning_rate": 7.341228981580729e-06, + "loss": 0.8547, + "step": 31130 + }, + { + "epoch": 1.2315845676204789, + "grad_norm": 1.242813154183523, + "learning_rate": 7.339195579762924e-06, + "loss": 0.8643, + "step": 31140 + }, + { + "epoch": 1.231980066839368, + "grad_norm": 1.295488838746389, + "learning_rate": 7.337161682543572e-06, + "loss": 0.8717, + "step": 31150 + }, + { + "epoch": 1.232375566058257, + "grad_norm": 1.3594940339402584, + "learning_rate": 7.335127290353415e-06, + "loss": 0.873, + "step": 31160 + }, + { + "epoch": 1.232771065277146, + "grad_norm": 1.2253792758014268, + "learning_rate": 7.333092403623304e-06, + "loss": 0.8618, + "step": 31170 + }, + { + "epoch": 1.233166564496035, + "grad_norm": 1.0445603472519083, + "learning_rate": 7.3310570227841934e-06, + "loss": 0.8651, + "step": 31180 + }, + { + "epoch": 1.233562063714924, + "grad_norm": 1.3084948848418392, + "learning_rate": 7.329021148267141e-06, + "loss": 0.8702, + "step": 31190 + }, + { + "epoch": 1.233957562933813, + "grad_norm": 1.22932538642094, + "learning_rate": 7.326984780503311e-06, + "loss": 0.8705, + "step": 31200 + }, + { + "epoch": 1.2343530621527021, + "grad_norm": 1.1620894275450713, + "learning_rate": 7.324947919923971e-06, + "loss": 0.8686, + "step": 31210 + }, + { + "epoch": 1.2347485613715912, + "grad_norm": 1.1566605689648233, + "learning_rate": 7.322910566960492e-06, + "loss": 0.8581, + "step": 31220 + }, + { + "epoch": 1.2351440605904804, + "grad_norm": 1.3485308482619998, + "learning_rate": 7.320872722044353e-06, + "loss": 0.8544, + "step": 31230 + }, + { + "epoch": 1.2355395598093695, + "grad_norm": 1.178213515724198, + "learning_rate": 7.318834385607132e-06, + "loss": 0.8667, + "step": 31240 + }, + { + "epoch": 1.2359350590282585, + "grad_norm": 1.4203837370416543, + "learning_rate": 7.316795558080515e-06, + "loss": 0.8629, + "step": 31250 + }, + { + "epoch": 1.2363305582471475, + "grad_norm": 1.018730164606751, + "learning_rate": 7.3147562398962905e-06, + "loss": 0.8747, + "step": 31260 + }, + { + "epoch": 1.2367260574660366, + "grad_norm": 1.173818356507461, + "learning_rate": 7.312716431486352e-06, + "loss": 0.8549, + "step": 31270 + }, + { + "epoch": 1.2371215566849256, + "grad_norm": 1.1226114907007296, + "learning_rate": 7.310676133282694e-06, + "loss": 0.8912, + "step": 31280 + }, + { + "epoch": 1.2375170559038147, + "grad_norm": 1.4679844843098098, + "learning_rate": 7.308635345717419e-06, + "loss": 0.8774, + "step": 31290 + }, + { + "epoch": 1.2379125551227037, + "grad_norm": 1.20059472400533, + "learning_rate": 7.306594069222727e-06, + "loss": 0.873, + "step": 31300 + }, + { + "epoch": 1.2383080543415927, + "grad_norm": 1.1476944818926023, + "learning_rate": 7.304552304230932e-06, + "loss": 0.8552, + "step": 31310 + }, + { + "epoch": 1.2387035535604818, + "grad_norm": 1.3089906317837405, + "learning_rate": 7.302510051174438e-06, + "loss": 0.8783, + "step": 31320 + }, + { + "epoch": 1.2390990527793708, + "grad_norm": 1.1961947022610788, + "learning_rate": 7.300467310485765e-06, + "loss": 0.8679, + "step": 31330 + }, + { + "epoch": 1.2394945519982599, + "grad_norm": 1.2936985679649506, + "learning_rate": 7.298424082597526e-06, + "loss": 0.8718, + "step": 31340 + }, + { + "epoch": 1.239890051217149, + "grad_norm": 1.2268852158815078, + 
"learning_rate": 7.2963803679424425e-06, + "loss": 0.8523, + "step": 31350 + }, + { + "epoch": 1.240285550436038, + "grad_norm": 1.298636821660816, + "learning_rate": 7.29433616695334e-06, + "loss": 0.8774, + "step": 31360 + }, + { + "epoch": 1.240681049654927, + "grad_norm": 1.1745987817692933, + "learning_rate": 7.292291480063145e-06, + "loss": 0.894, + "step": 31370 + }, + { + "epoch": 1.241076548873816, + "grad_norm": 1.1828074235921668, + "learning_rate": 7.290246307704886e-06, + "loss": 0.8853, + "step": 31380 + }, + { + "epoch": 1.241472048092705, + "grad_norm": 1.2138855329839873, + "learning_rate": 7.288200650311697e-06, + "loss": 0.8694, + "step": 31390 + }, + { + "epoch": 1.241867547311594, + "grad_norm": 1.2572049646820538, + "learning_rate": 7.286154508316809e-06, + "loss": 0.8573, + "step": 31400 + }, + { + "epoch": 1.2422630465304831, + "grad_norm": 1.1673004556088065, + "learning_rate": 7.284107882153566e-06, + "loss": 0.8509, + "step": 31410 + }, + { + "epoch": 1.2426585457493722, + "grad_norm": 1.185079575213692, + "learning_rate": 7.282060772255405e-06, + "loss": 0.8892, + "step": 31420 + }, + { + "epoch": 1.2430540449682612, + "grad_norm": 1.1505916080441212, + "learning_rate": 7.280013179055868e-06, + "loss": 0.8849, + "step": 31430 + }, + { + "epoch": 1.2434495441871503, + "grad_norm": 1.0699872828468424, + "learning_rate": 7.277965102988602e-06, + "loss": 0.8568, + "step": 31440 + }, + { + "epoch": 1.2438450434060393, + "grad_norm": 1.2363136720009182, + "learning_rate": 7.275916544487354e-06, + "loss": 0.8657, + "step": 31450 + }, + { + "epoch": 1.2442405426249283, + "grad_norm": 1.216989503342513, + "learning_rate": 7.273867503985973e-06, + "loss": 0.8732, + "step": 31460 + }, + { + "epoch": 1.2446360418438174, + "grad_norm": 1.129524993048562, + "learning_rate": 7.27181798191841e-06, + "loss": 0.8579, + "step": 31470 + }, + { + "epoch": 1.2450315410627064, + "grad_norm": 1.312519684527118, + "learning_rate": 7.26976797871872e-06, + "loss": 0.8722, + "step": 31480 + }, + { + "epoch": 1.2454270402815955, + "grad_norm": 1.1194329825990177, + "learning_rate": 7.2677174948210596e-06, + "loss": 0.861, + "step": 31490 + }, + { + "epoch": 1.2458225395004845, + "grad_norm": 1.1578306475693407, + "learning_rate": 7.265666530659683e-06, + "loss": 0.8624, + "step": 31500 + }, + { + "epoch": 1.2462180387193735, + "grad_norm": 1.1203388797303029, + "learning_rate": 7.263615086668951e-06, + "loss": 0.8739, + "step": 31510 + }, + { + "epoch": 1.2466135379382626, + "grad_norm": 1.1642138385927037, + "learning_rate": 7.261563163283327e-06, + "loss": 0.8655, + "step": 31520 + }, + { + "epoch": 1.2470090371571516, + "grad_norm": 1.2330200420848738, + "learning_rate": 7.259510760937368e-06, + "loss": 0.8702, + "step": 31530 + }, + { + "epoch": 1.2474045363760407, + "grad_norm": 1.0892217493232934, + "learning_rate": 7.257457880065742e-06, + "loss": 0.8817, + "step": 31540 + }, + { + "epoch": 1.2478000355949297, + "grad_norm": 1.295547559069332, + "learning_rate": 7.25540452110321e-06, + "loss": 0.8641, + "step": 31550 + }, + { + "epoch": 1.2481955348138187, + "grad_norm": 1.0413299693830773, + "learning_rate": 7.253350684484641e-06, + "loss": 0.8576, + "step": 31560 + }, + { + "epoch": 1.2485910340327078, + "grad_norm": 1.2151881161897842, + "learning_rate": 7.2512963706450026e-06, + "loss": 0.8564, + "step": 31570 + }, + { + "epoch": 1.2489865332515968, + "grad_norm": 1.2842122769745818, + "learning_rate": 7.249241580019363e-06, + "loss": 0.86, + "step": 31580 + }, + { + "epoch": 
1.2493820324704858, + "grad_norm": 1.280145148146454, + "learning_rate": 7.247186313042891e-06, + "loss": 0.86, + "step": 31590 + }, + { + "epoch": 1.2497775316893749, + "grad_norm": 1.2544077681597052, + "learning_rate": 7.245130570150856e-06, + "loss": 0.8483, + "step": 31600 + }, + { + "epoch": 1.250173030908264, + "grad_norm": 1.2872839305033144, + "learning_rate": 7.243074351778631e-06, + "loss": 0.8715, + "step": 31610 + }, + { + "epoch": 1.250568530127153, + "grad_norm": 1.160940326062675, + "learning_rate": 7.2410176583616866e-06, + "loss": 0.8944, + "step": 31620 + }, + { + "epoch": 1.250964029346042, + "grad_norm": 1.0401148991120426, + "learning_rate": 7.238960490335597e-06, + "loss": 0.8754, + "step": 31630 + }, + { + "epoch": 1.251359528564931, + "grad_norm": 1.110946732267155, + "learning_rate": 7.236902848136033e-06, + "loss": 0.8779, + "step": 31640 + }, + { + "epoch": 1.25175502778382, + "grad_norm": 1.2022224003429425, + "learning_rate": 7.234844732198769e-06, + "loss": 0.8547, + "step": 31650 + }, + { + "epoch": 1.2521505270027091, + "grad_norm": 1.2423666740154775, + "learning_rate": 7.232786142959678e-06, + "loss": 0.8685, + "step": 31660 + }, + { + "epoch": 1.2525460262215982, + "grad_norm": 1.0535073112479556, + "learning_rate": 7.230727080854735e-06, + "loss": 0.8794, + "step": 31670 + }, + { + "epoch": 1.2529415254404872, + "grad_norm": 1.2446774591081935, + "learning_rate": 7.228667546320012e-06, + "loss": 0.853, + "step": 31680 + }, + { + "epoch": 1.2533370246593762, + "grad_norm": 1.1695945932793697, + "learning_rate": 7.226607539791686e-06, + "loss": 0.8715, + "step": 31690 + }, + { + "epoch": 1.2537325238782653, + "grad_norm": 1.2525059312609752, + "learning_rate": 7.224547061706031e-06, + "loss": 0.8781, + "step": 31700 + }, + { + "epoch": 1.2541280230971543, + "grad_norm": 1.107843234569712, + "learning_rate": 7.222486112499417e-06, + "loss": 0.8613, + "step": 31710 + }, + { + "epoch": 1.2545235223160434, + "grad_norm": 1.433472027281944, + "learning_rate": 7.220424692608322e-06, + "loss": 0.8629, + "step": 31720 + }, + { + "epoch": 1.2549190215349324, + "grad_norm": 1.2121744401639958, + "learning_rate": 7.218362802469318e-06, + "loss": 0.8724, + "step": 31730 + }, + { + "epoch": 1.2553145207538214, + "grad_norm": 1.245647044496111, + "learning_rate": 7.2163004425190766e-06, + "loss": 0.8755, + "step": 31740 + }, + { + "epoch": 1.2557100199727105, + "grad_norm": 1.1793159121658583, + "learning_rate": 7.214237613194372e-06, + "loss": 0.87, + "step": 31750 + }, + { + "epoch": 1.2561055191915995, + "grad_norm": 1.4154921806680583, + "learning_rate": 7.212174314932077e-06, + "loss": 0.8759, + "step": 31760 + }, + { + "epoch": 1.2565010184104886, + "grad_norm": 1.1570660480701687, + "learning_rate": 7.2101105481691605e-06, + "loss": 0.8484, + "step": 31770 + }, + { + "epoch": 1.2568965176293778, + "grad_norm": 1.3312105736122783, + "learning_rate": 7.2080463133426935e-06, + "loss": 0.8516, + "step": 31780 + }, + { + "epoch": 1.2572920168482669, + "grad_norm": 1.120168502556861, + "learning_rate": 7.205981610889846e-06, + "loss": 0.8654, + "step": 31790 + }, + { + "epoch": 1.257687516067156, + "grad_norm": 1.4031897475386614, + "learning_rate": 7.203916441247887e-06, + "loss": 0.8678, + "step": 31800 + }, + { + "epoch": 1.258083015286045, + "grad_norm": 1.2170041226692108, + "learning_rate": 7.201850804854182e-06, + "loss": 0.862, + "step": 31810 + }, + { + "epoch": 1.258478514504934, + "grad_norm": 1.1742007585537348, + "learning_rate": 7.199784702146202e-06, + 
"loss": 0.8603, + "step": 31820 + }, + { + "epoch": 1.258874013723823, + "grad_norm": 1.3371052527133733, + "learning_rate": 7.1977181335615085e-06, + "loss": 0.8589, + "step": 31830 + }, + { + "epoch": 1.259269512942712, + "grad_norm": 1.222790588857591, + "learning_rate": 7.195651099537765e-06, + "loss": 0.8806, + "step": 31840 + }, + { + "epoch": 1.259665012161601, + "grad_norm": 1.0876102133595091, + "learning_rate": 7.193583600512736e-06, + "loss": 0.874, + "step": 31850 + }, + { + "epoch": 1.2600605113804901, + "grad_norm": 1.1337590631700962, + "learning_rate": 7.191515636924281e-06, + "loss": 0.8524, + "step": 31860 + }, + { + "epoch": 1.2604560105993792, + "grad_norm": 1.3494189781290324, + "learning_rate": 7.189447209210359e-06, + "loss": 0.8762, + "step": 31870 + }, + { + "epoch": 1.2608515098182682, + "grad_norm": 1.2793363660690056, + "learning_rate": 7.187378317809028e-06, + "loss": 0.8639, + "step": 31880 + }, + { + "epoch": 1.2612470090371573, + "grad_norm": 1.0926760706820657, + "learning_rate": 7.185308963158445e-06, + "loss": 0.8548, + "step": 31890 + }, + { + "epoch": 1.2616425082560463, + "grad_norm": 1.4817884694980814, + "learning_rate": 7.183239145696862e-06, + "loss": 0.8669, + "step": 31900 + }, + { + "epoch": 1.2620380074749353, + "grad_norm": 1.138026427359992, + "learning_rate": 7.181168865862631e-06, + "loss": 0.8842, + "step": 31910 + }, + { + "epoch": 1.2624335066938244, + "grad_norm": 1.3158088417233433, + "learning_rate": 7.179098124094204e-06, + "loss": 0.8721, + "step": 31920 + }, + { + "epoch": 1.2628290059127134, + "grad_norm": 1.587337009026462, + "learning_rate": 7.177026920830125e-06, + "loss": 0.8675, + "step": 31930 + }, + { + "epoch": 1.2632245051316024, + "grad_norm": 1.2679009275670674, + "learning_rate": 7.174955256509043e-06, + "loss": 0.847, + "step": 31940 + }, + { + "epoch": 1.2636200043504915, + "grad_norm": 1.0965321520039089, + "learning_rate": 7.1728831315696986e-06, + "loss": 0.8765, + "step": 31950 + }, + { + "epoch": 1.2640155035693805, + "grad_norm": 1.3338168865013313, + "learning_rate": 7.170810546450934e-06, + "loss": 0.8782, + "step": 31960 + }, + { + "epoch": 1.2644110027882696, + "grad_norm": 1.269791458172327, + "learning_rate": 7.168737501591685e-06, + "loss": 0.8724, + "step": 31970 + }, + { + "epoch": 1.2648065020071586, + "grad_norm": 1.1935395137092817, + "learning_rate": 7.166663997430989e-06, + "loss": 0.8793, + "step": 31980 + }, + { + "epoch": 1.2652020012260476, + "grad_norm": 1.3230734527146772, + "learning_rate": 7.164590034407978e-06, + "loss": 0.8713, + "step": 31990 + }, + { + "epoch": 1.2655975004449367, + "grad_norm": 1.1794508212240498, + "learning_rate": 7.162515612961882e-06, + "loss": 0.8682, + "step": 32000 + }, + { + "epoch": 1.2659929996638257, + "grad_norm": 1.4078866696702395, + "learning_rate": 7.160440733532029e-06, + "loss": 0.8763, + "step": 32010 + }, + { + "epoch": 1.2663884988827148, + "grad_norm": 1.074316113246102, + "learning_rate": 7.15836539655784e-06, + "loss": 0.8666, + "step": 32020 + }, + { + "epoch": 1.2667839981016038, + "grad_norm": 1.149400299234721, + "learning_rate": 7.1562896024788385e-06, + "loss": 0.8615, + "step": 32030 + }, + { + "epoch": 1.2671794973204928, + "grad_norm": 1.0906993841549095, + "learning_rate": 7.15421335173464e-06, + "loss": 0.8645, + "step": 32040 + }, + { + "epoch": 1.2675749965393819, + "grad_norm": 1.2182768864036402, + "learning_rate": 7.152136644764961e-06, + "loss": 0.8609, + "step": 32050 + }, + { + "epoch": 1.267970495758271, + "grad_norm": 
1.2005887287674193, + "learning_rate": 7.150059482009611e-06, + "loss": 0.8435, + "step": 32060 + }, + { + "epoch": 1.26836599497716, + "grad_norm": 1.1974936061138395, + "learning_rate": 7.1479818639084995e-06, + "loss": 0.8799, + "step": 32070 + }, + { + "epoch": 1.268761494196049, + "grad_norm": 1.2392282115498527, + "learning_rate": 7.145903790901627e-06, + "loss": 0.8801, + "step": 32080 + }, + { + "epoch": 1.269156993414938, + "grad_norm": 1.2631428065902746, + "learning_rate": 7.143825263429096e-06, + "loss": 0.8544, + "step": 32090 + }, + { + "epoch": 1.269552492633827, + "grad_norm": 1.2523901157944977, + "learning_rate": 7.141746281931104e-06, + "loss": 0.8688, + "step": 32100 + }, + { + "epoch": 1.2699479918527161, + "grad_norm": 1.3706967956352027, + "learning_rate": 7.139666846847942e-06, + "loss": 0.8651, + "step": 32110 + }, + { + "epoch": 1.2703434910716052, + "grad_norm": 1.2109669885347163, + "learning_rate": 7.137586958619996e-06, + "loss": 0.8679, + "step": 32120 + }, + { + "epoch": 1.2707389902904942, + "grad_norm": 1.3991337728841258, + "learning_rate": 7.135506617687757e-06, + "loss": 0.8409, + "step": 32130 + }, + { + "epoch": 1.2711344895093832, + "grad_norm": 1.1924039426635944, + "learning_rate": 7.133425824491801e-06, + "loss": 0.8689, + "step": 32140 + }, + { + "epoch": 1.2715299887282723, + "grad_norm": 1.362295132597007, + "learning_rate": 7.131344579472805e-06, + "loss": 0.8578, + "step": 32150 + }, + { + "epoch": 1.2719254879471613, + "grad_norm": 1.2343421632611244, + "learning_rate": 7.129262883071543e-06, + "loss": 0.8707, + "step": 32160 + }, + { + "epoch": 1.2723209871660504, + "grad_norm": 1.1894184926185967, + "learning_rate": 7.1271807357288806e-06, + "loss": 0.8517, + "step": 32170 + }, + { + "epoch": 1.2727164863849394, + "grad_norm": 1.3818950005212534, + "learning_rate": 7.125098137885782e-06, + "loss": 0.8613, + "step": 32180 + }, + { + "epoch": 1.2731119856038284, + "grad_norm": 1.1603924401387735, + "learning_rate": 7.123015089983305e-06, + "loss": 0.8647, + "step": 32190 + }, + { + "epoch": 1.2735074848227175, + "grad_norm": 1.3160384735486619, + "learning_rate": 7.120931592462605e-06, + "loss": 0.8531, + "step": 32200 + }, + { + "epoch": 1.2739029840416065, + "grad_norm": 1.2835214870555722, + "learning_rate": 7.118847645764928e-06, + "loss": 0.8579, + "step": 32210 + }, + { + "epoch": 1.2742984832604956, + "grad_norm": 1.388077953747811, + "learning_rate": 7.116763250331621e-06, + "loss": 0.8438, + "step": 32220 + }, + { + "epoch": 1.2746939824793846, + "grad_norm": 1.2908555534585417, + "learning_rate": 7.114678406604122e-06, + "loss": 0.8606, + "step": 32230 + }, + { + "epoch": 1.2750894816982736, + "grad_norm": 1.2526526548436479, + "learning_rate": 7.112593115023966e-06, + "loss": 0.8703, + "step": 32240 + }, + { + "epoch": 1.2754849809171627, + "grad_norm": 1.2778467661614983, + "learning_rate": 7.110507376032782e-06, + "loss": 0.875, + "step": 32250 + }, + { + "epoch": 1.2758804801360517, + "grad_norm": 1.307059533776455, + "learning_rate": 7.108421190072292e-06, + "loss": 0.8613, + "step": 32260 + }, + { + "epoch": 1.2762759793549407, + "grad_norm": 1.2749438945023903, + "learning_rate": 7.106334557584317e-06, + "loss": 0.8642, + "step": 32270 + }, + { + "epoch": 1.2766714785738298, + "grad_norm": 1.149785518292012, + "learning_rate": 7.104247479010769e-06, + "loss": 0.8633, + "step": 32280 + }, + { + "epoch": 1.2770669777927188, + "grad_norm": 1.1588261175108288, + "learning_rate": 7.1021599547936535e-06, + "loss": 0.8427, + 
"step": 32290 + }, + { + "epoch": 1.2774624770116079, + "grad_norm": 1.220578410689106, + "learning_rate": 7.100071985375077e-06, + "loss": 0.8694, + "step": 32300 + }, + { + "epoch": 1.277857976230497, + "grad_norm": 1.2845897819526524, + "learning_rate": 7.097983571197231e-06, + "loss": 0.8577, + "step": 32310 + }, + { + "epoch": 1.278253475449386, + "grad_norm": 1.157867286741088, + "learning_rate": 7.095894712702408e-06, + "loss": 0.8766, + "step": 32320 + }, + { + "epoch": 1.278648974668275, + "grad_norm": 1.1440947803786123, + "learning_rate": 7.093805410332992e-06, + "loss": 0.8814, + "step": 32330 + }, + { + "epoch": 1.279044473887164, + "grad_norm": 1.2427330508660894, + "learning_rate": 7.091715664531462e-06, + "loss": 0.8721, + "step": 32340 + }, + { + "epoch": 1.279439973106053, + "grad_norm": 1.155313234504232, + "learning_rate": 7.089625475740389e-06, + "loss": 0.8949, + "step": 32350 + }, + { + "epoch": 1.279835472324942, + "grad_norm": 1.2304585447473886, + "learning_rate": 7.0875348444024415e-06, + "loss": 0.8515, + "step": 32360 + }, + { + "epoch": 1.2802309715438311, + "grad_norm": 1.1588000821111213, + "learning_rate": 7.085443770960377e-06, + "loss": 0.8797, + "step": 32370 + }, + { + "epoch": 1.2806264707627202, + "grad_norm": 1.4707994405581124, + "learning_rate": 7.083352255857051e-06, + "loss": 0.8673, + "step": 32380 + }, + { + "epoch": 1.2810219699816092, + "grad_norm": 1.2138252837973547, + "learning_rate": 7.081260299535408e-06, + "loss": 0.866, + "step": 32390 + }, + { + "epoch": 1.2814174692004983, + "grad_norm": 1.1581313079229234, + "learning_rate": 7.079167902438491e-06, + "loss": 0.8479, + "step": 32400 + }, + { + "epoch": 1.2818129684193873, + "grad_norm": 1.3744146620827908, + "learning_rate": 7.0770750650094335e-06, + "loss": 0.8642, + "step": 32410 + }, + { + "epoch": 1.2822084676382763, + "grad_norm": 1.3730341534264714, + "learning_rate": 7.07498178769146e-06, + "loss": 0.849, + "step": 32420 + }, + { + "epoch": 1.2826039668571654, + "grad_norm": 1.2149439420355683, + "learning_rate": 7.072888070927896e-06, + "loss": 0.8782, + "step": 32430 + }, + { + "epoch": 1.2829994660760544, + "grad_norm": 1.2047264973344871, + "learning_rate": 7.070793915162149e-06, + "loss": 0.8697, + "step": 32440 + }, + { + "epoch": 1.2833949652949435, + "grad_norm": 1.241366024965804, + "learning_rate": 7.06869932083773e-06, + "loss": 0.8654, + "step": 32450 + }, + { + "epoch": 1.2837904645138325, + "grad_norm": 1.2673240329572115, + "learning_rate": 7.066604288398235e-06, + "loss": 0.8397, + "step": 32460 + }, + { + "epoch": 1.2841859637327215, + "grad_norm": 1.4167497689562447, + "learning_rate": 7.064508818287357e-06, + "loss": 0.8651, + "step": 32470 + }, + { + "epoch": 1.2845814629516106, + "grad_norm": 1.3695440545744886, + "learning_rate": 7.06241291094888e-06, + "loss": 0.8683, + "step": 32480 + }, + { + "epoch": 1.2849769621704996, + "grad_norm": 1.2498082325079158, + "learning_rate": 7.060316566826684e-06, + "loss": 0.8492, + "step": 32490 + }, + { + "epoch": 1.2853724613893887, + "grad_norm": 1.267632121482906, + "learning_rate": 7.0582197863647375e-06, + "loss": 0.8679, + "step": 32500 + }, + { + "epoch": 1.2857679606082777, + "grad_norm": 1.2407303913357426, + "learning_rate": 7.0561225700071e-06, + "loss": 0.8603, + "step": 32510 + }, + { + "epoch": 1.2861634598271667, + "grad_norm": 1.0509310829115002, + "learning_rate": 7.054024918197928e-06, + "loss": 0.8571, + "step": 32520 + }, + { + "epoch": 1.2865589590460558, + "grad_norm": 1.1969784447272407, + 
"learning_rate": 7.0519268313814696e-06, + "loss": 0.8197, + "step": 32530 + }, + { + "epoch": 1.2869544582649448, + "grad_norm": 1.1805631308961655, + "learning_rate": 7.049828310002063e-06, + "loss": 0.878, + "step": 32540 + }, + { + "epoch": 1.2873499574838339, + "grad_norm": 1.211590238409041, + "learning_rate": 7.047729354504136e-06, + "loss": 0.881, + "step": 32550 + }, + { + "epoch": 1.287745456702723, + "grad_norm": 1.3159108830343862, + "learning_rate": 7.045629965332215e-06, + "loss": 0.8667, + "step": 32560 + }, + { + "epoch": 1.288140955921612, + "grad_norm": 1.306342828026448, + "learning_rate": 7.0435301429309145e-06, + "loss": 0.8824, + "step": 32570 + }, + { + "epoch": 1.288536455140501, + "grad_norm": 1.3214931389988513, + "learning_rate": 7.041429887744938e-06, + "loss": 0.8465, + "step": 32580 + }, + { + "epoch": 1.28893195435939, + "grad_norm": 0.9889328389785522, + "learning_rate": 7.039329200219087e-06, + "loss": 0.858, + "step": 32590 + }, + { + "epoch": 1.289327453578279, + "grad_norm": 1.1349958066128845, + "learning_rate": 7.0372280807982484e-06, + "loss": 0.8712, + "step": 32600 + }, + { + "epoch": 1.289722952797168, + "grad_norm": 1.258006047398987, + "learning_rate": 7.035126529927405e-06, + "loss": 0.847, + "step": 32610 + }, + { + "epoch": 1.2901184520160573, + "grad_norm": 1.1023639853239255, + "learning_rate": 7.033024548051629e-06, + "loss": 0.8653, + "step": 32620 + }, + { + "epoch": 1.2905139512349464, + "grad_norm": 1.3152795537492687, + "learning_rate": 7.030922135616083e-06, + "loss": 0.8741, + "step": 32630 + }, + { + "epoch": 1.2909094504538354, + "grad_norm": 1.1806476932698309, + "learning_rate": 7.028819293066024e-06, + "loss": 0.8672, + "step": 32640 + }, + { + "epoch": 1.2913049496727245, + "grad_norm": 1.1141376258193387, + "learning_rate": 7.026716020846796e-06, + "loss": 0.8784, + "step": 32650 + }, + { + "epoch": 1.2917004488916135, + "grad_norm": 1.2529273972844328, + "learning_rate": 7.0246123194038365e-06, + "loss": 0.8804, + "step": 32660 + }, + { + "epoch": 1.2920959481105025, + "grad_norm": 1.4931997642184218, + "learning_rate": 7.022508189182674e-06, + "loss": 0.8303, + "step": 32670 + }, + { + "epoch": 1.2924914473293916, + "grad_norm": 1.3230802120903722, + "learning_rate": 7.020403630628928e-06, + "loss": 0.8566, + "step": 32680 + }, + { + "epoch": 1.2928869465482806, + "grad_norm": 1.227822924653676, + "learning_rate": 7.018298644188306e-06, + "loss": 0.8539, + "step": 32690 + }, + { + "epoch": 1.2932824457671697, + "grad_norm": 1.1533594887464231, + "learning_rate": 7.016193230306609e-06, + "loss": 0.8614, + "step": 32700 + }, + { + "epoch": 1.2936779449860587, + "grad_norm": 1.1929006616099462, + "learning_rate": 7.014087389429729e-06, + "loss": 0.8575, + "step": 32710 + }, + { + "epoch": 1.2940734442049477, + "grad_norm": 1.1832882787041001, + "learning_rate": 7.011981122003644e-06, + "loss": 0.8453, + "step": 32720 + }, + { + "epoch": 1.2944689434238368, + "grad_norm": 1.1516203437159556, + "learning_rate": 7.009874428474428e-06, + "loss": 0.8541, + "step": 32730 + }, + { + "epoch": 1.2948644426427258, + "grad_norm": 1.2584537903986432, + "learning_rate": 7.007767309288241e-06, + "loss": 0.8529, + "step": 32740 + }, + { + "epoch": 1.2952599418616149, + "grad_norm": 1.279138274868544, + "learning_rate": 7.005659764891336e-06, + "loss": 0.881, + "step": 32750 + }, + { + "epoch": 1.295655441080504, + "grad_norm": 1.2066545095486305, + "learning_rate": 7.003551795730053e-06, + "loss": 0.8529, + "step": 32760 + }, + { + "epoch": 
1.296050940299393, + "grad_norm": 1.330795180593828, + "learning_rate": 7.001443402250827e-06, + "loss": 0.8731, + "step": 32770 + }, + { + "epoch": 1.296446439518282, + "grad_norm": 1.2399417490803584, + "learning_rate": 6.999334584900176e-06, + "loss": 0.8499, + "step": 32780 + }, + { + "epoch": 1.296841938737171, + "grad_norm": 1.1636648029297088, + "learning_rate": 6.997225344124713e-06, + "loss": 0.8628, + "step": 32790 + }, + { + "epoch": 1.29723743795606, + "grad_norm": 1.307264817924364, + "learning_rate": 6.99511568037114e-06, + "loss": 0.8615, + "step": 32800 + }, + { + "epoch": 1.297632937174949, + "grad_norm": 1.1892063499841834, + "learning_rate": 6.993005594086245e-06, + "loss": 0.8601, + "step": 32810 + }, + { + "epoch": 1.2980284363938381, + "grad_norm": 1.2240503677208405, + "learning_rate": 6.99089508571691e-06, + "loss": 0.8462, + "step": 32820 + }, + { + "epoch": 1.2984239356127272, + "grad_norm": 1.2772172323264246, + "learning_rate": 6.988784155710104e-06, + "loss": 0.8637, + "step": 32830 + }, + { + "epoch": 1.2988194348316162, + "grad_norm": 1.2301862880175405, + "learning_rate": 6.9866728045128865e-06, + "loss": 0.8436, + "step": 32840 + }, + { + "epoch": 1.2992149340505053, + "grad_norm": 1.6513206215345482, + "learning_rate": 6.9845610325724055e-06, + "loss": 0.8576, + "step": 32850 + }, + { + "epoch": 1.2996104332693943, + "grad_norm": 1.239212025168037, + "learning_rate": 6.982448840335898e-06, + "loss": 0.8627, + "step": 32860 + }, + { + "epoch": 1.3000059324882833, + "grad_norm": 1.2851046566080357, + "learning_rate": 6.980336228250688e-06, + "loss": 0.8559, + "step": 32870 + }, + { + "epoch": 1.3004014317071724, + "grad_norm": 1.5152286359169753, + "learning_rate": 6.978223196764193e-06, + "loss": 0.8715, + "step": 32880 + }, + { + "epoch": 1.3007969309260614, + "grad_norm": 1.1709620035776358, + "learning_rate": 6.976109746323918e-06, + "loss": 0.88, + "step": 32890 + }, + { + "epoch": 1.3011924301449505, + "grad_norm": 1.3306903177738802, + "learning_rate": 6.973995877377452e-06, + "loss": 0.8581, + "step": 32900 + }, + { + "epoch": 1.3015879293638395, + "grad_norm": 1.330333660144508, + "learning_rate": 6.971881590372478e-06, + "loss": 0.8667, + "step": 32910 + }, + { + "epoch": 1.3019834285827285, + "grad_norm": 1.3420758118515692, + "learning_rate": 6.969766885756768e-06, + "loss": 0.8543, + "step": 32920 + }, + { + "epoch": 1.3023789278016176, + "grad_norm": 1.1526467787467316, + "learning_rate": 6.967651763978176e-06, + "loss": 0.8798, + "step": 32930 + }, + { + "epoch": 1.3027744270205066, + "grad_norm": 1.1873607573663607, + "learning_rate": 6.96553622548465e-06, + "loss": 0.8423, + "step": 32940 + }, + { + "epoch": 1.3031699262393956, + "grad_norm": 1.0944298290898553, + "learning_rate": 6.963420270724226e-06, + "loss": 0.8833, + "step": 32950 + }, + { + "epoch": 1.3035654254582847, + "grad_norm": 1.3518531121827924, + "learning_rate": 6.961303900145026e-06, + "loss": 0.8568, + "step": 32960 + }, + { + "epoch": 1.3039609246771737, + "grad_norm": 1.1321459510444931, + "learning_rate": 6.959187114195263e-06, + "loss": 0.8567, + "step": 32970 + }, + { + "epoch": 1.3043564238960628, + "grad_norm": 1.327022156615552, + "learning_rate": 6.957069913323235e-06, + "loss": 0.8443, + "step": 32980 + }, + { + "epoch": 1.3047519231149518, + "grad_norm": 1.2173157735998366, + "learning_rate": 6.954952297977326e-06, + "loss": 0.8665, + "step": 32990 + }, + { + "epoch": 1.3051474223338408, + "grad_norm": 1.0807481929278648, + "learning_rate": 
6.952834268606012e-06, + "loss": 0.8777, + "step": 33000 + }, + { + "epoch": 1.3055429215527299, + "grad_norm": 1.3742677359684001, + "learning_rate": 6.95071582565786e-06, + "loss": 0.8525, + "step": 33010 + }, + { + "epoch": 1.305938420771619, + "grad_norm": 1.3606600450995943, + "learning_rate": 6.948596969581514e-06, + "loss": 0.8384, + "step": 33020 + }, + { + "epoch": 1.306333919990508, + "grad_norm": 1.136158220829523, + "learning_rate": 6.9464777008257134e-06, + "loss": 0.8752, + "step": 33030 + }, + { + "epoch": 1.306729419209397, + "grad_norm": 1.1861248660522725, + "learning_rate": 6.944358019839282e-06, + "loss": 0.8538, + "step": 33040 + }, + { + "epoch": 1.307124918428286, + "grad_norm": 1.329806111620537, + "learning_rate": 6.942237927071136e-06, + "loss": 0.8732, + "step": 33050 + }, + { + "epoch": 1.307520417647175, + "grad_norm": 1.206357477593603, + "learning_rate": 6.940117422970269e-06, + "loss": 0.8291, + "step": 33060 + }, + { + "epoch": 1.3079159168660641, + "grad_norm": 1.3414244290383166, + "learning_rate": 6.937996507985772e-06, + "loss": 0.8689, + "step": 33070 + }, + { + "epoch": 1.3083114160849532, + "grad_norm": 1.137362444088307, + "learning_rate": 6.935875182566817e-06, + "loss": 0.8615, + "step": 33080 + }, + { + "epoch": 1.3087069153038422, + "grad_norm": 1.252691957291996, + "learning_rate": 6.933753447162663e-06, + "loss": 0.8665, + "step": 33090 + }, + { + "epoch": 1.3091024145227312, + "grad_norm": 1.2378405415961447, + "learning_rate": 6.931631302222659e-06, + "loss": 0.8657, + "step": 33100 + }, + { + "epoch": 1.3094979137416203, + "grad_norm": 1.2054794615371671, + "learning_rate": 6.929508748196238e-06, + "loss": 0.8667, + "step": 33110 + }, + { + "epoch": 1.3098934129605095, + "grad_norm": 1.291034891508623, + "learning_rate": 6.9273857855329205e-06, + "loss": 0.8463, + "step": 33120 + }, + { + "epoch": 1.3102889121793986, + "grad_norm": 1.3620104750879796, + "learning_rate": 6.9252624146823145e-06, + "loss": 0.8573, + "step": 33130 + }, + { + "epoch": 1.3106844113982876, + "grad_norm": 1.1131473303813413, + "learning_rate": 6.923138636094112e-06, + "loss": 0.8596, + "step": 33140 + }, + { + "epoch": 1.3110799106171767, + "grad_norm": 1.344715684183009, + "learning_rate": 6.921014450218096e-06, + "loss": 0.8556, + "step": 33150 + }, + { + "epoch": 1.3114754098360657, + "grad_norm": 1.3155803367901568, + "learning_rate": 6.9188898575041285e-06, + "loss": 0.8596, + "step": 33160 + }, + { + "epoch": 1.3118709090549547, + "grad_norm": 1.3241146761456173, + "learning_rate": 6.916764858402165e-06, + "loss": 0.8465, + "step": 33170 + }, + { + "epoch": 1.3122664082738438, + "grad_norm": 1.143356208665181, + "learning_rate": 6.914639453362243e-06, + "loss": 0.873, + "step": 33180 + }, + { + "epoch": 1.3126619074927328, + "grad_norm": 1.0973811587257378, + "learning_rate": 6.912513642834486e-06, + "loss": 0.8522, + "step": 33190 + }, + { + "epoch": 1.3130574067116219, + "grad_norm": 1.185291657251887, + "learning_rate": 6.910387427269105e-06, + "loss": 0.8321, + "step": 33200 + }, + { + "epoch": 1.313452905930511, + "grad_norm": 1.2142142428488651, + "learning_rate": 6.908260807116397e-06, + "loss": 0.8692, + "step": 33210 + }, + { + "epoch": 1.3138484051494, + "grad_norm": 1.1024804805762416, + "learning_rate": 6.906133782826743e-06, + "loss": 0.8821, + "step": 33220 + }, + { + "epoch": 1.314243904368289, + "grad_norm": 1.1021963214872073, + "learning_rate": 6.904006354850609e-06, + "loss": 0.8774, + "step": 33230 + }, + { + "epoch": 1.314639403587178, + 
"grad_norm": 1.1987763176503048, + "learning_rate": 6.90187852363855e-06, + "loss": 0.872, + "step": 33240 + }, + { + "epoch": 1.315034902806067, + "grad_norm": 1.3930806447843427, + "learning_rate": 6.899750289641203e-06, + "loss": 0.8433, + "step": 33250 + }, + { + "epoch": 1.315430402024956, + "grad_norm": 1.247507100098502, + "learning_rate": 6.89762165330929e-06, + "loss": 0.8576, + "step": 33260 + }, + { + "epoch": 1.3158259012438451, + "grad_norm": 1.2370389380123525, + "learning_rate": 6.895492615093622e-06, + "loss": 0.8533, + "step": 33270 + }, + { + "epoch": 1.3162214004627342, + "grad_norm": 1.158688418804201, + "learning_rate": 6.893363175445091e-06, + "loss": 0.8695, + "step": 33280 + }, + { + "epoch": 1.3166168996816232, + "grad_norm": 1.2126993221703164, + "learning_rate": 6.891233334814679e-06, + "loss": 0.8694, + "step": 33290 + }, + { + "epoch": 1.3170123989005122, + "grad_norm": 1.086577661018436, + "learning_rate": 6.889103093653446e-06, + "loss": 0.8553, + "step": 33300 + }, + { + "epoch": 1.3174078981194013, + "grad_norm": 1.2486947425332644, + "learning_rate": 6.886972452412544e-06, + "loss": 0.8694, + "step": 33310 + }, + { + "epoch": 1.3178033973382903, + "grad_norm": 1.1760538021656208, + "learning_rate": 6.8848414115432015e-06, + "loss": 0.8639, + "step": 33320 + }, + { + "epoch": 1.3181988965571794, + "grad_norm": 1.1807796116246225, + "learning_rate": 6.882709971496742e-06, + "loss": 0.8454, + "step": 33330 + }, + { + "epoch": 1.3185943957760684, + "grad_norm": 1.240878394666726, + "learning_rate": 6.880578132724564e-06, + "loss": 0.8546, + "step": 33340 + }, + { + "epoch": 1.3189898949949574, + "grad_norm": 1.1069008351349787, + "learning_rate": 6.8784458956781585e-06, + "loss": 0.8496, + "step": 33350 + }, + { + "epoch": 1.3193853942138465, + "grad_norm": 1.2234019262391245, + "learning_rate": 6.876313260809091e-06, + "loss": 0.8555, + "step": 33360 + }, + { + "epoch": 1.3197808934327355, + "grad_norm": 1.2111034994332135, + "learning_rate": 6.8741802285690215e-06, + "loss": 0.8604, + "step": 33370 + }, + { + "epoch": 1.3201763926516246, + "grad_norm": 1.3217721402187455, + "learning_rate": 6.872046799409688e-06, + "loss": 0.8537, + "step": 33380 + }, + { + "epoch": 1.3205718918705136, + "grad_norm": 1.4869478535469731, + "learning_rate": 6.869912973782916e-06, + "loss": 0.8646, + "step": 33390 + }, + { + "epoch": 1.3209673910894026, + "grad_norm": 1.3113989351988935, + "learning_rate": 6.8677787521406106e-06, + "loss": 0.8518, + "step": 33400 + }, + { + "epoch": 1.3213628903082917, + "grad_norm": 1.2487815947994365, + "learning_rate": 6.865644134934765e-06, + "loss": 0.8489, + "step": 33410 + }, + { + "epoch": 1.3217583895271807, + "grad_norm": 1.3276884583763622, + "learning_rate": 6.863509122617455e-06, + "loss": 0.8794, + "step": 33420 + }, + { + "epoch": 1.3221538887460698, + "grad_norm": 1.2430325573801633, + "learning_rate": 6.861373715640838e-06, + "loss": 0.8557, + "step": 33430 + }, + { + "epoch": 1.3225493879649588, + "grad_norm": 1.1889424770083057, + "learning_rate": 6.859237914457158e-06, + "loss": 0.8544, + "step": 33440 + }, + { + "epoch": 1.3229448871838478, + "grad_norm": 1.0634004600507065, + "learning_rate": 6.857101719518741e-06, + "loss": 0.8466, + "step": 33450 + }, + { + "epoch": 1.3233403864027369, + "grad_norm": 1.3031637661453688, + "learning_rate": 6.854965131277994e-06, + "loss": 0.8485, + "step": 33460 + }, + { + "epoch": 1.323735885621626, + "grad_norm": 1.3549717105227457, + "learning_rate": 6.8528281501874125e-06, + "loss": 
0.8518, + "step": 33470 + }, + { + "epoch": 1.324131384840515, + "grad_norm": 1.2769228880239212, + "learning_rate": 6.850690776699574e-06, + "loss": 0.8333, + "step": 33480 + }, + { + "epoch": 1.324526884059404, + "grad_norm": 1.4371119507840768, + "learning_rate": 6.848553011267133e-06, + "loss": 0.8389, + "step": 33490 + }, + { + "epoch": 1.324922383278293, + "grad_norm": 1.1592394839803528, + "learning_rate": 6.846414854342834e-06, + "loss": 0.852, + "step": 33500 + }, + { + "epoch": 1.325317882497182, + "grad_norm": 1.1356945536930556, + "learning_rate": 6.844276306379502e-06, + "loss": 0.8475, + "step": 33510 + }, + { + "epoch": 1.3257133817160711, + "grad_norm": 1.3949911427559105, + "learning_rate": 6.8421373678300455e-06, + "loss": 0.8538, + "step": 33520 + }, + { + "epoch": 1.3261088809349602, + "grad_norm": 1.0593474204779023, + "learning_rate": 6.839998039147454e-06, + "loss": 0.8697, + "step": 33530 + }, + { + "epoch": 1.3265043801538492, + "grad_norm": 1.318571640354346, + "learning_rate": 6.837858320784801e-06, + "loss": 0.8623, + "step": 33540 + }, + { + "epoch": 1.3268998793727382, + "grad_norm": 1.2059943115831888, + "learning_rate": 6.835718213195242e-06, + "loss": 0.8628, + "step": 33550 + }, + { + "epoch": 1.3272953785916273, + "grad_norm": 1.55282491071514, + "learning_rate": 6.833577716832016e-06, + "loss": 0.8562, + "step": 33560 + }, + { + "epoch": 1.3276908778105163, + "grad_norm": 1.1657498620919489, + "learning_rate": 6.831436832148443e-06, + "loss": 0.8569, + "step": 33570 + }, + { + "epoch": 1.3280863770294054, + "grad_norm": 1.2380918473400322, + "learning_rate": 6.829295559597924e-06, + "loss": 0.8212, + "step": 33580 + }, + { + "epoch": 1.3284818762482944, + "grad_norm": 1.2489695909262502, + "learning_rate": 6.827153899633947e-06, + "loss": 0.865, + "step": 33590 + }, + { + "epoch": 1.3288773754671834, + "grad_norm": 1.3043823752910852, + "learning_rate": 6.825011852710077e-06, + "loss": 0.8637, + "step": 33600 + }, + { + "epoch": 1.3292728746860725, + "grad_norm": 1.2951103741104748, + "learning_rate": 6.822869419279963e-06, + "loss": 0.8836, + "step": 33610 + }, + { + "epoch": 1.3296683739049615, + "grad_norm": 1.2701479460945941, + "learning_rate": 6.8207265997973335e-06, + "loss": 0.8493, + "step": 33620 + }, + { + "epoch": 1.3300638731238505, + "grad_norm": 1.2454785089849922, + "learning_rate": 6.818583394716005e-06, + "loss": 0.8623, + "step": 33630 + }, + { + "epoch": 1.3304593723427396, + "grad_norm": 1.0708018093820824, + "learning_rate": 6.816439804489869e-06, + "loss": 0.8566, + "step": 33640 + }, + { + "epoch": 1.3308548715616286, + "grad_norm": 1.4554464804721812, + "learning_rate": 6.814295829572904e-06, + "loss": 0.8418, + "step": 33650 + }, + { + "epoch": 1.3312503707805177, + "grad_norm": 1.1497445530154269, + "learning_rate": 6.812151470419164e-06, + "loss": 0.8757, + "step": 33660 + }, + { + "epoch": 1.3316458699994067, + "grad_norm": 1.3327215587089964, + "learning_rate": 6.810006727482789e-06, + "loss": 0.8093, + "step": 33670 + }, + { + "epoch": 1.3320413692182957, + "grad_norm": 1.2933360065205524, + "learning_rate": 6.807861601217998e-06, + "loss": 0.8532, + "step": 33680 + }, + { + "epoch": 1.3324368684371848, + "grad_norm": 1.3914905252875656, + "learning_rate": 6.805716092079093e-06, + "loss": 0.861, + "step": 33690 + }, + { + "epoch": 1.3328323676560738, + "grad_norm": 1.200523756782097, + "learning_rate": 6.803570200520455e-06, + "loss": 0.8316, + "step": 33700 + }, + { + "epoch": 1.3332278668749629, + "grad_norm": 
1.1503021028837674, + "learning_rate": 6.801423926996547e-06, + "loss": 0.8557, + "step": 33710 + }, + { + "epoch": 1.333623366093852, + "grad_norm": 1.4068837535234189, + "learning_rate": 6.799277271961915e-06, + "loss": 0.8661, + "step": 33720 + }, + { + "epoch": 1.334018865312741, + "grad_norm": 1.1911164786380948, + "learning_rate": 6.797130235871182e-06, + "loss": 0.8535, + "step": 33730 + }, + { + "epoch": 1.33441436453163, + "grad_norm": 1.1319739215457856, + "learning_rate": 6.794982819179053e-06, + "loss": 0.8611, + "step": 33740 + }, + { + "epoch": 1.334809863750519, + "grad_norm": 1.2673198760909448, + "learning_rate": 6.7928350223403164e-06, + "loss": 0.8591, + "step": 33750 + }, + { + "epoch": 1.335205362969408, + "grad_norm": 1.0168022634246991, + "learning_rate": 6.790686845809835e-06, + "loss": 0.8707, + "step": 33760 + }, + { + "epoch": 1.335600862188297, + "grad_norm": 1.2179878258959906, + "learning_rate": 6.788538290042559e-06, + "loss": 0.8362, + "step": 33770 + }, + { + "epoch": 1.3359963614071861, + "grad_norm": 1.2245559461484272, + "learning_rate": 6.7863893554935165e-06, + "loss": 0.8525, + "step": 33780 + }, + { + "epoch": 1.3363918606260752, + "grad_norm": 1.2498041038841285, + "learning_rate": 6.784240042617811e-06, + "loss": 0.8831, + "step": 33790 + }, + { + "epoch": 1.3367873598449642, + "grad_norm": 1.1430104987234004, + "learning_rate": 6.782090351870634e-06, + "loss": 0.8652, + "step": 33800 + }, + { + "epoch": 1.3371828590638533, + "grad_norm": 1.3902395695236862, + "learning_rate": 6.77994028370725e-06, + "loss": 0.8486, + "step": 33810 + }, + { + "epoch": 1.3375783582827423, + "grad_norm": 1.4514697568846802, + "learning_rate": 6.777789838583009e-06, + "loss": 0.8411, + "step": 33820 + }, + { + "epoch": 1.3379738575016313, + "grad_norm": 1.337396064903591, + "learning_rate": 6.775639016953337e-06, + "loss": 0.875, + "step": 33830 + }, + { + "epoch": 1.3383693567205204, + "grad_norm": 1.3687051820712348, + "learning_rate": 6.773487819273743e-06, + "loss": 0.8469, + "step": 33840 + }, + { + "epoch": 1.3387648559394094, + "grad_norm": 1.2431201248578243, + "learning_rate": 6.771336245999812e-06, + "loss": 0.8405, + "step": 33850 + }, + { + "epoch": 1.3391603551582985, + "grad_norm": 1.3211559631646084, + "learning_rate": 6.769184297587211e-06, + "loss": 0.8824, + "step": 33860 + }, + { + "epoch": 1.3395558543771875, + "grad_norm": 1.2124737620325858, + "learning_rate": 6.767031974491686e-06, + "loss": 0.8442, + "step": 33870 + }, + { + "epoch": 1.3399513535960765, + "grad_norm": 1.3429892973983668, + "learning_rate": 6.7648792771690605e-06, + "loss": 0.8538, + "step": 33880 + }, + { + "epoch": 1.3403468528149656, + "grad_norm": 1.2078114742764365, + "learning_rate": 6.762726206075243e-06, + "loss": 0.8583, + "step": 33890 + }, + { + "epoch": 1.3407423520338546, + "grad_norm": 1.2348618830683444, + "learning_rate": 6.760572761666213e-06, + "loss": 0.8637, + "step": 33900 + }, + { + "epoch": 1.3411378512527437, + "grad_norm": 1.1934770414561016, + "learning_rate": 6.758418944398034e-06, + "loss": 0.8365, + "step": 33910 + }, + { + "epoch": 1.3415333504716327, + "grad_norm": 1.3589564437514263, + "learning_rate": 6.75626475472685e-06, + "loss": 0.844, + "step": 33920 + }, + { + "epoch": 1.3419288496905217, + "grad_norm": 1.1779583198085724, + "learning_rate": 6.754110193108878e-06, + "loss": 0.8483, + "step": 33930 + }, + { + "epoch": 1.3423243489094108, + "grad_norm": 1.299740785271766, + "learning_rate": 6.751955260000419e-06, + "loss": 0.8646, + "step": 
33940 + }, + { + "epoch": 1.3427198481282998, + "grad_norm": 1.5451437127726837, + "learning_rate": 6.74979995585785e-06, + "loss": 0.8327, + "step": 33950 + }, + { + "epoch": 1.343115347347189, + "grad_norm": 1.1520028216361868, + "learning_rate": 6.74764428113763e-06, + "loss": 0.8452, + "step": 33960 + }, + { + "epoch": 1.343510846566078, + "grad_norm": 1.4193225343393296, + "learning_rate": 6.7454882362962914e-06, + "loss": 0.8527, + "step": 33970 + }, + { + "epoch": 1.3439063457849671, + "grad_norm": 1.3050468634412833, + "learning_rate": 6.743331821790449e-06, + "loss": 0.8536, + "step": 33980 + }, + { + "epoch": 1.3443018450038562, + "grad_norm": 1.1616293448504966, + "learning_rate": 6.741175038076792e-06, + "loss": 0.8427, + "step": 33990 + }, + { + "epoch": 1.3446973442227452, + "grad_norm": 1.1662503446306764, + "learning_rate": 6.739017885612094e-06, + "loss": 0.8633, + "step": 34000 + }, + { + "epoch": 1.3450928434416343, + "grad_norm": 1.2772301061234932, + "learning_rate": 6.7368603648532e-06, + "loss": 0.8514, + "step": 34010 + }, + { + "epoch": 1.3454883426605233, + "grad_norm": 1.4417098315570034, + "learning_rate": 6.7347024762570366e-06, + "loss": 0.8354, + "step": 34020 + }, + { + "epoch": 1.3458838418794123, + "grad_norm": 1.164445967431401, + "learning_rate": 6.732544220280609e-06, + "loss": 0.8376, + "step": 34030 + }, + { + "epoch": 1.3462793410983014, + "grad_norm": 1.251551643742644, + "learning_rate": 6.730385597380997e-06, + "loss": 0.8511, + "step": 34040 + }, + { + "epoch": 1.3466748403171904, + "grad_norm": 1.1414447379897568, + "learning_rate": 6.728226608015361e-06, + "loss": 0.8296, + "step": 34050 + }, + { + "epoch": 1.3470703395360795, + "grad_norm": 1.2432911912788316, + "learning_rate": 6.726067252640938e-06, + "loss": 0.8437, + "step": 34060 + }, + { + "epoch": 1.3474658387549685, + "grad_norm": 1.3156479686323665, + "learning_rate": 6.723907531715042e-06, + "loss": 0.8248, + "step": 34070 + }, + { + "epoch": 1.3478613379738575, + "grad_norm": 1.3211936006580414, + "learning_rate": 6.721747445695065e-06, + "loss": 0.8214, + "step": 34080 + }, + { + "epoch": 1.3482568371927466, + "grad_norm": 1.3908615375464553, + "learning_rate": 6.719586995038478e-06, + "loss": 0.8495, + "step": 34090 + }, + { + "epoch": 1.3486523364116356, + "grad_norm": 1.3802599431946245, + "learning_rate": 6.717426180202824e-06, + "loss": 0.8368, + "step": 34100 + }, + { + "epoch": 1.3490478356305247, + "grad_norm": 1.2727497744015028, + "learning_rate": 6.715265001645727e-06, + "loss": 0.8541, + "step": 34110 + }, + { + "epoch": 1.3494433348494137, + "grad_norm": 1.4506087109245869, + "learning_rate": 6.713103459824892e-06, + "loss": 0.8588, + "step": 34120 + }, + { + "epoch": 1.3498388340683027, + "grad_norm": 1.3463215218321456, + "learning_rate": 6.710941555198092e-06, + "loss": 0.843, + "step": 34130 + }, + { + "epoch": 1.3502343332871918, + "grad_norm": 1.3087961888434054, + "learning_rate": 6.708779288223182e-06, + "loss": 0.8451, + "step": 34140 + }, + { + "epoch": 1.3506298325060808, + "grad_norm": 1.1124481495965013, + "learning_rate": 6.706616659358094e-06, + "loss": 0.8414, + "step": 34150 + }, + { + "epoch": 1.3510253317249699, + "grad_norm": 1.2690093272598362, + "learning_rate": 6.704453669060838e-06, + "loss": 0.8534, + "step": 34160 + }, + { + "epoch": 1.351420830943859, + "grad_norm": 1.1536777586610498, + "learning_rate": 6.702290317789493e-06, + "loss": 0.8673, + "step": 34170 + }, + { + "epoch": 1.351816330162748, + "grad_norm": 1.378546467478324, + 
"learning_rate": 6.700126606002224e-06, + "loss": 0.8476, + "step": 34180 + }, + { + "epoch": 1.352211829381637, + "grad_norm": 1.1295504136948702, + "learning_rate": 6.697962534157266e-06, + "loss": 0.8521, + "step": 34190 + }, + { + "epoch": 1.352607328600526, + "grad_norm": 1.2999837116331456, + "learning_rate": 6.695798102712934e-06, + "loss": 0.843, + "step": 34200 + }, + { + "epoch": 1.353002827819415, + "grad_norm": 1.453651270531441, + "learning_rate": 6.693633312127617e-06, + "loss": 0.8429, + "step": 34210 + }, + { + "epoch": 1.353398327038304, + "grad_norm": 1.0953123100106354, + "learning_rate": 6.691468162859779e-06, + "loss": 0.8411, + "step": 34220 + }, + { + "epoch": 1.3537938262571931, + "grad_norm": 1.410424971680694, + "learning_rate": 6.689302655367962e-06, + "loss": 0.85, + "step": 34230 + }, + { + "epoch": 1.3541893254760822, + "grad_norm": 1.225733223059829, + "learning_rate": 6.687136790110786e-06, + "loss": 0.8684, + "step": 34240 + }, + { + "epoch": 1.3545848246949712, + "grad_norm": 1.18010720556023, + "learning_rate": 6.68497056754694e-06, + "loss": 0.8368, + "step": 34250 + }, + { + "epoch": 1.3549803239138603, + "grad_norm": 1.3575984163740973, + "learning_rate": 6.682803988135196e-06, + "loss": 0.8386, + "step": 34260 + }, + { + "epoch": 1.3553758231327493, + "grad_norm": 1.2307639756995665, + "learning_rate": 6.680637052334399e-06, + "loss": 0.855, + "step": 34270 + }, + { + "epoch": 1.3557713223516383, + "grad_norm": 1.4622867136964524, + "learning_rate": 6.678469760603465e-06, + "loss": 0.8356, + "step": 34280 + }, + { + "epoch": 1.3561668215705274, + "grad_norm": 1.2123444778673635, + "learning_rate": 6.676302113401393e-06, + "loss": 0.863, + "step": 34290 + }, + { + "epoch": 1.3565623207894164, + "grad_norm": 1.1566884293731667, + "learning_rate": 6.674134111187252e-06, + "loss": 0.8462, + "step": 34300 + }, + { + "epoch": 1.3569578200083054, + "grad_norm": 1.5245027198315069, + "learning_rate": 6.671965754420187e-06, + "loss": 0.8551, + "step": 34310 + }, + { + "epoch": 1.3573533192271945, + "grad_norm": 1.5503509937131879, + "learning_rate": 6.669797043559419e-06, + "loss": 0.8534, + "step": 34320 + }, + { + "epoch": 1.3577488184460835, + "grad_norm": 1.5161801579136003, + "learning_rate": 6.667627979064246e-06, + "loss": 0.8629, + "step": 34330 + }, + { + "epoch": 1.3581443176649726, + "grad_norm": 1.3933323360333776, + "learning_rate": 6.665458561394037e-06, + "loss": 0.8188, + "step": 34340 + }, + { + "epoch": 1.3585398168838616, + "grad_norm": 1.2125322859327403, + "learning_rate": 6.663288791008238e-06, + "loss": 0.8671, + "step": 34350 + }, + { + "epoch": 1.3589353161027506, + "grad_norm": 1.09181004939391, + "learning_rate": 6.661118668366369e-06, + "loss": 0.8522, + "step": 34360 + }, + { + "epoch": 1.3593308153216397, + "grad_norm": 1.1541803179161123, + "learning_rate": 6.658948193928023e-06, + "loss": 0.8571, + "step": 34370 + }, + { + "epoch": 1.3597263145405287, + "grad_norm": 1.1401198392344825, + "learning_rate": 6.656777368152871e-06, + "loss": 0.8594, + "step": 34380 + }, + { + "epoch": 1.3601218137594178, + "grad_norm": 1.0777414809532697, + "learning_rate": 6.654606191500659e-06, + "loss": 0.8619, + "step": 34390 + }, + { + "epoch": 1.3605173129783068, + "grad_norm": 1.182354873528943, + "learning_rate": 6.6524346644311995e-06, + "loss": 0.8496, + "step": 34400 + }, + { + "epoch": 1.3609128121971958, + "grad_norm": 1.22832152985886, + "learning_rate": 6.65026278740439e-06, + "loss": 0.8707, + "step": 34410 + }, + { + "epoch": 
1.3613083114160849, + "grad_norm": 1.0734913876282295, + "learning_rate": 6.648090560880194e-06, + "loss": 0.8555, + "step": 34420 + }, + { + "epoch": 1.361703810634974, + "grad_norm": 1.3219991065495482, + "learning_rate": 6.645917985318653e-06, + "loss": 0.8441, + "step": 34430 + }, + { + "epoch": 1.362099309853863, + "grad_norm": 1.3031809090034743, + "learning_rate": 6.6437450611798805e-06, + "loss": 0.8383, + "step": 34440 + }, + { + "epoch": 1.362494809072752, + "grad_norm": 1.2354011042759256, + "learning_rate": 6.641571788924065e-06, + "loss": 0.8269, + "step": 34450 + }, + { + "epoch": 1.3628903082916413, + "grad_norm": 1.2798956619441368, + "learning_rate": 6.63939816901147e-06, + "loss": 0.8216, + "step": 34460 + }, + { + "epoch": 1.3632858075105303, + "grad_norm": 1.3927668271795661, + "learning_rate": 6.637224201902427e-06, + "loss": 0.8109, + "step": 34470 + }, + { + "epoch": 1.3636813067294193, + "grad_norm": 1.2234481608957164, + "learning_rate": 6.635049888057348e-06, + "loss": 0.8444, + "step": 34480 + }, + { + "epoch": 1.3640768059483084, + "grad_norm": 1.2518580063674394, + "learning_rate": 6.632875227936715e-06, + "loss": 0.8522, + "step": 34490 + }, + { + "epoch": 1.3644723051671974, + "grad_norm": 1.4531456630970876, + "learning_rate": 6.6307002220010826e-06, + "loss": 0.821, + "step": 34500 + }, + { + "epoch": 1.3648678043860865, + "grad_norm": 1.269765392683203, + "learning_rate": 6.6285248707110816e-06, + "loss": 0.8729, + "step": 34510 + }, + { + "epoch": 1.3652633036049755, + "grad_norm": 1.275439457296418, + "learning_rate": 6.626349174527413e-06, + "loss": 0.8522, + "step": 34520 + }, + { + "epoch": 1.3656588028238645, + "grad_norm": 1.247794904540906, + "learning_rate": 6.624173133910852e-06, + "loss": 0.8498, + "step": 34530 + }, + { + "epoch": 1.3660543020427536, + "grad_norm": 1.3635336662393571, + "learning_rate": 6.621996749322247e-06, + "loss": 0.8154, + "step": 34540 + }, + { + "epoch": 1.3664498012616426, + "grad_norm": 1.2233774831688915, + "learning_rate": 6.619820021222518e-06, + "loss": 0.8413, + "step": 34550 + }, + { + "epoch": 1.3668453004805317, + "grad_norm": 1.4141520546962856, + "learning_rate": 6.61764295007266e-06, + "loss": 0.839, + "step": 34560 + }, + { + "epoch": 1.3672407996994207, + "grad_norm": 1.22293431899252, + "learning_rate": 6.61546553633374e-06, + "loss": 0.8267, + "step": 34570 + }, + { + "epoch": 1.3676362989183097, + "grad_norm": 1.1000438815382645, + "learning_rate": 6.613287780466895e-06, + "loss": 0.8605, + "step": 34580 + }, + { + "epoch": 1.3680317981371988, + "grad_norm": 1.1860041159559163, + "learning_rate": 6.6111096829333374e-06, + "loss": 0.8306, + "step": 34590 + }, + { + "epoch": 1.3684272973560878, + "grad_norm": 1.1880673513559445, + "learning_rate": 6.608931244194352e-06, + "loss": 0.8447, + "step": 34600 + }, + { + "epoch": 1.3688227965749769, + "grad_norm": 1.1607562036365922, + "learning_rate": 6.606752464711292e-06, + "loss": 0.8428, + "step": 34610 + }, + { + "epoch": 1.369218295793866, + "grad_norm": 1.3842871467397313, + "learning_rate": 6.604573344945587e-06, + "loss": 0.8142, + "step": 34620 + }, + { + "epoch": 1.369613795012755, + "grad_norm": 1.320298925785561, + "learning_rate": 6.602393885358737e-06, + "loss": 0.8763, + "step": 34630 + }, + { + "epoch": 1.370009294231644, + "grad_norm": 1.2504716389353607, + "learning_rate": 6.600214086412317e-06, + "loss": 0.8396, + "step": 34640 + }, + { + "epoch": 1.370404793450533, + "grad_norm": 1.3968478723056403, + "learning_rate": 
6.598033948567968e-06, + "loss": 0.8349, + "step": 34650 + }, + { + "epoch": 1.370800292669422, + "grad_norm": 1.5643325909913062, + "learning_rate": 6.595853472287405e-06, + "loss": 0.845, + "step": 34660 + }, + { + "epoch": 1.371195791888311, + "grad_norm": 1.4103997179463394, + "learning_rate": 6.5936726580324174e-06, + "loss": 0.8409, + "step": 34670 + }, + { + "epoch": 1.3715912911072001, + "grad_norm": 1.305450632095221, + "learning_rate": 6.5914915062648645e-06, + "loss": 0.844, + "step": 34680 + }, + { + "epoch": 1.3719867903260892, + "grad_norm": 1.1567822256370635, + "learning_rate": 6.589310017446675e-06, + "loss": 0.8573, + "step": 34690 + }, + { + "epoch": 1.3723822895449782, + "grad_norm": 1.196494153500487, + "learning_rate": 6.587128192039854e-06, + "loss": 0.8542, + "step": 34700 + }, + { + "epoch": 1.3727777887638672, + "grad_norm": 1.2730896697999838, + "learning_rate": 6.584946030506473e-06, + "loss": 0.8669, + "step": 34710 + }, + { + "epoch": 1.3731732879827563, + "grad_norm": 1.474010777500466, + "learning_rate": 6.5827635333086745e-06, + "loss": 0.8581, + "step": 34720 + }, + { + "epoch": 1.3735687872016453, + "grad_norm": 1.2589007677404542, + "learning_rate": 6.580580700908677e-06, + "loss": 0.8618, + "step": 34730 + }, + { + "epoch": 1.3739642864205344, + "grad_norm": 1.381117426136932, + "learning_rate": 6.578397533768765e-06, + "loss": 0.8431, + "step": 34740 + }, + { + "epoch": 1.3743597856394234, + "grad_norm": 1.2641158865669493, + "learning_rate": 6.576214032351298e-06, + "loss": 0.8439, + "step": 34750 + }, + { + "epoch": 1.3747552848583124, + "grad_norm": 1.3668795160919065, + "learning_rate": 6.574030197118703e-06, + "loss": 0.8275, + "step": 34760 + }, + { + "epoch": 1.3751507840772015, + "grad_norm": 1.0883437943024825, + "learning_rate": 6.5718460285334775e-06, + "loss": 0.844, + "step": 34770 + }, + { + "epoch": 1.3755462832960905, + "grad_norm": 1.2244829089854836, + "learning_rate": 6.5696615270581936e-06, + "loss": 0.8334, + "step": 34780 + }, + { + "epoch": 1.3759417825149796, + "grad_norm": 1.5283477695391745, + "learning_rate": 6.567476693155489e-06, + "loss": 0.8374, + "step": 34790 + }, + { + "epoch": 1.3763372817338686, + "grad_norm": 1.178492190700992, + "learning_rate": 6.565291527288076e-06, + "loss": 0.862, + "step": 34800 + }, + { + "epoch": 1.3767327809527576, + "grad_norm": 1.561607205011345, + "learning_rate": 6.563106029918733e-06, + "loss": 0.8537, + "step": 34810 + }, + { + "epoch": 1.3771282801716467, + "grad_norm": 1.331429117616901, + "learning_rate": 6.5609202015103145e-06, + "loss": 0.8465, + "step": 34820 + }, + { + "epoch": 1.3775237793905357, + "grad_norm": 1.1821776862930318, + "learning_rate": 6.558734042525738e-06, + "loss": 0.8703, + "step": 34830 + }, + { + "epoch": 1.3779192786094248, + "grad_norm": 1.442958513220388, + "learning_rate": 6.556547553427996e-06, + "loss": 0.8476, + "step": 34840 + }, + { + "epoch": 1.3783147778283138, + "grad_norm": 1.366212394312256, + "learning_rate": 6.554360734680148e-06, + "loss": 0.8386, + "step": 34850 + }, + { + "epoch": 1.3787102770472028, + "grad_norm": 1.2017621365721718, + "learning_rate": 6.552173586745327e-06, + "loss": 0.8558, + "step": 34860 + }, + { + "epoch": 1.3791057762660919, + "grad_norm": 1.3860663031720417, + "learning_rate": 6.549986110086733e-06, + "loss": 0.8349, + "step": 34870 + }, + { + "epoch": 1.379501275484981, + "grad_norm": 1.2532840687301947, + "learning_rate": 6.547798305167637e-06, + "loss": 0.8376, + "step": 34880 + }, + { + "epoch": 
1.37989677470387, + "grad_norm": 1.18442886458687, + "learning_rate": 6.545610172451374e-06, + "loss": 0.8441, + "step": 34890 + }, + { + "epoch": 1.380292273922759, + "grad_norm": 1.0962881909095727, + "learning_rate": 6.543421712401356e-06, + "loss": 0.84, + "step": 34900 + }, + { + "epoch": 1.380687773141648, + "grad_norm": 1.1955005513882742, + "learning_rate": 6.541232925481064e-06, + "loss": 0.8486, + "step": 34910 + }, + { + "epoch": 1.381083272360537, + "grad_norm": 1.1142234667903104, + "learning_rate": 6.539043812154042e-06, + "loss": 0.8517, + "step": 34920 + }, + { + "epoch": 1.3814787715794261, + "grad_norm": 1.3360733090763144, + "learning_rate": 6.536854372883907e-06, + "loss": 0.8432, + "step": 34930 + }, + { + "epoch": 1.3818742707983152, + "grad_norm": 1.129238387752422, + "learning_rate": 6.534664608134347e-06, + "loss": 0.8493, + "step": 34940 + }, + { + "epoch": 1.3822697700172042, + "grad_norm": 1.3742686297098083, + "learning_rate": 6.532474518369114e-06, + "loss": 0.8096, + "step": 34950 + }, + { + "epoch": 1.3826652692360932, + "grad_norm": 1.107977512462662, + "learning_rate": 6.530284104052034e-06, + "loss": 0.8471, + "step": 34960 + }, + { + "epoch": 1.3830607684549823, + "grad_norm": 1.2079645630114648, + "learning_rate": 6.5280933656469966e-06, + "loss": 0.8422, + "step": 34970 + }, + { + "epoch": 1.3834562676738713, + "grad_norm": 1.248101903361315, + "learning_rate": 6.525902303617964e-06, + "loss": 0.839, + "step": 34980 + }, + { + "epoch": 1.3838517668927603, + "grad_norm": 1.2550406829010792, + "learning_rate": 6.5237109184289645e-06, + "loss": 0.8557, + "step": 34990 + }, + { + "epoch": 1.3842472661116494, + "grad_norm": 1.201568184603744, + "learning_rate": 6.5215192105440986e-06, + "loss": 0.8299, + "step": 35000 + }, + { + "epoch": 1.3846427653305384, + "grad_norm": 1.3283519945453417, + "learning_rate": 6.519327180427532e-06, + "loss": 0.8368, + "step": 35010 + }, + { + "epoch": 1.3850382645494275, + "grad_norm": 1.31814484909803, + "learning_rate": 6.5171348285434965e-06, + "loss": 0.8475, + "step": 35020 + }, + { + "epoch": 1.3854337637683165, + "grad_norm": 1.2239876943549137, + "learning_rate": 6.514942155356295e-06, + "loss": 0.8545, + "step": 35030 + }, + { + "epoch": 1.3858292629872055, + "grad_norm": 1.514229252479359, + "learning_rate": 6.512749161330302e-06, + "loss": 0.8487, + "step": 35040 + }, + { + "epoch": 1.3862247622060946, + "grad_norm": 1.3827485601012202, + "learning_rate": 6.510555846929952e-06, + "loss": 0.8478, + "step": 35050 + }, + { + "epoch": 1.3866202614249836, + "grad_norm": 1.4008524664576336, + "learning_rate": 6.508362212619752e-06, + "loss": 0.8395, + "step": 35060 + }, + { + "epoch": 1.3870157606438727, + "grad_norm": 1.298598566093244, + "learning_rate": 6.506168258864278e-06, + "loss": 0.8348, + "step": 35070 + }, + { + "epoch": 1.3874112598627617, + "grad_norm": 1.1360269054483327, + "learning_rate": 6.503973986128171e-06, + "loss": 0.828, + "step": 35080 + }, + { + "epoch": 1.3878067590816507, + "grad_norm": 1.4389049366690272, + "learning_rate": 6.5017793948761384e-06, + "loss": 0.8268, + "step": 35090 + }, + { + "epoch": 1.3882022583005398, + "grad_norm": 1.2444008890484277, + "learning_rate": 6.499584485572959e-06, + "loss": 0.8358, + "step": 35100 + }, + { + "epoch": 1.3885977575194288, + "grad_norm": 1.3307423019019782, + "learning_rate": 6.497389258683477e-06, + "loss": 0.8327, + "step": 35110 + }, + { + "epoch": 1.3889932567383179, + "grad_norm": 1.269584181289205, + "learning_rate": 6.495193714672604e-06, 
+ "loss": 0.8289, + "step": 35120 + }, + { + "epoch": 1.389388755957207, + "grad_norm": 1.2438158441455647, + "learning_rate": 6.49299785400532e-06, + "loss": 0.8602, + "step": 35130 + }, + { + "epoch": 1.389784255176096, + "grad_norm": 1.1507379633257733, + "learning_rate": 6.490801677146666e-06, + "loss": 0.8424, + "step": 35140 + }, + { + "epoch": 1.390179754394985, + "grad_norm": 1.3685481941316784, + "learning_rate": 6.488605184561758e-06, + "loss": 0.8402, + "step": 35150 + }, + { + "epoch": 1.390575253613874, + "grad_norm": 1.1434655976407526, + "learning_rate": 6.486408376715776e-06, + "loss": 0.8555, + "step": 35160 + }, + { + "epoch": 1.390970752832763, + "grad_norm": 1.1771405203531213, + "learning_rate": 6.484211254073965e-06, + "loss": 0.8352, + "step": 35170 + }, + { + "epoch": 1.391366252051652, + "grad_norm": 1.2965298049115275, + "learning_rate": 6.482013817101637e-06, + "loss": 0.8527, + "step": 35180 + }, + { + "epoch": 1.3917617512705411, + "grad_norm": 1.1961016513012066, + "learning_rate": 6.479816066264174e-06, + "loss": 0.8689, + "step": 35190 + }, + { + "epoch": 1.3921572504894302, + "grad_norm": 1.3965942730266359, + "learning_rate": 6.47761800202702e-06, + "loss": 0.8459, + "step": 35200 + }, + { + "epoch": 1.3925527497083192, + "grad_norm": 1.396158719703272, + "learning_rate": 6.475419624855688e-06, + "loss": 0.8479, + "step": 35210 + }, + { + "epoch": 1.3929482489272083, + "grad_norm": 1.3669426589320808, + "learning_rate": 6.473220935215756e-06, + "loss": 0.8302, + "step": 35220 + }, + { + "epoch": 1.3933437481460973, + "grad_norm": 1.245650658848858, + "learning_rate": 6.471021933572871e-06, + "loss": 0.8407, + "step": 35230 + }, + { + "epoch": 1.3937392473649863, + "grad_norm": 1.3690219620216622, + "learning_rate": 6.4688226203927405e-06, + "loss": 0.8263, + "step": 35240 + }, + { + "epoch": 1.3941347465838754, + "grad_norm": 1.2439929336670337, + "learning_rate": 6.466622996141145e-06, + "loss": 0.849, + "step": 35250 + }, + { + "epoch": 1.3945302458027644, + "grad_norm": 1.3512940828896634, + "learning_rate": 6.464423061283925e-06, + "loss": 0.8168, + "step": 35260 + }, + { + "epoch": 1.3949257450216535, + "grad_norm": 1.343577251605921, + "learning_rate": 6.462222816286989e-06, + "loss": 0.8197, + "step": 35270 + }, + { + "epoch": 1.3953212442405425, + "grad_norm": 1.1356073346978275, + "learning_rate": 6.460022261616312e-06, + "loss": 0.8872, + "step": 35280 + }, + { + "epoch": 1.3957167434594315, + "grad_norm": 1.6360293026849844, + "learning_rate": 6.457821397737932e-06, + "loss": 0.838, + "step": 35290 + }, + { + "epoch": 1.3961122426783208, + "grad_norm": 1.2370393670702475, + "learning_rate": 6.455620225117957e-06, + "loss": 0.8389, + "step": 35300 + }, + { + "epoch": 1.3965077418972098, + "grad_norm": 1.1784010165804566, + "learning_rate": 6.453418744222557e-06, + "loss": 0.8389, + "step": 35310 + }, + { + "epoch": 1.3969032411160989, + "grad_norm": 1.5870653058814774, + "learning_rate": 6.451216955517965e-06, + "loss": 0.8416, + "step": 35320 + }, + { + "epoch": 1.397298740334988, + "grad_norm": 1.2228657375375633, + "learning_rate": 6.449014859470486e-06, + "loss": 0.8507, + "step": 35330 + }, + { + "epoch": 1.397694239553877, + "grad_norm": 1.4883230500958542, + "learning_rate": 6.446812456546483e-06, + "loss": 0.8496, + "step": 35340 + }, + { + "epoch": 1.398089738772766, + "grad_norm": 1.2356745485880056, + "learning_rate": 6.444609747212389e-06, + "loss": 0.833, + "step": 35350 + }, + { + "epoch": 1.398485237991655, + "grad_norm": 
1.426448244450631, + "learning_rate": 6.4424067319347e-06, + "loss": 0.8633, + "step": 35360 + }, + { + "epoch": 1.398880737210544, + "grad_norm": 1.2529223923630732, + "learning_rate": 6.44020341117998e-06, + "loss": 0.847, + "step": 35370 + }, + { + "epoch": 1.399276236429433, + "grad_norm": 1.307762331688145, + "learning_rate": 6.437999785414848e-06, + "loss": 0.8665, + "step": 35380 + }, + { + "epoch": 1.3996717356483221, + "grad_norm": 1.4419356349775445, + "learning_rate": 6.435795855105997e-06, + "loss": 0.8564, + "step": 35390 + }, + { + "epoch": 1.4000672348672112, + "grad_norm": 1.3323260987161705, + "learning_rate": 6.433591620720184e-06, + "loss": 0.8247, + "step": 35400 + }, + { + "epoch": 1.4004627340861002, + "grad_norm": 1.2299128387363623, + "learning_rate": 6.431387082724225e-06, + "loss": 0.8564, + "step": 35410 + }, + { + "epoch": 1.4008582333049893, + "grad_norm": 1.1487522444268858, + "learning_rate": 6.4291822415850055e-06, + "loss": 0.8352, + "step": 35420 + }, + { + "epoch": 1.4012537325238783, + "grad_norm": 1.24818888196066, + "learning_rate": 6.4269770977694725e-06, + "loss": 0.8652, + "step": 35430 + }, + { + "epoch": 1.4016492317427673, + "grad_norm": 1.1213990095052735, + "learning_rate": 6.424771651744638e-06, + "loss": 0.8485, + "step": 35440 + }, + { + "epoch": 1.4020447309616564, + "grad_norm": 1.3691700422138138, + "learning_rate": 6.422565903977576e-06, + "loss": 0.8526, + "step": 35450 + }, + { + "epoch": 1.4024402301805454, + "grad_norm": 1.324240156135189, + "learning_rate": 6.4203598549354274e-06, + "loss": 0.8728, + "step": 35460 + }, + { + "epoch": 1.4028357293994345, + "grad_norm": 1.583707136556271, + "learning_rate": 6.4181535050853945e-06, + "loss": 0.8281, + "step": 35470 + }, + { + "epoch": 1.4032312286183235, + "grad_norm": 1.6615911204562193, + "learning_rate": 6.415946854894746e-06, + "loss": 0.846, + "step": 35480 + }, + { + "epoch": 1.4036267278372125, + "grad_norm": 1.2580396177304356, + "learning_rate": 6.413739904830813e-06, + "loss": 0.8255, + "step": 35490 + }, + { + "epoch": 1.4040222270561016, + "grad_norm": 1.3012727725653996, + "learning_rate": 6.411532655360988e-06, + "loss": 0.8625, + "step": 35500 + }, + { + "epoch": 1.4044177262749906, + "grad_norm": 1.2657373861715762, + "learning_rate": 6.40932510695273e-06, + "loss": 0.8416, + "step": 35510 + }, + { + "epoch": 1.4048132254938797, + "grad_norm": 1.5397906675769235, + "learning_rate": 6.4071172600735576e-06, + "loss": 0.8549, + "step": 35520 + }, + { + "epoch": 1.4052087247127687, + "grad_norm": 1.0685179877664777, + "learning_rate": 6.404909115191057e-06, + "loss": 0.8546, + "step": 35530 + }, + { + "epoch": 1.4056042239316577, + "grad_norm": 1.1858034096888677, + "learning_rate": 6.402700672772875e-06, + "loss": 0.8248, + "step": 35540 + }, + { + "epoch": 1.4059997231505468, + "grad_norm": 1.2330167468195987, + "learning_rate": 6.400491933286721e-06, + "loss": 0.8444, + "step": 35550 + }, + { + "epoch": 1.4063952223694358, + "grad_norm": 1.3432494390382297, + "learning_rate": 6.398282897200371e-06, + "loss": 0.8498, + "step": 35560 + }, + { + "epoch": 1.4067907215883249, + "grad_norm": 1.3558475405289643, + "learning_rate": 6.396073564981658e-06, + "loss": 0.828, + "step": 35570 + }, + { + "epoch": 1.407186220807214, + "grad_norm": 1.5339159318347542, + "learning_rate": 6.393863937098481e-06, + "loss": 0.8322, + "step": 35580 + }, + { + "epoch": 1.407581720026103, + "grad_norm": 1.2150207588374102, + "learning_rate": 6.391654014018802e-06, + "loss": 0.8533, + "step": 
35590 + }, + { + "epoch": 1.407977219244992, + "grad_norm": 1.6270728236824081, + "learning_rate": 6.389443796210646e-06, + "loss": 0.8331, + "step": 35600 + }, + { + "epoch": 1.408372718463881, + "grad_norm": 1.1381829426689705, + "learning_rate": 6.387233284142098e-06, + "loss": 0.8313, + "step": 35610 + }, + { + "epoch": 1.40876821768277, + "grad_norm": 1.18783313903544, + "learning_rate": 6.385022478281307e-06, + "loss": 0.8435, + "step": 35620 + }, + { + "epoch": 1.409163716901659, + "grad_norm": 1.130601306789989, + "learning_rate": 6.382811379096483e-06, + "loss": 0.8355, + "step": 35630 + }, + { + "epoch": 1.4095592161205481, + "grad_norm": 1.3362577266410565, + "learning_rate": 6.3805999870558995e-06, + "loss": 0.8428, + "step": 35640 + }, + { + "epoch": 1.4099547153394372, + "grad_norm": 1.3882280481564724, + "learning_rate": 6.378388302627891e-06, + "loss": 0.8199, + "step": 35650 + }, + { + "epoch": 1.4103502145583262, + "grad_norm": 1.2303027361035306, + "learning_rate": 6.376176326280855e-06, + "loss": 0.842, + "step": 35660 + }, + { + "epoch": 1.4107457137772152, + "grad_norm": 1.466240552654296, + "learning_rate": 6.373964058483251e-06, + "loss": 0.8297, + "step": 35670 + }, + { + "epoch": 1.4111412129961043, + "grad_norm": 1.126554194397866, + "learning_rate": 6.371751499703598e-06, + "loss": 0.8276, + "step": 35680 + }, + { + "epoch": 1.4115367122149933, + "grad_norm": 1.3181356263017643, + "learning_rate": 6.369538650410478e-06, + "loss": 0.8321, + "step": 35690 + }, + { + "epoch": 1.4119322114338824, + "grad_norm": 1.4080446612084359, + "learning_rate": 6.3673255110725356e-06, + "loss": 0.8373, + "step": 35700 + }, + { + "epoch": 1.4123277106527714, + "grad_norm": 1.0789373482032585, + "learning_rate": 6.365112082158475e-06, + "loss": 0.8319, + "step": 35710 + }, + { + "epoch": 1.4127232098716604, + "grad_norm": 1.3387775402171154, + "learning_rate": 6.362898364137064e-06, + "loss": 0.8595, + "step": 35720 + }, + { + "epoch": 1.4131187090905495, + "grad_norm": 1.386778635985283, + "learning_rate": 6.360684357477127e-06, + "loss": 0.8472, + "step": 35730 + }, + { + "epoch": 1.4135142083094385, + "grad_norm": 1.2753648162906723, + "learning_rate": 6.358470062647555e-06, + "loss": 0.8258, + "step": 35740 + }, + { + "epoch": 1.4139097075283276, + "grad_norm": 1.4459847948997728, + "learning_rate": 6.356255480117297e-06, + "loss": 0.8419, + "step": 35750 + }, + { + "epoch": 1.4143052067472166, + "grad_norm": 1.4685685665231003, + "learning_rate": 6.354040610355365e-06, + "loss": 0.8439, + "step": 35760 + }, + { + "epoch": 1.4147007059661056, + "grad_norm": 1.1569408781766877, + "learning_rate": 6.351825453830829e-06, + "loss": 0.8598, + "step": 35770 + }, + { + "epoch": 1.4150962051849947, + "grad_norm": 1.2892616812704085, + "learning_rate": 6.349610011012821e-06, + "loss": 0.8508, + "step": 35780 + }, + { + "epoch": 1.4154917044038837, + "grad_norm": 1.3250786277952604, + "learning_rate": 6.347394282370535e-06, + "loss": 0.845, + "step": 35790 + }, + { + "epoch": 1.415887203622773, + "grad_norm": 1.4777746649431074, + "learning_rate": 6.345178268373224e-06, + "loss": 0.8106, + "step": 35800 + }, + { + "epoch": 1.416282702841662, + "grad_norm": 1.125273496040668, + "learning_rate": 6.342961969490201e-06, + "loss": 0.8476, + "step": 35810 + }, + { + "epoch": 1.416678202060551, + "grad_norm": 1.2900326565884856, + "learning_rate": 6.340745386190841e-06, + "loss": 0.8413, + "step": 35820 + }, + { + "epoch": 1.41707370127944, + "grad_norm": 1.386187861282352, + "learning_rate": 
6.338528518944578e-06, + "loss": 0.8378, + "step": 35830 + }, + { + "epoch": 1.4174692004983291, + "grad_norm": 1.270854554693669, + "learning_rate": 6.3363113682209066e-06, + "loss": 0.8253, + "step": 35840 + }, + { + "epoch": 1.4178646997172182, + "grad_norm": 1.3890910628886686, + "learning_rate": 6.334093934489381e-06, + "loss": 0.8226, + "step": 35850 + }, + { + "epoch": 1.4182601989361072, + "grad_norm": 1.2469668696035763, + "learning_rate": 6.331876218219618e-06, + "loss": 0.8273, + "step": 35860 + }, + { + "epoch": 1.4186556981549963, + "grad_norm": 1.259823769335166, + "learning_rate": 6.3296582198812885e-06, + "loss": 0.8445, + "step": 35870 + }, + { + "epoch": 1.4190511973738853, + "grad_norm": 1.1622495021388048, + "learning_rate": 6.3274399399441265e-06, + "loss": 0.862, + "step": 35880 + }, + { + "epoch": 1.4194466965927743, + "grad_norm": 1.3640435855142807, + "learning_rate": 6.32522137887793e-06, + "loss": 0.8449, + "step": 35890 + }, + { + "epoch": 1.4198421958116634, + "grad_norm": 1.2751562298144632, + "learning_rate": 6.323002537152547e-06, + "loss": 0.8459, + "step": 35900 + }, + { + "epoch": 1.4202376950305524, + "grad_norm": 1.291417434227309, + "learning_rate": 6.320783415237894e-06, + "loss": 0.8209, + "step": 35910 + }, + { + "epoch": 1.4206331942494415, + "grad_norm": 1.2525985481641728, + "learning_rate": 6.318564013603942e-06, + "loss": 0.8231, + "step": 35920 + }, + { + "epoch": 1.4210286934683305, + "grad_norm": 1.3795717676304924, + "learning_rate": 6.316344332720721e-06, + "loss": 0.846, + "step": 35930 + }, + { + "epoch": 1.4214241926872195, + "grad_norm": 1.3725137266759215, + "learning_rate": 6.314124373058321e-06, + "loss": 0.8365, + "step": 35940 + }, + { + "epoch": 1.4218196919061086, + "grad_norm": 1.1988483831546013, + "learning_rate": 6.311904135086894e-06, + "loss": 0.8445, + "step": 35950 + }, + { + "epoch": 1.4222151911249976, + "grad_norm": 1.2198387293440427, + "learning_rate": 6.309683619276648e-06, + "loss": 0.825, + "step": 35960 + }, + { + "epoch": 1.4226106903438867, + "grad_norm": 1.489283287702978, + "learning_rate": 6.307462826097847e-06, + "loss": 0.8443, + "step": 35970 + }, + { + "epoch": 1.4230061895627757, + "grad_norm": 1.2697809338910795, + "learning_rate": 6.30524175602082e-06, + "loss": 0.8466, + "step": 35980 + }, + { + "epoch": 1.4234016887816647, + "grad_norm": 1.3898897181345853, + "learning_rate": 6.303020409515952e-06, + "loss": 0.8369, + "step": 35990 + }, + { + "epoch": 1.4237971880005538, + "grad_norm": 1.2151170739728856, + "learning_rate": 6.300798787053684e-06, + "loss": 0.8345, + "step": 36000 + }, + { + "epoch": 1.4241926872194428, + "grad_norm": 1.072928600803555, + "learning_rate": 6.2985768891045176e-06, + "loss": 0.8405, + "step": 36010 + }, + { + "epoch": 1.4245881864383318, + "grad_norm": 1.3656338652832785, + "learning_rate": 6.2963547161390125e-06, + "loss": 0.8405, + "step": 36020 + }, + { + "epoch": 1.4249836856572209, + "grad_norm": 1.1683124030180605, + "learning_rate": 6.2941322686277905e-06, + "loss": 0.8585, + "step": 36030 + }, + { + "epoch": 1.42537918487611, + "grad_norm": 1.383226672256156, + "learning_rate": 6.291909547041524e-06, + "loss": 0.8517, + "step": 36040 + }, + { + "epoch": 1.425774684094999, + "grad_norm": 1.2166065982446987, + "learning_rate": 6.289686551850949e-06, + "loss": 0.8507, + "step": 36050 + }, + { + "epoch": 1.426170183313888, + "grad_norm": 1.5109477176491386, + "learning_rate": 6.287463283526858e-06, + "loss": 0.8425, + "step": 36060 + }, + { + "epoch": 
1.426565682532777, + "grad_norm": 1.3533527668678242, + "learning_rate": 6.285239742540099e-06, + "loss": 0.8278, + "step": 36070 + }, + { + "epoch": 1.426961181751666, + "grad_norm": 1.1536430583476678, + "learning_rate": 6.283015929361583e-06, + "loss": 0.8631, + "step": 36080 + }, + { + "epoch": 1.4273566809705551, + "grad_norm": 1.4189550506206117, + "learning_rate": 6.280791844462273e-06, + "loss": 0.831, + "step": 36090 + }, + { + "epoch": 1.4277521801894442, + "grad_norm": 1.284261736956499, + "learning_rate": 6.278567488313194e-06, + "loss": 0.8235, + "step": 36100 + }, + { + "epoch": 1.4281476794083332, + "grad_norm": 1.0926733735522753, + "learning_rate": 6.276342861385426e-06, + "loss": 0.8407, + "step": 36110 + }, + { + "epoch": 1.4285431786272222, + "grad_norm": 1.1675223229528713, + "learning_rate": 6.274117964150106e-06, + "loss": 0.8389, + "step": 36120 + }, + { + "epoch": 1.4289386778461113, + "grad_norm": 1.257592852723291, + "learning_rate": 6.2718927970784285e-06, + "loss": 0.8375, + "step": 36130 + }, + { + "epoch": 1.4293341770650003, + "grad_norm": 1.655834860263871, + "learning_rate": 6.269667360641648e-06, + "loss": 0.8429, + "step": 36140 + }, + { + "epoch": 1.4297296762838894, + "grad_norm": 1.1256766569647347, + "learning_rate": 6.2674416553110715e-06, + "loss": 0.8389, + "step": 36150 + }, + { + "epoch": 1.4301251755027784, + "grad_norm": 1.406554157074674, + "learning_rate": 6.265215681558069e-06, + "loss": 0.8639, + "step": 36160 + }, + { + "epoch": 1.4305206747216674, + "grad_norm": 1.1943407246557791, + "learning_rate": 6.2629894398540594e-06, + "loss": 0.8287, + "step": 36170 + }, + { + "epoch": 1.4309161739405565, + "grad_norm": 1.2222021787711124, + "learning_rate": 6.260762930670524e-06, + "loss": 0.8659, + "step": 36180 + }, + { + "epoch": 1.4313116731594455, + "grad_norm": 1.270196597741624, + "learning_rate": 6.2585361544790005e-06, + "loss": 0.8282, + "step": 36190 + }, + { + "epoch": 1.4317071723783346, + "grad_norm": 1.1585839158589433, + "learning_rate": 6.25630911175108e-06, + "loss": 0.8464, + "step": 36200 + }, + { + "epoch": 1.4321026715972236, + "grad_norm": 1.3003342717934459, + "learning_rate": 6.254081802958414e-06, + "loss": 0.8554, + "step": 36210 + }, + { + "epoch": 1.4324981708161126, + "grad_norm": 1.453361295281751, + "learning_rate": 6.251854228572706e-06, + "loss": 0.8402, + "step": 36220 + }, + { + "epoch": 1.4328936700350017, + "grad_norm": 1.3499377670640456, + "learning_rate": 6.249626389065721e-06, + "loss": 0.8237, + "step": 36230 + }, + { + "epoch": 1.4332891692538907, + "grad_norm": 1.391179876041505, + "learning_rate": 6.2473982849092744e-06, + "loss": 0.8317, + "step": 36240 + }, + { + "epoch": 1.4336846684727798, + "grad_norm": 1.6524970818903704, + "learning_rate": 6.245169916575241e-06, + "loss": 0.8197, + "step": 36250 + }, + { + "epoch": 1.4340801676916688, + "grad_norm": 1.3887354337349944, + "learning_rate": 6.242941284535553e-06, + "loss": 0.8229, + "step": 36260 + }, + { + "epoch": 1.4344756669105578, + "grad_norm": 1.3847941138826125, + "learning_rate": 6.240712389262195e-06, + "loss": 0.8206, + "step": 36270 + }, + { + "epoch": 1.4348711661294469, + "grad_norm": 1.4589975520150786, + "learning_rate": 6.2384832312272085e-06, + "loss": 0.847, + "step": 36280 + }, + { + "epoch": 1.435266665348336, + "grad_norm": 1.248488246101786, + "learning_rate": 6.236253810902693e-06, + "loss": 0.8496, + "step": 36290 + }, + { + "epoch": 1.435662164567225, + "grad_norm": 1.4083224349325778, + "learning_rate": 
6.234024128760799e-06, + "loss": 0.8422, + "step": 36300 + }, + { + "epoch": 1.436057663786114, + "grad_norm": 1.2919165400337989, + "learning_rate": 6.231794185273736e-06, + "loss": 0.833, + "step": 36310 + }, + { + "epoch": 1.436453163005003, + "grad_norm": 1.3020070575781513, + "learning_rate": 6.229563980913768e-06, + "loss": 0.8357, + "step": 36320 + }, + { + "epoch": 1.436848662223892, + "grad_norm": 1.2850784721037123, + "learning_rate": 6.2273335161532135e-06, + "loss": 0.8185, + "step": 36330 + }, + { + "epoch": 1.437244161442781, + "grad_norm": 1.4126580330451735, + "learning_rate": 6.225102791464448e-06, + "loss": 0.8427, + "step": 36340 + }, + { + "epoch": 1.4376396606616701, + "grad_norm": 1.2994788736487213, + "learning_rate": 6.2228718073199e-06, + "loss": 0.8447, + "step": 36350 + }, + { + "epoch": 1.4380351598805592, + "grad_norm": 1.3033189197562505, + "learning_rate": 6.220640564192053e-06, + "loss": 0.847, + "step": 36360 + }, + { + "epoch": 1.4384306590994482, + "grad_norm": 1.1641926442040482, + "learning_rate": 6.218409062553448e-06, + "loss": 0.8416, + "step": 36370 + }, + { + "epoch": 1.4388261583183373, + "grad_norm": 1.2430912178324904, + "learning_rate": 6.216177302876676e-06, + "loss": 0.8626, + "step": 36380 + }, + { + "epoch": 1.4392216575372263, + "grad_norm": 1.3809163425879087, + "learning_rate": 6.213945285634388e-06, + "loss": 0.8308, + "step": 36390 + }, + { + "epoch": 1.4396171567561153, + "grad_norm": 1.1200464988948764, + "learning_rate": 6.2117130112992864e-06, + "loss": 0.8321, + "step": 36400 + }, + { + "epoch": 1.4400126559750044, + "grad_norm": 1.493049074585919, + "learning_rate": 6.209480480344127e-06, + "loss": 0.8316, + "step": 36410 + }, + { + "epoch": 1.4404081551938934, + "grad_norm": 1.0941038659433808, + "learning_rate": 6.2072476932417235e-06, + "loss": 0.8461, + "step": 36420 + }, + { + "epoch": 1.4408036544127825, + "grad_norm": 1.1525182827238343, + "learning_rate": 6.205014650464943e-06, + "loss": 0.8512, + "step": 36430 + }, + { + "epoch": 1.4411991536316715, + "grad_norm": 1.4073071119697516, + "learning_rate": 6.202781352486702e-06, + "loss": 0.8353, + "step": 36440 + }, + { + "epoch": 1.4415946528505605, + "grad_norm": 1.4640427658019493, + "learning_rate": 6.200547799779977e-06, + "loss": 0.859, + "step": 36450 + }, + { + "epoch": 1.4419901520694496, + "grad_norm": 1.1224909034560724, + "learning_rate": 6.198313992817796e-06, + "loss": 0.8536, + "step": 36460 + }, + { + "epoch": 1.4423856512883386, + "grad_norm": 1.2596345934323583, + "learning_rate": 6.1960799320732416e-06, + "loss": 0.8172, + "step": 36470 + }, + { + "epoch": 1.4427811505072277, + "grad_norm": 1.2262890682973089, + "learning_rate": 6.1938456180194496e-06, + "loss": 0.8738, + "step": 36480 + }, + { + "epoch": 1.4431766497261167, + "grad_norm": 1.3084410516890757, + "learning_rate": 6.191611051129608e-06, + "loss": 0.821, + "step": 36490 + }, + { + "epoch": 1.4435721489450057, + "grad_norm": 1.1307231105980697, + "learning_rate": 6.1893762318769614e-06, + "loss": 0.823, + "step": 36500 + }, + { + "epoch": 1.4439676481638948, + "grad_norm": 1.3384245037477136, + "learning_rate": 6.187141160734804e-06, + "loss": 0.8252, + "step": 36510 + }, + { + "epoch": 1.4443631473827838, + "grad_norm": 1.0311457217061848, + "learning_rate": 6.184905838176488e-06, + "loss": 0.8374, + "step": 36520 + }, + { + "epoch": 1.4447586466016729, + "grad_norm": 1.2301340613615994, + "learning_rate": 6.182670264675415e-06, + "loss": 0.8385, + "step": 36530 + }, + { + "epoch": 
1.445154145820562, + "grad_norm": 1.3233738786300002, + "learning_rate": 6.180434440705043e-06, + "loss": 0.8366, + "step": 36540 + }, + { + "epoch": 1.445549645039451, + "grad_norm": 1.4269324920365625, + "learning_rate": 6.178198366738879e-06, + "loss": 0.8388, + "step": 36550 + }, + { + "epoch": 1.44594514425834, + "grad_norm": 1.083358497306535, + "learning_rate": 6.175962043250487e-06, + "loss": 0.8248, + "step": 36560 + }, + { + "epoch": 1.446340643477229, + "grad_norm": 1.2333263118621456, + "learning_rate": 6.17372547071348e-06, + "loss": 0.8408, + "step": 36570 + }, + { + "epoch": 1.446736142696118, + "grad_norm": 1.4728295180519482, + "learning_rate": 6.1714886496015254e-06, + "loss": 0.8318, + "step": 36580 + }, + { + "epoch": 1.447131641915007, + "grad_norm": 1.3062771841053484, + "learning_rate": 6.1692515803883465e-06, + "loss": 0.8378, + "step": 36590 + }, + { + "epoch": 1.4475271411338961, + "grad_norm": 1.4450326937321798, + "learning_rate": 6.167014263547716e-06, + "loss": 0.8217, + "step": 36600 + }, + { + "epoch": 1.4479226403527852, + "grad_norm": 1.3443426761961468, + "learning_rate": 6.1647766995534565e-06, + "loss": 0.844, + "step": 36610 + }, + { + "epoch": 1.4483181395716742, + "grad_norm": 1.219261278239387, + "learning_rate": 6.162538888879448e-06, + "loss": 0.8169, + "step": 36620 + }, + { + "epoch": 1.4487136387905633, + "grad_norm": 1.4063018306499722, + "learning_rate": 6.1603008319996194e-06, + "loss": 0.8392, + "step": 36630 + }, + { + "epoch": 1.4491091380094525, + "grad_norm": 1.36000701869999, + "learning_rate": 6.158062529387952e-06, + "loss": 0.8119, + "step": 36640 + }, + { + "epoch": 1.4495046372283416, + "grad_norm": 1.2319344010193962, + "learning_rate": 6.1558239815184825e-06, + "loss": 0.8055, + "step": 36650 + }, + { + "epoch": 1.4499001364472306, + "grad_norm": 1.2903105245619124, + "learning_rate": 6.1535851888652966e-06, + "loss": 0.8218, + "step": 36660 + }, + { + "epoch": 1.4502956356661196, + "grad_norm": 1.3412628803988895, + "learning_rate": 6.151346151902529e-06, + "loss": 0.8244, + "step": 36670 + }, + { + "epoch": 1.4506911348850087, + "grad_norm": 1.4157507052119405, + "learning_rate": 6.149106871104371e-06, + "loss": 0.8246, + "step": 36680 + }, + { + "epoch": 1.4510866341038977, + "grad_norm": 1.2650881608747293, + "learning_rate": 6.1468673469450655e-06, + "loss": 0.8474, + "step": 36690 + }, + { + "epoch": 1.4514821333227867, + "grad_norm": 1.4125684288823825, + "learning_rate": 6.144627579898904e-06, + "loss": 0.8311, + "step": 36700 + }, + { + "epoch": 1.4518776325416758, + "grad_norm": 1.516154326386266, + "learning_rate": 6.142387570440231e-06, + "loss": 0.8156, + "step": 36710 + }, + { + "epoch": 1.4522731317605648, + "grad_norm": 1.19680621479771, + "learning_rate": 6.140147319043444e-06, + "loss": 0.8556, + "step": 36720 + }, + { + "epoch": 1.4526686309794539, + "grad_norm": 1.387951408188166, + "learning_rate": 6.1379068261829855e-06, + "loss": 0.8196, + "step": 36730 + }, + { + "epoch": 1.453064130198343, + "grad_norm": 1.5084295505917156, + "learning_rate": 6.135666092333356e-06, + "loss": 0.8352, + "step": 36740 + }, + { + "epoch": 1.453459629417232, + "grad_norm": 1.3980000408581248, + "learning_rate": 6.133425117969105e-06, + "loss": 0.8103, + "step": 36750 + }, + { + "epoch": 1.453855128636121, + "grad_norm": 1.4649643342356247, + "learning_rate": 6.131183903564833e-06, + "loss": 0.8393, + "step": 36760 + }, + { + "epoch": 1.45425062785501, + "grad_norm": 1.283565079727838, + "learning_rate": 6.12894244959519e-06, 
+ "loss": 0.8378, + "step": 36770 + }, + { + "epoch": 1.454646127073899, + "grad_norm": 1.1290183544167811, + "learning_rate": 6.126700756534877e-06, + "loss": 0.8406, + "step": 36780 + }, + { + "epoch": 1.455041626292788, + "grad_norm": 1.1936587206096785, + "learning_rate": 6.124458824858647e-06, + "loss": 0.8159, + "step": 36790 + }, + { + "epoch": 1.4554371255116771, + "grad_norm": 1.5963402188574836, + "learning_rate": 6.122216655041301e-06, + "loss": 0.8476, + "step": 36800 + }, + { + "epoch": 1.4558326247305662, + "grad_norm": 1.3528111394379871, + "learning_rate": 6.119974247557694e-06, + "loss": 0.8291, + "step": 36810 + }, + { + "epoch": 1.4562281239494552, + "grad_norm": 1.3036486327570647, + "learning_rate": 6.117731602882729e-06, + "loss": 0.821, + "step": 36820 + }, + { + "epoch": 1.4566236231683443, + "grad_norm": 1.1921543304832969, + "learning_rate": 6.115488721491361e-06, + "loss": 0.8385, + "step": 36830 + }, + { + "epoch": 1.4570191223872333, + "grad_norm": 1.421552177735124, + "learning_rate": 6.113245603858592e-06, + "loss": 0.8188, + "step": 36840 + }, + { + "epoch": 1.4574146216061223, + "grad_norm": 1.183513087872989, + "learning_rate": 6.1110022504594755e-06, + "loss": 0.8481, + "step": 36850 + }, + { + "epoch": 1.4578101208250114, + "grad_norm": 1.3333946298831874, + "learning_rate": 6.108758661769117e-06, + "loss": 0.83, + "step": 36860 + }, + { + "epoch": 1.4582056200439004, + "grad_norm": 1.4122821665540046, + "learning_rate": 6.10651483826267e-06, + "loss": 0.8096, + "step": 36870 + }, + { + "epoch": 1.4586011192627895, + "grad_norm": 1.131854316531454, + "learning_rate": 6.1042707804153354e-06, + "loss": 0.8241, + "step": 36880 + }, + { + "epoch": 1.4589966184816785, + "grad_norm": 1.4914839187652236, + "learning_rate": 6.10202648870237e-06, + "loss": 0.8208, + "step": 36890 + }, + { + "epoch": 1.4593921177005675, + "grad_norm": 1.3434019975159093, + "learning_rate": 6.099781963599074e-06, + "loss": 0.7995, + "step": 36900 + }, + { + "epoch": 1.4597876169194566, + "grad_norm": 1.5275499973186812, + "learning_rate": 6.097537205580799e-06, + "loss": 0.8319, + "step": 36910 + }, + { + "epoch": 1.4601831161383456, + "grad_norm": 1.2879733736229924, + "learning_rate": 6.095292215122948e-06, + "loss": 0.8359, + "step": 36920 + }, + { + "epoch": 1.4605786153572347, + "grad_norm": 1.182222761910963, + "learning_rate": 6.093046992700969e-06, + "loss": 0.8134, + "step": 36930 + }, + { + "epoch": 1.4609741145761237, + "grad_norm": 1.4132263899337456, + "learning_rate": 6.090801538790364e-06, + "loss": 0.8306, + "step": 36940 + }, + { + "epoch": 1.4613696137950127, + "grad_norm": 1.229301065063994, + "learning_rate": 6.088555853866681e-06, + "loss": 0.8213, + "step": 36950 + }, + { + "epoch": 1.4617651130139018, + "grad_norm": 1.1830134562667467, + "learning_rate": 6.086309938405517e-06, + "loss": 0.8205, + "step": 36960 + }, + { + "epoch": 1.4621606122327908, + "grad_norm": 1.4243336232067556, + "learning_rate": 6.084063792882521e-06, + "loss": 0.8258, + "step": 36970 + }, + { + "epoch": 1.4625561114516799, + "grad_norm": 1.2494875825490133, + "learning_rate": 6.081817417773385e-06, + "loss": 0.8442, + "step": 36980 + }, + { + "epoch": 1.462951610670569, + "grad_norm": 1.2359816079357258, + "learning_rate": 6.079570813553852e-06, + "loss": 0.8278, + "step": 36990 + }, + { + "epoch": 1.463347109889458, + "grad_norm": 1.490059423288807, + "learning_rate": 6.077323980699717e-06, + "loss": 0.8276, + "step": 37000 + }, + { + "epoch": 1.463742609108347, + "grad_norm": 
1.1769040254613954, + "learning_rate": 6.075076919686821e-06, + "loss": 0.8385, + "step": 37010 + }, + { + "epoch": 1.464138108327236, + "grad_norm": 1.22391141428612, + "learning_rate": 6.07282963099105e-06, + "loss": 0.8415, + "step": 37020 + }, + { + "epoch": 1.464533607546125, + "grad_norm": 1.2994719463292181, + "learning_rate": 6.070582115088346e-06, + "loss": 0.8313, + "step": 37030 + }, + { + "epoch": 1.464929106765014, + "grad_norm": 1.341785209887261, + "learning_rate": 6.0683343724546896e-06, + "loss": 0.8283, + "step": 37040 + }, + { + "epoch": 1.4653246059839031, + "grad_norm": 1.1625691324888021, + "learning_rate": 6.066086403566116e-06, + "loss": 0.8064, + "step": 37050 + }, + { + "epoch": 1.4657201052027922, + "grad_norm": 1.36481340690133, + "learning_rate": 6.063838208898706e-06, + "loss": 0.8031, + "step": 37060 + }, + { + "epoch": 1.4661156044216812, + "grad_norm": 1.4192336644419723, + "learning_rate": 6.06158978892859e-06, + "loss": 0.8467, + "step": 37070 + }, + { + "epoch": 1.4665111036405702, + "grad_norm": 1.186430424563176, + "learning_rate": 6.059341144131945e-06, + "loss": 0.8396, + "step": 37080 + }, + { + "epoch": 1.4669066028594593, + "grad_norm": 1.2446238173646171, + "learning_rate": 6.057092274984992e-06, + "loss": 0.8187, + "step": 37090 + }, + { + "epoch": 1.4673021020783483, + "grad_norm": 1.3571676136191617, + "learning_rate": 6.054843181964009e-06, + "loss": 0.8398, + "step": 37100 + }, + { + "epoch": 1.4676976012972374, + "grad_norm": 1.5060615168232727, + "learning_rate": 6.052593865545308e-06, + "loss": 0.8517, + "step": 37110 + }, + { + "epoch": 1.4680931005161264, + "grad_norm": 1.0813971615890645, + "learning_rate": 6.050344326205262e-06, + "loss": 0.8287, + "step": 37120 + }, + { + "epoch": 1.4684885997350154, + "grad_norm": 1.1773988089100857, + "learning_rate": 6.048094564420282e-06, + "loss": 0.8245, + "step": 37130 + }, + { + "epoch": 1.4688840989539047, + "grad_norm": 1.423950483262472, + "learning_rate": 6.0458445806668285e-06, + "loss": 0.8006, + "step": 37140 + }, + { + "epoch": 1.4692795981727937, + "grad_norm": 1.1024504405308695, + "learning_rate": 6.043594375421411e-06, + "loss": 0.8469, + "step": 37150 + }, + { + "epoch": 1.4696750973916828, + "grad_norm": 1.3152587216061993, + "learning_rate": 6.041343949160584e-06, + "loss": 0.8365, + "step": 37160 + }, + { + "epoch": 1.4700705966105718, + "grad_norm": 1.462331091482157, + "learning_rate": 6.039093302360949e-06, + "loss": 0.8293, + "step": 37170 + }, + { + "epoch": 1.4704660958294609, + "grad_norm": 1.424162052591826, + "learning_rate": 6.036842435499154e-06, + "loss": 0.8276, + "step": 37180 + }, + { + "epoch": 1.47086159504835, + "grad_norm": 1.0954194165113156, + "learning_rate": 6.034591349051895e-06, + "loss": 0.7963, + "step": 37190 + }, + { + "epoch": 1.471257094267239, + "grad_norm": 1.20751470439838, + "learning_rate": 6.032340043495912e-06, + "loss": 0.8328, + "step": 37200 + }, + { + "epoch": 1.471652593486128, + "grad_norm": 1.4879899591354708, + "learning_rate": 6.030088519307996e-06, + "loss": 0.8196, + "step": 37210 + }, + { + "epoch": 1.472048092705017, + "grad_norm": 1.2449159188305148, + "learning_rate": 6.0278367769649794e-06, + "loss": 0.8467, + "step": 37220 + }, + { + "epoch": 1.472443591923906, + "grad_norm": 1.437138029614525, + "learning_rate": 6.02558481694374e-06, + "loss": 0.8196, + "step": 37230 + }, + { + "epoch": 1.472839091142795, + "grad_norm": 1.4491916110053167, + "learning_rate": 6.023332639721209e-06, + "loss": 0.8327, + "step": 37240 + }, + 
{ + "epoch": 1.4732345903616841, + "grad_norm": 1.1596629612558578, + "learning_rate": 6.021080245774356e-06, + "loss": 0.8317, + "step": 37250 + }, + { + "epoch": 1.4736300895805732, + "grad_norm": 1.2298806725917324, + "learning_rate": 6.0188276355802e-06, + "loss": 0.8263, + "step": 37260 + }, + { + "epoch": 1.4740255887994622, + "grad_norm": 1.2669568040670434, + "learning_rate": 6.016574809615807e-06, + "loss": 0.8364, + "step": 37270 + }, + { + "epoch": 1.4744210880183513, + "grad_norm": 1.583310653351235, + "learning_rate": 6.014321768358284e-06, + "loss": 0.8376, + "step": 37280 + }, + { + "epoch": 1.4748165872372403, + "grad_norm": 1.183803876079209, + "learning_rate": 6.0120685122847874e-06, + "loss": 0.8211, + "step": 37290 + }, + { + "epoch": 1.4752120864561293, + "grad_norm": 1.3282787364058297, + "learning_rate": 6.009815041872521e-06, + "loss": 0.8275, + "step": 37300 + }, + { + "epoch": 1.4756075856750184, + "grad_norm": 1.0757896601858954, + "learning_rate": 6.007561357598728e-06, + "loss": 0.8158, + "step": 37310 + }, + { + "epoch": 1.4760030848939074, + "grad_norm": 1.2939334557774662, + "learning_rate": 6.005307459940701e-06, + "loss": 0.8133, + "step": 37320 + }, + { + "epoch": 1.4763985841127965, + "grad_norm": 1.3771816047501217, + "learning_rate": 6.003053349375778e-06, + "loss": 0.8434, + "step": 37330 + }, + { + "epoch": 1.4767940833316855, + "grad_norm": 1.4588897531342568, + "learning_rate": 6.00079902638134e-06, + "loss": 0.8416, + "step": 37340 + }, + { + "epoch": 1.4771895825505745, + "grad_norm": 1.2612260439292184, + "learning_rate": 5.998544491434813e-06, + "loss": 0.838, + "step": 37350 + }, + { + "epoch": 1.4775850817694636, + "grad_norm": 1.2659100294037402, + "learning_rate": 5.99628974501367e-06, + "loss": 0.8596, + "step": 37360 + }, + { + "epoch": 1.4779805809883526, + "grad_norm": 1.259165546633377, + "learning_rate": 5.994034787595428e-06, + "loss": 0.8336, + "step": 37370 + }, + { + "epoch": 1.4783760802072416, + "grad_norm": 1.5323825732780485, + "learning_rate": 5.991779619657648e-06, + "loss": 0.7855, + "step": 37380 + }, + { + "epoch": 1.4787715794261307, + "grad_norm": 1.3628968111836337, + "learning_rate": 5.989524241677937e-06, + "loss": 0.846, + "step": 37390 + }, + { + "epoch": 1.4791670786450197, + "grad_norm": 1.1787882684609086, + "learning_rate": 5.987268654133943e-06, + "loss": 0.8469, + "step": 37400 + }, + { + "epoch": 1.4795625778639088, + "grad_norm": 1.2524777227261528, + "learning_rate": 5.985012857503363e-06, + "loss": 0.807, + "step": 37410 + }, + { + "epoch": 1.4799580770827978, + "grad_norm": 1.2042231250137807, + "learning_rate": 5.982756852263933e-06, + "loss": 0.8332, + "step": 37420 + }, + { + "epoch": 1.4803535763016868, + "grad_norm": 1.2762052873356005, + "learning_rate": 5.980500638893441e-06, + "loss": 0.7956, + "step": 37430 + }, + { + "epoch": 1.4807490755205759, + "grad_norm": 1.3528425128326769, + "learning_rate": 5.978244217869711e-06, + "loss": 0.8336, + "step": 37440 + }, + { + "epoch": 1.481144574739465, + "grad_norm": 1.3367232606982922, + "learning_rate": 5.9759875896706144e-06, + "loss": 0.8522, + "step": 37450 + }, + { + "epoch": 1.481540073958354, + "grad_norm": 1.1979888871980506, + "learning_rate": 5.973730754774068e-06, + "loss": 0.8423, + "step": 37460 + }, + { + "epoch": 1.481935573177243, + "grad_norm": 1.2021681961300046, + "learning_rate": 5.97147371365803e-06, + "loss": 0.8368, + "step": 37470 + }, + { + "epoch": 1.482331072396132, + "grad_norm": 1.4639923441014542, + "learning_rate": 
5.969216466800503e-06, + "loss": 0.854, + "step": 37480 + }, + { + "epoch": 1.482726571615021, + "grad_norm": 1.3036101092964403, + "learning_rate": 5.966959014679532e-06, + "loss": 0.8394, + "step": 37490 + }, + { + "epoch": 1.4831220708339101, + "grad_norm": 1.1478280757190291, + "learning_rate": 5.9647013577732085e-06, + "loss": 0.8338, + "step": 37500 + }, + { + "epoch": 1.4835175700527992, + "grad_norm": 1.3669648344395982, + "learning_rate": 5.962443496559664e-06, + "loss": 0.8299, + "step": 37510 + }, + { + "epoch": 1.4839130692716882, + "grad_norm": 1.4891719958825982, + "learning_rate": 5.960185431517078e-06, + "loss": 0.8272, + "step": 37520 + }, + { + "epoch": 1.4843085684905772, + "grad_norm": 1.2579270922428791, + "learning_rate": 5.957927163123666e-06, + "loss": 0.8445, + "step": 37530 + }, + { + "epoch": 1.4847040677094663, + "grad_norm": 1.4280221776954765, + "learning_rate": 5.9556686918576925e-06, + "loss": 0.8271, + "step": 37540 + }, + { + "epoch": 1.4850995669283553, + "grad_norm": 1.1818080329358007, + "learning_rate": 5.953410018197463e-06, + "loss": 0.8366, + "step": 37550 + }, + { + "epoch": 1.4854950661472444, + "grad_norm": 1.1059097849343014, + "learning_rate": 5.951151142621326e-06, + "loss": 0.8188, + "step": 37560 + }, + { + "epoch": 1.4858905653661334, + "grad_norm": 1.345852113770174, + "learning_rate": 5.948892065607671e-06, + "loss": 0.8114, + "step": 37570 + }, + { + "epoch": 1.4862860645850224, + "grad_norm": 1.2958864841619009, + "learning_rate": 5.946632787634935e-06, + "loss": 0.7949, + "step": 37580 + }, + { + "epoch": 1.4866815638039115, + "grad_norm": 1.2859607747850244, + "learning_rate": 5.94437330918159e-06, + "loss": 0.8301, + "step": 37590 + }, + { + "epoch": 1.4870770630228005, + "grad_norm": 1.3126549447166243, + "learning_rate": 5.942113630726159e-06, + "loss": 0.8546, + "step": 37600 + }, + { + "epoch": 1.4874725622416896, + "grad_norm": 1.4237549890014323, + "learning_rate": 5.939853752747201e-06, + "loss": 0.8307, + "step": 37610 + }, + { + "epoch": 1.4878680614605786, + "grad_norm": 1.2107557720978932, + "learning_rate": 5.93759367572332e-06, + "loss": 0.8443, + "step": 37620 + }, + { + "epoch": 1.4882635606794676, + "grad_norm": 1.2695176508678736, + "learning_rate": 5.935333400133161e-06, + "loss": 0.8311, + "step": 37630 + }, + { + "epoch": 1.4886590598983567, + "grad_norm": 1.3945072147387503, + "learning_rate": 5.9330729264554134e-06, + "loss": 0.827, + "step": 37640 + }, + { + "epoch": 1.4890545591172457, + "grad_norm": 1.2194459760584162, + "learning_rate": 5.930812255168805e-06, + "loss": 0.8389, + "step": 37650 + }, + { + "epoch": 1.4894500583361348, + "grad_norm": 1.1869027175102502, + "learning_rate": 5.928551386752108e-06, + "loss": 0.8567, + "step": 37660 + }, + { + "epoch": 1.4898455575550238, + "grad_norm": 1.2630589516417647, + "learning_rate": 5.9262903216841335e-06, + "loss": 0.8087, + "step": 37670 + }, + { + "epoch": 1.4902410567739128, + "grad_norm": 1.1144199673584247, + "learning_rate": 5.92402906044374e-06, + "loss": 0.8266, + "step": 37680 + }, + { + "epoch": 1.4906365559928019, + "grad_norm": 1.2847134248761494, + "learning_rate": 5.921767603509822e-06, + "loss": 0.833, + "step": 37690 + }, + { + "epoch": 1.491032055211691, + "grad_norm": 1.701931469947877, + "learning_rate": 5.919505951361317e-06, + "loss": 0.8221, + "step": 37700 + }, + { + "epoch": 1.49142755443058, + "grad_norm": 1.1979917350570792, + "learning_rate": 5.917244104477205e-06, + "loss": 0.8411, + "step": 37710 + }, + { + "epoch": 
1.491823053649469, + "grad_norm": 1.2665887906873012, + "learning_rate": 5.914982063336507e-06, + "loss": 0.8031, + "step": 37720 + }, + { + "epoch": 1.492218552868358, + "grad_norm": 1.2559536434755125, + "learning_rate": 5.9127198284182815e-06, + "loss": 0.8494, + "step": 37730 + }, + { + "epoch": 1.492614052087247, + "grad_norm": 1.1510544961983886, + "learning_rate": 5.9104574002016345e-06, + "loss": 0.8332, + "step": 37740 + }, + { + "epoch": 1.493009551306136, + "grad_norm": 1.156594506877186, + "learning_rate": 5.908194779165709e-06, + "loss": 0.8304, + "step": 37750 + }, + { + "epoch": 1.4934050505250251, + "grad_norm": 1.38808820957291, + "learning_rate": 5.905931965789688e-06, + "loss": 0.8192, + "step": 37760 + }, + { + "epoch": 1.4938005497439142, + "grad_norm": 1.3605551768171145, + "learning_rate": 5.903668960552797e-06, + "loss": 0.848, + "step": 37770 + }, + { + "epoch": 1.4941960489628032, + "grad_norm": 1.302042985659683, + "learning_rate": 5.9014057639343025e-06, + "loss": 0.8347, + "step": 37780 + }, + { + "epoch": 1.4945915481816923, + "grad_norm": 1.0357546967364923, + "learning_rate": 5.89914237641351e-06, + "loss": 0.8469, + "step": 37790 + }, + { + "epoch": 1.4949870474005813, + "grad_norm": 1.6929498588586158, + "learning_rate": 5.896878798469766e-06, + "loss": 0.8396, + "step": 37800 + }, + { + "epoch": 1.4953825466194703, + "grad_norm": 1.2867864716344224, + "learning_rate": 5.894615030582458e-06, + "loss": 0.813, + "step": 37810 + }, + { + "epoch": 1.4957780458383594, + "grad_norm": 1.4546439129944488, + "learning_rate": 5.892351073231015e-06, + "loss": 0.7983, + "step": 37820 + }, + { + "epoch": 1.4961735450572484, + "grad_norm": 1.2433124092412393, + "learning_rate": 5.8900869268949e-06, + "loss": 0.8437, + "step": 37830 + }, + { + "epoch": 1.4965690442761375, + "grad_norm": 1.1100349419437714, + "learning_rate": 5.887822592053624e-06, + "loss": 0.8461, + "step": 37840 + }, + { + "epoch": 1.4969645434950265, + "grad_norm": 1.2182905934411132, + "learning_rate": 5.885558069186735e-06, + "loss": 0.8467, + "step": 37850 + }, + { + "epoch": 1.4973600427139155, + "grad_norm": 1.5044139421435199, + "learning_rate": 5.883293358773816e-06, + "loss": 0.8312, + "step": 37860 + }, + { + "epoch": 1.4977555419328046, + "grad_norm": 1.1134418938793038, + "learning_rate": 5.881028461294497e-06, + "loss": 0.824, + "step": 37870 + }, + { + "epoch": 1.4981510411516936, + "grad_norm": 1.3832119834690153, + "learning_rate": 5.878763377228445e-06, + "loss": 0.8512, + "step": 37880 + }, + { + "epoch": 1.4985465403705827, + "grad_norm": 1.3427739153613933, + "learning_rate": 5.876498107055364e-06, + "loss": 0.8188, + "step": 37890 + }, + { + "epoch": 1.4989420395894717, + "grad_norm": 1.2139155355396918, + "learning_rate": 5.874232651255e-06, + "loss": 0.8164, + "step": 37900 + }, + { + "epoch": 1.4993375388083607, + "grad_norm": 1.4188225361176172, + "learning_rate": 5.871967010307138e-06, + "loss": 0.843, + "step": 37910 + }, + { + "epoch": 1.4997330380272498, + "grad_norm": 1.383659359057121, + "learning_rate": 5.8697011846916015e-06, + "loss": 0.8425, + "step": 37920 + }, + { + "epoch": 1.5001285372461388, + "grad_norm": 1.5233622271899518, + "learning_rate": 5.867435174888255e-06, + "loss": 0.8268, + "step": 37930 + }, + { + "epoch": 1.5005240364650279, + "grad_norm": 1.347771444737541, + "learning_rate": 5.8651689813769985e-06, + "loss": 0.8022, + "step": 37940 + }, + { + "epoch": 1.500919535683917, + "grad_norm": 1.3324271983277738, + "learning_rate": 5.862902604637776e-06, 
+ "loss": 0.7998, + "step": 37950 + }, + { + "epoch": 1.501315034902806, + "grad_norm": 1.465709640988423, + "learning_rate": 5.860636045150564e-06, + "loss": 0.8259, + "step": 37960 + }, + { + "epoch": 1.501710534121695, + "grad_norm": 1.2580932460732994, + "learning_rate": 5.858369303395381e-06, + "loss": 0.8217, + "step": 37970 + }, + { + "epoch": 1.502106033340584, + "grad_norm": 1.366923687056205, + "learning_rate": 5.856102379852286e-06, + "loss": 0.8249, + "step": 37980 + }, + { + "epoch": 1.502501532559473, + "grad_norm": 1.1264984940009826, + "learning_rate": 5.853835275001376e-06, + "loss": 0.8122, + "step": 37990 + }, + { + "epoch": 1.502897031778362, + "grad_norm": 1.6154287876343538, + "learning_rate": 5.85156798932278e-06, + "loss": 0.821, + "step": 38000 + }, + { + "epoch": 1.5032925309972511, + "grad_norm": 1.248839585307766, + "learning_rate": 5.8493005232966745e-06, + "loss": 0.8242, + "step": 38010 + }, + { + "epoch": 1.5036880302161402, + "grad_norm": 1.3317870582014166, + "learning_rate": 5.847032877403269e-06, + "loss": 0.8282, + "step": 38020 + }, + { + "epoch": 1.5040835294350292, + "grad_norm": 1.2429661026045349, + "learning_rate": 5.844765052122811e-06, + "loss": 0.8127, + "step": 38030 + }, + { + "epoch": 1.5044790286539182, + "grad_norm": 1.3860656503761577, + "learning_rate": 5.842497047935587e-06, + "loss": 0.8107, + "step": 38040 + }, + { + "epoch": 1.5048745278728073, + "grad_norm": 1.3758474824046636, + "learning_rate": 5.840228865321923e-06, + "loss": 0.8025, + "step": 38050 + }, + { + "epoch": 1.5052700270916963, + "grad_norm": 1.4312116041976664, + "learning_rate": 5.837960504762179e-06, + "loss": 0.8288, + "step": 38060 + }, + { + "epoch": 1.5056655263105856, + "grad_norm": 1.1801788536325402, + "learning_rate": 5.835691966736758e-06, + "loss": 0.8433, + "step": 38070 + }, + { + "epoch": 1.5060610255294746, + "grad_norm": 1.3758792360793468, + "learning_rate": 5.833423251726095e-06, + "loss": 0.806, + "step": 38080 + }, + { + "epoch": 1.5064565247483637, + "grad_norm": 1.2949389581126536, + "learning_rate": 5.8311543602106645e-06, + "loss": 0.8246, + "step": 38090 + }, + { + "epoch": 1.5068520239672527, + "grad_norm": 1.2828666889168154, + "learning_rate": 5.82888529267098e-06, + "loss": 0.8297, + "step": 38100 + }, + { + "epoch": 1.5072475231861417, + "grad_norm": 1.3342565393902486, + "learning_rate": 5.826616049587592e-06, + "loss": 0.8145, + "step": 38110 + }, + { + "epoch": 1.5076430224050308, + "grad_norm": 1.3157019500235356, + "learning_rate": 5.824346631441087e-06, + "loss": 0.816, + "step": 38120 + }, + { + "epoch": 1.5080385216239198, + "grad_norm": 1.655739436905969, + "learning_rate": 5.822077038712088e-06, + "loss": 0.8184, + "step": 38130 + }, + { + "epoch": 1.5084340208428089, + "grad_norm": 1.4923378536205325, + "learning_rate": 5.819807271881256e-06, + "loss": 0.8384, + "step": 38140 + }, + { + "epoch": 1.508829520061698, + "grad_norm": 1.2598760898229802, + "learning_rate": 5.817537331429288e-06, + "loss": 0.839, + "step": 38150 + }, + { + "epoch": 1.509225019280587, + "grad_norm": 1.2044143246664853, + "learning_rate": 5.815267217836921e-06, + "loss": 0.8273, + "step": 38160 + }, + { + "epoch": 1.509620518499476, + "grad_norm": 1.2279039866063677, + "learning_rate": 5.812996931584923e-06, + "loss": 0.8377, + "step": 38170 + }, + { + "epoch": 1.510016017718365, + "grad_norm": 1.4662122144383176, + "learning_rate": 5.810726473154105e-06, + "loss": 0.8309, + "step": 38180 + }, + { + "epoch": 1.510411516937254, + "grad_norm": 
1.2157490964608086, + "learning_rate": 5.808455843025309e-06, + "loss": 0.8147, + "step": 38190 + }, + { + "epoch": 1.510807016156143, + "grad_norm": 1.1510863989916291, + "learning_rate": 5.806185041679415e-06, + "loss": 0.8442, + "step": 38200 + }, + { + "epoch": 1.5112025153750321, + "grad_norm": 1.431497260337306, + "learning_rate": 5.803914069597342e-06, + "loss": 0.8076, + "step": 38210 + }, + { + "epoch": 1.5115980145939212, + "grad_norm": 1.2612330752418943, + "learning_rate": 5.801642927260042e-06, + "loss": 0.8295, + "step": 38220 + }, + { + "epoch": 1.5119935138128102, + "grad_norm": 1.7086484756243665, + "learning_rate": 5.7993716151485035e-06, + "loss": 0.8128, + "step": 38230 + }, + { + "epoch": 1.5123890130316993, + "grad_norm": 1.5717014989466744, + "learning_rate": 5.797100133743752e-06, + "loss": 0.8167, + "step": 38240 + }, + { + "epoch": 1.5127845122505883, + "grad_norm": 1.4099521550238716, + "learning_rate": 5.794828483526848e-06, + "loss": 0.8107, + "step": 38250 + }, + { + "epoch": 1.5131800114694773, + "grad_norm": 1.2373223873898105, + "learning_rate": 5.792556664978888e-06, + "loss": 0.8382, + "step": 38260 + }, + { + "epoch": 1.5135755106883664, + "grad_norm": 1.2390773061535927, + "learning_rate": 5.790284678581005e-06, + "loss": 0.7994, + "step": 38270 + }, + { + "epoch": 1.5139710099072554, + "grad_norm": 1.4390099847144664, + "learning_rate": 5.788012524814366e-06, + "loss": 0.8317, + "step": 38280 + }, + { + "epoch": 1.5143665091261445, + "grad_norm": 1.274542478005831, + "learning_rate": 5.785740204160175e-06, + "loss": 0.8163, + "step": 38290 + }, + { + "epoch": 1.5147620083450335, + "grad_norm": 1.3614774800966511, + "learning_rate": 5.783467717099669e-06, + "loss": 0.811, + "step": 38300 + }, + { + "epoch": 1.5151575075639225, + "grad_norm": 1.2151554711209784, + "learning_rate": 5.7811950641141255e-06, + "loss": 0.8232, + "step": 38310 + }, + { + "epoch": 1.5155530067828116, + "grad_norm": 1.2655687855067057, + "learning_rate": 5.778922245684849e-06, + "loss": 0.8149, + "step": 38320 + }, + { + "epoch": 1.5159485060017006, + "grad_norm": 1.3683987419298556, + "learning_rate": 5.776649262293187e-06, + "loss": 0.8318, + "step": 38330 + }, + { + "epoch": 1.5163440052205897, + "grad_norm": 1.1952612250781876, + "learning_rate": 5.774376114420516e-06, + "loss": 0.8197, + "step": 38340 + }, + { + "epoch": 1.5167395044394787, + "grad_norm": 1.362103756476559, + "learning_rate": 5.77210280254825e-06, + "loss": 0.8202, + "step": 38350 + }, + { + "epoch": 1.5171350036583677, + "grad_norm": 1.4018015765718346, + "learning_rate": 5.769829327157839e-06, + "loss": 0.8355, + "step": 38360 + }, + { + "epoch": 1.5175305028772568, + "grad_norm": 1.192026713496226, + "learning_rate": 5.767555688730766e-06, + "loss": 0.8268, + "step": 38370 + }, + { + "epoch": 1.5179260020961458, + "grad_norm": 1.4241913696623545, + "learning_rate": 5.765281887748547e-06, + "loss": 0.8198, + "step": 38380 + }, + { + "epoch": 1.5183215013150348, + "grad_norm": 1.1317361164699646, + "learning_rate": 5.763007924692736e-06, + "loss": 0.8242, + "step": 38390 + }, + { + "epoch": 1.518717000533924, + "grad_norm": 1.1389342124437951, + "learning_rate": 5.760733800044918e-06, + "loss": 0.8207, + "step": 38400 + }, + { + "epoch": 1.5191124997528132, + "grad_norm": 1.4947415854809551, + "learning_rate": 5.758459514286714e-06, + "loss": 0.8075, + "step": 38410 + }, + { + "epoch": 1.5195079989717022, + "grad_norm": 1.3781584549107786, + "learning_rate": 5.756185067899779e-06, + "loss": 0.804, + 
"step": 38420 + }, + { + "epoch": 1.5199034981905912, + "grad_norm": 1.5099450872277451, + "learning_rate": 5.753910461365803e-06, + "loss": 0.8226, + "step": 38430 + }, + { + "epoch": 1.5202989974094803, + "grad_norm": 1.4031260466275093, + "learning_rate": 5.751635695166506e-06, + "loss": 0.8405, + "step": 38440 + }, + { + "epoch": 1.5206944966283693, + "grad_norm": 1.487364272797396, + "learning_rate": 5.749360769783646e-06, + "loss": 0.7901, + "step": 38450 + }, + { + "epoch": 1.5210899958472583, + "grad_norm": 1.3330407414576706, + "learning_rate": 5.747085685699014e-06, + "loss": 0.8431, + "step": 38460 + }, + { + "epoch": 1.5214854950661474, + "grad_norm": 1.293191964983223, + "learning_rate": 5.744810443394433e-06, + "loss": 0.8364, + "step": 38470 + }, + { + "epoch": 1.5218809942850364, + "grad_norm": 1.2398317086987976, + "learning_rate": 5.74253504335176e-06, + "loss": 0.8296, + "step": 38480 + }, + { + "epoch": 1.5222764935039255, + "grad_norm": 1.3165726996602467, + "learning_rate": 5.740259486052885e-06, + "loss": 0.8137, + "step": 38490 + }, + { + "epoch": 1.5226719927228145, + "grad_norm": 1.5773461675449112, + "learning_rate": 5.737983771979735e-06, + "loss": 0.8151, + "step": 38500 + }, + { + "epoch": 1.5230674919417035, + "grad_norm": 1.5939505912160084, + "learning_rate": 5.735707901614265e-06, + "loss": 0.8113, + "step": 38510 + }, + { + "epoch": 1.5234629911605926, + "grad_norm": 1.152646501145914, + "learning_rate": 5.733431875438465e-06, + "loss": 0.8243, + "step": 38520 + }, + { + "epoch": 1.5238584903794816, + "grad_norm": 1.4188531112423826, + "learning_rate": 5.731155693934358e-06, + "loss": 0.8408, + "step": 38530 + }, + { + "epoch": 1.5242539895983707, + "grad_norm": 1.3942520359681672, + "learning_rate": 5.728879357584003e-06, + "loss": 0.8056, + "step": 38540 + }, + { + "epoch": 1.5246494888172597, + "grad_norm": 1.3777318987014022, + "learning_rate": 5.7266028668694865e-06, + "loss": 0.8124, + "step": 38550 + }, + { + "epoch": 1.5250449880361487, + "grad_norm": 1.274104693710163, + "learning_rate": 5.724326222272933e-06, + "loss": 0.8262, + "step": 38560 + }, + { + "epoch": 1.5254404872550378, + "grad_norm": 1.2887173676796957, + "learning_rate": 5.722049424276492e-06, + "loss": 0.8273, + "step": 38570 + }, + { + "epoch": 1.5258359864739268, + "grad_norm": 1.1666885061187304, + "learning_rate": 5.719772473362355e-06, + "loss": 0.8394, + "step": 38580 + }, + { + "epoch": 1.5262314856928159, + "grad_norm": 1.3426124384217368, + "learning_rate": 5.717495370012739e-06, + "loss": 0.8173, + "step": 38590 + }, + { + "epoch": 1.526626984911705, + "grad_norm": 1.2059245551634135, + "learning_rate": 5.715218114709895e-06, + "loss": 0.8444, + "step": 38600 + }, + { + "epoch": 1.527022484130594, + "grad_norm": 1.2570956074149853, + "learning_rate": 5.712940707936109e-06, + "loss": 0.827, + "step": 38610 + }, + { + "epoch": 1.527417983349483, + "grad_norm": 1.2693911755556908, + "learning_rate": 5.710663150173696e-06, + "loss": 0.818, + "step": 38620 + }, + { + "epoch": 1.527813482568372, + "grad_norm": 1.2997327068177411, + "learning_rate": 5.708385441905001e-06, + "loss": 0.8258, + "step": 38630 + }, + { + "epoch": 1.528208981787261, + "grad_norm": 1.342851020895316, + "learning_rate": 5.706107583612406e-06, + "loss": 0.8016, + "step": 38640 + }, + { + "epoch": 1.52860448100615, + "grad_norm": 1.1774531878378258, + "learning_rate": 5.703829575778322e-06, + "loss": 0.8402, + "step": 38650 + }, + { + "epoch": 1.5289999802250391, + "grad_norm": 1.278433876473585, + 
"learning_rate": 5.701551418885192e-06, + "loss": 0.8189, + "step": 38660 + }, + { + "epoch": 1.5293954794439282, + "grad_norm": 1.68305763879144, + "learning_rate": 5.6992731134154925e-06, + "loss": 0.7945, + "step": 38670 + }, + { + "epoch": 1.5297909786628172, + "grad_norm": 1.5358066405040551, + "learning_rate": 5.6969946598517256e-06, + "loss": 0.8316, + "step": 38680 + }, + { + "epoch": 1.5301864778817063, + "grad_norm": 1.4941027103869715, + "learning_rate": 5.6947160586764315e-06, + "loss": 0.8441, + "step": 38690 + }, + { + "epoch": 1.5305819771005953, + "grad_norm": 1.5287196036694184, + "learning_rate": 5.692437310372179e-06, + "loss": 0.8315, + "step": 38700 + }, + { + "epoch": 1.5309774763194843, + "grad_norm": 1.3015179121246494, + "learning_rate": 5.690158415421565e-06, + "loss": 0.82, + "step": 38710 + }, + { + "epoch": 1.5313729755383734, + "grad_norm": 1.3168439931327458, + "learning_rate": 5.687879374307223e-06, + "loss": 0.824, + "step": 38720 + }, + { + "epoch": 1.5317684747572624, + "grad_norm": 1.2524563671976618, + "learning_rate": 5.685600187511815e-06, + "loss": 0.8358, + "step": 38730 + }, + { + "epoch": 1.5321639739761514, + "grad_norm": 1.602970456884873, + "learning_rate": 5.683320855518034e-06, + "loss": 0.8079, + "step": 38740 + }, + { + "epoch": 1.5325594731950405, + "grad_norm": 1.087151182620091, + "learning_rate": 5.681041378808602e-06, + "loss": 0.8203, + "step": 38750 + }, + { + "epoch": 1.5329549724139295, + "grad_norm": 1.5933289063697031, + "learning_rate": 5.678761757866273e-06, + "loss": 0.7905, + "step": 38760 + }, + { + "epoch": 1.5333504716328186, + "grad_norm": 1.571698507898866, + "learning_rate": 5.676481993173832e-06, + "loss": 0.824, + "step": 38770 + }, + { + "epoch": 1.5337459708517076, + "grad_norm": 1.3836422282780558, + "learning_rate": 5.674202085214096e-06, + "loss": 0.822, + "step": 38780 + }, + { + "epoch": 1.5341414700705966, + "grad_norm": 1.3477182619705774, + "learning_rate": 5.6719220344699076e-06, + "loss": 0.8098, + "step": 38790 + }, + { + "epoch": 1.5345369692894857, + "grad_norm": 1.6234631116214455, + "learning_rate": 5.669641841424145e-06, + "loss": 0.819, + "step": 38800 + }, + { + "epoch": 1.5349324685083747, + "grad_norm": 1.3631566579980516, + "learning_rate": 5.667361506559712e-06, + "loss": 0.8222, + "step": 38810 + }, + { + "epoch": 1.5353279677272638, + "grad_norm": 1.225113445298731, + "learning_rate": 5.6650810303595445e-06, + "loss": 0.8276, + "step": 38820 + }, + { + "epoch": 1.5357234669461528, + "grad_norm": 1.2518426705100616, + "learning_rate": 5.662800413306611e-06, + "loss": 0.7989, + "step": 38830 + }, + { + "epoch": 1.5361189661650418, + "grad_norm": 1.454598850463559, + "learning_rate": 5.6605196558839035e-06, + "loss": 0.8169, + "step": 38840 + }, + { + "epoch": 1.5365144653839309, + "grad_norm": 1.4120882843212048, + "learning_rate": 5.658238758574451e-06, + "loss": 0.7972, + "step": 38850 + }, + { + "epoch": 1.53690996460282, + "grad_norm": 1.4655033143313787, + "learning_rate": 5.655957721861305e-06, + "loss": 0.8293, + "step": 38860 + }, + { + "epoch": 1.537305463821709, + "grad_norm": 1.1867759851838549, + "learning_rate": 5.653676546227551e-06, + "loss": 0.8394, + "step": 38870 + }, + { + "epoch": 1.537700963040598, + "grad_norm": 1.3861241414366707, + "learning_rate": 5.651395232156305e-06, + "loss": 0.828, + "step": 38880 + }, + { + "epoch": 1.538096462259487, + "grad_norm": 1.2570524199306334, + "learning_rate": 5.649113780130708e-06, + "loss": 0.8086, + "step": 38890 + }, + { + "epoch": 
1.538491961478376, + "grad_norm": 1.2338247393546447, + "learning_rate": 5.646832190633933e-06, + "loss": 0.8104, + "step": 38900 + }, + { + "epoch": 1.5388874606972651, + "grad_norm": 1.4890857361789498, + "learning_rate": 5.644550464149181e-06, + "loss": 0.808, + "step": 38910 + }, + { + "epoch": 1.5392829599161542, + "grad_norm": 1.745385794419566, + "learning_rate": 5.642268601159684e-06, + "loss": 0.8093, + "step": 38920 + }, + { + "epoch": 1.5396784591350432, + "grad_norm": 1.307365814179379, + "learning_rate": 5.639986602148701e-06, + "loss": 0.7943, + "step": 38930 + }, + { + "epoch": 1.5400739583539322, + "grad_norm": 1.5926438350605667, + "learning_rate": 5.637704467599519e-06, + "loss": 0.8178, + "step": 38940 + }, + { + "epoch": 1.5404694575728213, + "grad_norm": 1.1807889793922797, + "learning_rate": 5.635422197995457e-06, + "loss": 0.8034, + "step": 38950 + }, + { + "epoch": 1.5408649567917103, + "grad_norm": 1.2122033798033789, + "learning_rate": 5.6331397938198594e-06, + "loss": 0.8103, + "step": 38960 + }, + { + "epoch": 1.5412604560105994, + "grad_norm": 1.5957206217105713, + "learning_rate": 5.6308572555561e-06, + "loss": 0.7969, + "step": 38970 + }, + { + "epoch": 1.5416559552294884, + "grad_norm": 1.0827701728754713, + "learning_rate": 5.628574583687582e-06, + "loss": 0.836, + "step": 38980 + }, + { + "epoch": 1.5420514544483774, + "grad_norm": 1.3355434796487196, + "learning_rate": 5.626291778697737e-06, + "loss": 0.8238, + "step": 38990 + }, + { + "epoch": 1.5424469536672665, + "grad_norm": 1.1769896925261032, + "learning_rate": 5.6240088410700226e-06, + "loss": 0.8471, + "step": 39000 + }, + { + "epoch": 1.5428424528861555, + "grad_norm": 1.2407091295908643, + "learning_rate": 5.6217257712879254e-06, + "loss": 0.8281, + "step": 39010 + }, + { + "epoch": 1.5432379521050446, + "grad_norm": 1.3636080737320468, + "learning_rate": 5.6194425698349615e-06, + "loss": 0.8096, + "step": 39020 + }, + { + "epoch": 1.5436334513239336, + "grad_norm": 1.2245634746187997, + "learning_rate": 5.617159237194675e-06, + "loss": 0.7887, + "step": 39030 + }, + { + "epoch": 1.5440289505428226, + "grad_norm": 1.2467842244493976, + "learning_rate": 5.614875773850633e-06, + "loss": 0.8041, + "step": 39040 + }, + { + "epoch": 1.5444244497617117, + "grad_norm": 1.0935122439457097, + "learning_rate": 5.612592180286439e-06, + "loss": 0.825, + "step": 39050 + }, + { + "epoch": 1.5448199489806007, + "grad_norm": 1.239647065182398, + "learning_rate": 5.610308456985716e-06, + "loss": 0.8227, + "step": 39060 + }, + { + "epoch": 1.5452154481994897, + "grad_norm": 1.482679233583797, + "learning_rate": 5.608024604432117e-06, + "loss": 0.7949, + "step": 39070 + }, + { + "epoch": 1.5456109474183788, + "grad_norm": 1.3375920596273634, + "learning_rate": 5.605740623109322e-06, + "loss": 0.7955, + "step": 39080 + }, + { + "epoch": 1.5460064466372678, + "grad_norm": 1.4103959220782802, + "learning_rate": 5.603456513501042e-06, + "loss": 0.7977, + "step": 39090 + }, + { + "epoch": 1.5464019458561569, + "grad_norm": 1.1429661050424258, + "learning_rate": 5.60117227609101e-06, + "loss": 0.8235, + "step": 39100 + }, + { + "epoch": 1.546797445075046, + "grad_norm": 1.1668869977872303, + "learning_rate": 5.598887911362992e-06, + "loss": 0.8057, + "step": 39110 + }, + { + "epoch": 1.547192944293935, + "grad_norm": 1.1816970950651535, + "learning_rate": 5.596603419800772e-06, + "loss": 0.8402, + "step": 39120 + }, + { + "epoch": 1.547588443512824, + "grad_norm": 1.632450542651483, + "learning_rate": 
5.59431880188817e-06, + "loss": 0.8025, + "step": 39130 + }, + { + "epoch": 1.547983942731713, + "grad_norm": 1.2093192572076203, + "learning_rate": 5.59203405810903e-06, + "loss": 0.8135, + "step": 39140 + }, + { + "epoch": 1.548379441950602, + "grad_norm": 1.2139579886991492, + "learning_rate": 5.589749188947216e-06, + "loss": 0.8009, + "step": 39150 + }, + { + "epoch": 1.548774941169491, + "grad_norm": 1.3890540832478753, + "learning_rate": 5.587464194886628e-06, + "loss": 0.8041, + "step": 39160 + }, + { + "epoch": 1.5491704403883801, + "grad_norm": 1.2213136005106913, + "learning_rate": 5.585179076411189e-06, + "loss": 0.8455, + "step": 39170 + }, + { + "epoch": 1.5495659396072692, + "grad_norm": 1.752192437863592, + "learning_rate": 5.5828938340048465e-06, + "loss": 0.8037, + "step": 39180 + }, + { + "epoch": 1.5499614388261582, + "grad_norm": 1.1913841083070977, + "learning_rate": 5.580608468151576e-06, + "loss": 0.828, + "step": 39190 + }, + { + "epoch": 1.5503569380450473, + "grad_norm": 1.3050077075471247, + "learning_rate": 5.5783229793353785e-06, + "loss": 0.819, + "step": 39200 + }, + { + "epoch": 1.5507524372639363, + "grad_norm": 1.470225449093407, + "learning_rate": 5.576037368040282e-06, + "loss": 0.8058, + "step": 39210 + }, + { + "epoch": 1.5511479364828253, + "grad_norm": 1.2350903834665412, + "learning_rate": 5.57375163475034e-06, + "loss": 0.8328, + "step": 39220 + }, + { + "epoch": 1.5515434357017144, + "grad_norm": 1.21422056049844, + "learning_rate": 5.571465779949633e-06, + "loss": 0.8091, + "step": 39230 + }, + { + "epoch": 1.5519389349206034, + "grad_norm": 1.4081869618346856, + "learning_rate": 5.569179804122263e-06, + "loss": 0.8257, + "step": 39240 + }, + { + "epoch": 1.5523344341394925, + "grad_norm": 1.363848958047758, + "learning_rate": 5.566893707752362e-06, + "loss": 0.7901, + "step": 39250 + }, + { + "epoch": 1.5527299333583815, + "grad_norm": 1.7840250259116874, + "learning_rate": 5.564607491324085e-06, + "loss": 0.8223, + "step": 39260 + }, + { + "epoch": 1.5531254325772705, + "grad_norm": 1.2380626087172715, + "learning_rate": 5.562321155321615e-06, + "loss": 0.8024, + "step": 39270 + }, + { + "epoch": 1.5535209317961596, + "grad_norm": 1.43218123769, + "learning_rate": 5.560034700229157e-06, + "loss": 0.8238, + "step": 39280 + }, + { + "epoch": 1.5539164310150486, + "grad_norm": 1.3984581186758427, + "learning_rate": 5.557748126530946e-06, + "loss": 0.7966, + "step": 39290 + }, + { + "epoch": 1.5543119302339377, + "grad_norm": 1.472340457391162, + "learning_rate": 5.555461434711237e-06, + "loss": 0.8256, + "step": 39300 + }, + { + "epoch": 1.5547074294528267, + "grad_norm": 1.4758825007263823, + "learning_rate": 5.553174625254312e-06, + "loss": 0.8007, + "step": 39310 + }, + { + "epoch": 1.5551029286717157, + "grad_norm": 1.2369053120366102, + "learning_rate": 5.5508876986444774e-06, + "loss": 0.803, + "step": 39320 + }, + { + "epoch": 1.5554984278906048, + "grad_norm": 1.2940420396551047, + "learning_rate": 5.5486006553660665e-06, + "loss": 0.8045, + "step": 39330 + }, + { + "epoch": 1.5558939271094938, + "grad_norm": 1.1060473545686946, + "learning_rate": 5.546313495903436e-06, + "loss": 0.8153, + "step": 39340 + }, + { + "epoch": 1.5562894263283829, + "grad_norm": 1.2265893512972266, + "learning_rate": 5.544026220740968e-06, + "loss": 0.8311, + "step": 39350 + }, + { + "epoch": 1.556684925547272, + "grad_norm": 1.6550990508506043, + "learning_rate": 5.541738830363065e-06, + "loss": 0.8236, + "step": 39360 + }, + { + "epoch": 1.557080424766161, + 
"grad_norm": 1.1458236898666863, + "learning_rate": 5.5394513252541594e-06, + "loss": 0.8388, + "step": 39370 + }, + { + "epoch": 1.55747592398505, + "grad_norm": 1.495186838260878, + "learning_rate": 5.5371637058987046e-06, + "loss": 0.8086, + "step": 39380 + }, + { + "epoch": 1.557871423203939, + "grad_norm": 1.4717040694086956, + "learning_rate": 5.534875972781181e-06, + "loss": 0.8225, + "step": 39390 + }, + { + "epoch": 1.558266922422828, + "grad_norm": 1.2305717977973327, + "learning_rate": 5.532588126386088e-06, + "loss": 0.8348, + "step": 39400 + }, + { + "epoch": 1.5586624216417173, + "grad_norm": 1.4143999990427574, + "learning_rate": 5.530300167197955e-06, + "loss": 0.8165, + "step": 39410 + }, + { + "epoch": 1.5590579208606063, + "grad_norm": 1.443370757375486, + "learning_rate": 5.528012095701334e-06, + "loss": 0.8036, + "step": 39420 + }, + { + "epoch": 1.5594534200794954, + "grad_norm": 1.4501652394376046, + "learning_rate": 5.525723912380794e-06, + "loss": 0.8091, + "step": 39430 + }, + { + "epoch": 1.5598489192983844, + "grad_norm": 1.338755523797807, + "learning_rate": 5.523435617720937e-06, + "loss": 0.7694, + "step": 39440 + }, + { + "epoch": 1.5602444185172735, + "grad_norm": 1.2820511550016698, + "learning_rate": 5.521147212206385e-06, + "loss": 0.8227, + "step": 39450 + }, + { + "epoch": 1.5606399177361625, + "grad_norm": 1.5250704885892234, + "learning_rate": 5.518858696321781e-06, + "loss": 0.7936, + "step": 39460 + }, + { + "epoch": 1.5610354169550515, + "grad_norm": 1.2487698246875318, + "learning_rate": 5.516570070551794e-06, + "loss": 0.8252, + "step": 39470 + }, + { + "epoch": 1.5614309161739406, + "grad_norm": 1.3328166987955157, + "learning_rate": 5.514281335381116e-06, + "loss": 0.8287, + "step": 39480 + }, + { + "epoch": 1.5618264153928296, + "grad_norm": 1.42088727589414, + "learning_rate": 5.511992491294462e-06, + "loss": 0.8079, + "step": 39490 + }, + { + "epoch": 1.5622219146117187, + "grad_norm": 1.4988100766902124, + "learning_rate": 5.509703538776571e-06, + "loss": 0.8082, + "step": 39500 + }, + { + "epoch": 1.5626174138306077, + "grad_norm": 1.3833842461176957, + "learning_rate": 5.5074144783122005e-06, + "loss": 0.8321, + "step": 39510 + }, + { + "epoch": 1.5630129130494967, + "grad_norm": 1.464308309676638, + "learning_rate": 5.505125310386138e-06, + "loss": 0.8123, + "step": 39520 + }, + { + "epoch": 1.5634084122683858, + "grad_norm": 1.4910036244046405, + "learning_rate": 5.502836035483189e-06, + "loss": 0.8427, + "step": 39530 + }, + { + "epoch": 1.5638039114872748, + "grad_norm": 1.4540376600595513, + "learning_rate": 5.500546654088184e-06, + "loss": 0.8087, + "step": 39540 + }, + { + "epoch": 1.5641994107061639, + "grad_norm": 1.1440511797426696, + "learning_rate": 5.49825716668597e-06, + "loss": 0.8393, + "step": 39550 + }, + { + "epoch": 1.564594909925053, + "grad_norm": 1.4075786996648874, + "learning_rate": 5.495967573761425e-06, + "loss": 0.7981, + "step": 39560 + }, + { + "epoch": 1.564990409143942, + "grad_norm": 1.3099595994992026, + "learning_rate": 5.493677875799446e-06, + "loss": 0.8222, + "step": 39570 + }, + { + "epoch": 1.565385908362831, + "grad_norm": 1.2218317511865187, + "learning_rate": 5.491388073284951e-06, + "loss": 0.8061, + "step": 39580 + }, + { + "epoch": 1.56578140758172, + "grad_norm": 1.206393200809857, + "learning_rate": 5.48909816670288e-06, + "loss": 0.8226, + "step": 39590 + }, + { + "epoch": 1.566176906800609, + "grad_norm": 1.3708673103211968, + "learning_rate": 5.4868081565381955e-06, + "loss": 0.8078, + 
"step": 39600 + }, + { + "epoch": 1.566572406019498, + "grad_norm": 1.4120005717905835, + "learning_rate": 5.484518043275885e-06, + "loss": 0.8337, + "step": 39610 + }, + { + "epoch": 1.5669679052383871, + "grad_norm": 1.4600630693904642, + "learning_rate": 5.482227827400953e-06, + "loss": 0.8068, + "step": 39620 + }, + { + "epoch": 1.5673634044572762, + "grad_norm": 1.670653620704324, + "learning_rate": 5.4799375093984285e-06, + "loss": 0.8004, + "step": 39630 + }, + { + "epoch": 1.5677589036761652, + "grad_norm": 1.1515165217257826, + "learning_rate": 5.477647089753363e-06, + "loss": 0.8158, + "step": 39640 + }, + { + "epoch": 1.5681544028950543, + "grad_norm": 1.300203640787178, + "learning_rate": 5.475356568950826e-06, + "loss": 0.8155, + "step": 39650 + }, + { + "epoch": 1.5685499021139433, + "grad_norm": 1.5027638851013374, + "learning_rate": 5.473065947475913e-06, + "loss": 0.81, + "step": 39660 + }, + { + "epoch": 1.5689454013328323, + "grad_norm": 1.12842357234813, + "learning_rate": 5.470775225813736e-06, + "loss": 0.8334, + "step": 39670 + }, + { + "epoch": 1.5693409005517214, + "grad_norm": 1.3292738681420568, + "learning_rate": 5.4684844044494314e-06, + "loss": 0.8316, + "step": 39680 + }, + { + "epoch": 1.5697363997706104, + "grad_norm": 1.495904681940552, + "learning_rate": 5.466193483868155e-06, + "loss": 0.827, + "step": 39690 + }, + { + "epoch": 1.5701318989894995, + "grad_norm": 1.4214349460061635, + "learning_rate": 5.463902464555088e-06, + "loss": 0.7888, + "step": 39700 + }, + { + "epoch": 1.5705273982083885, + "grad_norm": 1.599978081555621, + "learning_rate": 5.4616113469954245e-06, + "loss": 0.7827, + "step": 39710 + }, + { + "epoch": 1.5709228974272775, + "grad_norm": 1.0861151853314188, + "learning_rate": 5.459320131674388e-06, + "loss": 0.8268, + "step": 39720 + }, + { + "epoch": 1.5713183966461666, + "grad_norm": 1.1588730546634205, + "learning_rate": 5.4570288190772156e-06, + "loss": 0.8226, + "step": 39730 + }, + { + "epoch": 1.5717138958650558, + "grad_norm": 1.3819229872228296, + "learning_rate": 5.454737409689169e-06, + "loss": 0.7843, + "step": 39740 + }, + { + "epoch": 1.5721093950839449, + "grad_norm": 1.2112121186132216, + "learning_rate": 5.45244590399553e-06, + "loss": 0.8125, + "step": 39750 + }, + { + "epoch": 1.572504894302834, + "grad_norm": 1.1913765900022304, + "learning_rate": 5.4501543024816005e-06, + "loss": 0.8208, + "step": 39760 + }, + { + "epoch": 1.572900393521723, + "grad_norm": 1.5073803555928857, + "learning_rate": 5.447862605632701e-06, + "loss": 0.7918, + "step": 39770 + }, + { + "epoch": 1.573295892740612, + "grad_norm": 1.221168009029546, + "learning_rate": 5.4455708139341764e-06, + "loss": 0.8189, + "step": 39780 + }, + { + "epoch": 1.573691391959501, + "grad_norm": 1.307264926017239, + "learning_rate": 5.443278927871385e-06, + "loss": 0.8012, + "step": 39790 + }, + { + "epoch": 1.57408689117839, + "grad_norm": 1.1350453704583348, + "learning_rate": 5.440986947929712e-06, + "loss": 0.7895, + "step": 39800 + }, + { + "epoch": 1.574482390397279, + "grad_norm": 1.3002718909237414, + "learning_rate": 5.438694874594558e-06, + "loss": 0.8162, + "step": 39810 + }, + { + "epoch": 1.5748778896161681, + "grad_norm": 1.5794201658229785, + "learning_rate": 5.436402708351346e-06, + "loss": 0.7872, + "step": 39820 + }, + { + "epoch": 1.5752733888350572, + "grad_norm": 1.2507161420546935, + "learning_rate": 5.434110449685517e-06, + "loss": 0.8193, + "step": 39830 + }, + { + "epoch": 1.5756688880539462, + "grad_norm": 1.1983513610804764, + 
"learning_rate": 5.431818099082532e-06, + "loss": 0.7852, + "step": 39840 + }, + { + "epoch": 1.5760643872728353, + "grad_norm": 1.4340925872168722, + "learning_rate": 5.429525657027871e-06, + "loss": 0.8006, + "step": 39850 + }, + { + "epoch": 1.5764598864917243, + "grad_norm": 1.4561711851947867, + "learning_rate": 5.427233124007034e-06, + "loss": 0.7818, + "step": 39860 + }, + { + "epoch": 1.5768553857106133, + "grad_norm": 1.3427937770208374, + "learning_rate": 5.424940500505541e-06, + "loss": 0.8214, + "step": 39870 + }, + { + "epoch": 1.5772508849295024, + "grad_norm": 1.424175563943074, + "learning_rate": 5.42264778700893e-06, + "loss": 0.7879, + "step": 39880 + }, + { + "epoch": 1.5776463841483914, + "grad_norm": 1.2015772096184594, + "learning_rate": 5.420354984002759e-06, + "loss": 0.8103, + "step": 39890 + }, + { + "epoch": 1.5780418833672805, + "grad_norm": 1.6775521195256204, + "learning_rate": 5.418062091972604e-06, + "loss": 0.8057, + "step": 39900 + }, + { + "epoch": 1.5784373825861695, + "grad_norm": 1.4559484766066624, + "learning_rate": 5.415769111404061e-06, + "loss": 0.8121, + "step": 39910 + }, + { + "epoch": 1.5788328818050585, + "grad_norm": 1.4466132892633963, + "learning_rate": 5.413476042782742e-06, + "loss": 0.8239, + "step": 39920 + }, + { + "epoch": 1.5792283810239476, + "grad_norm": 1.4088345948193022, + "learning_rate": 5.4111828865942825e-06, + "loss": 0.7919, + "step": 39930 + }, + { + "epoch": 1.5796238802428366, + "grad_norm": 1.3034516680823378, + "learning_rate": 5.408889643324331e-06, + "loss": 0.824, + "step": 39940 + }, + { + "epoch": 1.5800193794617257, + "grad_norm": 1.4245272053754119, + "learning_rate": 5.406596313458558e-06, + "loss": 0.808, + "step": 39950 + }, + { + "epoch": 1.5804148786806147, + "grad_norm": 1.4489805746654498, + "learning_rate": 5.404302897482652e-06, + "loss": 0.821, + "step": 39960 + }, + { + "epoch": 1.5808103778995037, + "grad_norm": 1.2340763438717153, + "learning_rate": 5.402009395882319e-06, + "loss": 0.8279, + "step": 39970 + }, + { + "epoch": 1.5812058771183928, + "grad_norm": 1.252012477117537, + "learning_rate": 5.3997158091432835e-06, + "loss": 0.8126, + "step": 39980 + }, + { + "epoch": 1.5816013763372818, + "grad_norm": 1.3248169842244182, + "learning_rate": 5.397422137751287e-06, + "loss": 0.8099, + "step": 39990 + }, + { + "epoch": 1.5819968755561709, + "grad_norm": 1.3475216947297723, + "learning_rate": 5.395128382192091e-06, + "loss": 0.8102, + "step": 40000 + }, + { + "epoch": 1.58239237477506, + "grad_norm": 1.1762188778251867, + "learning_rate": 5.392834542951472e-06, + "loss": 0.8142, + "step": 40010 + }, + { + "epoch": 1.582787873993949, + "grad_norm": 1.2710464444756748, + "learning_rate": 5.390540620515229e-06, + "loss": 0.7998, + "step": 40020 + }, + { + "epoch": 1.583183373212838, + "grad_norm": 1.3264818674691536, + "learning_rate": 5.388246615369171e-06, + "loss": 0.8138, + "step": 40030 + }, + { + "epoch": 1.583578872431727, + "grad_norm": 1.4441310045795963, + "learning_rate": 5.385952527999132e-06, + "loss": 0.8053, + "step": 40040 + }, + { + "epoch": 1.583974371650616, + "grad_norm": 1.2704095797848778, + "learning_rate": 5.3836583588909615e-06, + "loss": 0.8165, + "step": 40050 + }, + { + "epoch": 1.584369870869505, + "grad_norm": 1.338339622896242, + "learning_rate": 5.381364108530523e-06, + "loss": 0.799, + "step": 40060 + }, + { + "epoch": 1.5847653700883941, + "grad_norm": 1.3619915388159567, + "learning_rate": 5.379069777403698e-06, + "loss": 0.7947, + "step": 40070 + }, + { + "epoch": 
1.5851608693072832, + "grad_norm": 1.5434712344785235, + "learning_rate": 5.37677536599639e-06, + "loss": 0.7803, + "step": 40080 + }, + { + "epoch": 1.5855563685261722, + "grad_norm": 1.2760463512195508, + "learning_rate": 5.374480874794514e-06, + "loss": 0.8089, + "step": 40090 + }, + { + "epoch": 1.5859518677450613, + "grad_norm": 1.553780124801252, + "learning_rate": 5.372186304284005e-06, + "loss": 0.8367, + "step": 40100 + }, + { + "epoch": 1.5863473669639503, + "grad_norm": 1.2886073410975674, + "learning_rate": 5.369891654950812e-06, + "loss": 0.7911, + "step": 40110 + }, + { + "epoch": 1.5867428661828393, + "grad_norm": 1.5419804686856209, + "learning_rate": 5.367596927280904e-06, + "loss": 0.7809, + "step": 40120 + }, + { + "epoch": 1.5871383654017284, + "grad_norm": 1.654616091305112, + "learning_rate": 5.365302121760264e-06, + "loss": 0.8076, + "step": 40130 + }, + { + "epoch": 1.5875338646206174, + "grad_norm": 1.2812944189204107, + "learning_rate": 5.363007238874895e-06, + "loss": 0.8074, + "step": 40140 + }, + { + "epoch": 1.5879293638395064, + "grad_norm": 1.3803435979578003, + "learning_rate": 5.36071227911081e-06, + "loss": 0.8003, + "step": 40150 + }, + { + "epoch": 1.5883248630583955, + "grad_norm": 1.5265592135017254, + "learning_rate": 5.358417242954045e-06, + "loss": 0.7965, + "step": 40160 + }, + { + "epoch": 1.5887203622772845, + "grad_norm": 1.3098743695519768, + "learning_rate": 5.356122130890647e-06, + "loss": 0.8268, + "step": 40170 + }, + { + "epoch": 1.5891158614961736, + "grad_norm": 1.2590320720747743, + "learning_rate": 5.3538269434066846e-06, + "loss": 0.8503, + "step": 40180 + }, + { + "epoch": 1.5895113607150626, + "grad_norm": 1.3319176043086327, + "learning_rate": 5.351531680988237e-06, + "loss": 0.8192, + "step": 40190 + }, + { + "epoch": 1.5899068599339516, + "grad_norm": 1.3214017728416376, + "learning_rate": 5.349236344121401e-06, + "loss": 0.8023, + "step": 40200 + }, + { + "epoch": 1.5903023591528407, + "grad_norm": 1.4876331372802387, + "learning_rate": 5.346940933292291e-06, + "loss": 0.8069, + "step": 40210 + }, + { + "epoch": 1.5906978583717297, + "grad_norm": 1.4313982345901055, + "learning_rate": 5.344645448987036e-06, + "loss": 0.8192, + "step": 40220 + }, + { + "epoch": 1.5910933575906188, + "grad_norm": 1.2245650169287001, + "learning_rate": 5.342349891691778e-06, + "loss": 0.8115, + "step": 40230 + }, + { + "epoch": 1.5914888568095078, + "grad_norm": 1.5035978513353114, + "learning_rate": 5.3400542618926795e-06, + "loss": 0.7973, + "step": 40240 + }, + { + "epoch": 1.5918843560283968, + "grad_norm": 1.3267528883074913, + "learning_rate": 5.337758560075913e-06, + "loss": 0.7738, + "step": 40250 + }, + { + "epoch": 1.5922798552472859, + "grad_norm": 1.3623002922820524, + "learning_rate": 5.335462786727672e-06, + "loss": 0.8245, + "step": 40260 + }, + { + "epoch": 1.592675354466175, + "grad_norm": 1.1705826085042157, + "learning_rate": 5.3331669423341585e-06, + "loss": 0.7871, + "step": 40270 + }, + { + "epoch": 1.593070853685064, + "grad_norm": 1.2336945135349124, + "learning_rate": 5.330871027381594e-06, + "loss": 0.8137, + "step": 40280 + }, + { + "epoch": 1.593466352903953, + "grad_norm": 1.2116229779499763, + "learning_rate": 5.3285750423562155e-06, + "loss": 0.8062, + "step": 40290 + }, + { + "epoch": 1.593861852122842, + "grad_norm": 1.7707626236067646, + "learning_rate": 5.326278987744272e-06, + "loss": 0.8158, + "step": 40300 + }, + { + "epoch": 1.594257351341731, + "grad_norm": 1.3341719764154734, + "learning_rate": 
5.323982864032028e-06, + "loss": 0.8073, + "step": 40310 + }, + { + "epoch": 1.5946528505606201, + "grad_norm": 1.3027250424657377, + "learning_rate": 5.321686671705765e-06, + "loss": 0.8092, + "step": 40320 + }, + { + "epoch": 1.5950483497795092, + "grad_norm": 1.332325718292871, + "learning_rate": 5.319390411251776e-06, + "loss": 0.8107, + "step": 40330 + }, + { + "epoch": 1.5954438489983982, + "grad_norm": 1.4501848648546483, + "learning_rate": 5.317094083156369e-06, + "loss": 0.7978, + "step": 40340 + }, + { + "epoch": 1.5958393482172872, + "grad_norm": 1.354568540251385, + "learning_rate": 5.314797687905868e-06, + "loss": 0.8059, + "step": 40350 + }, + { + "epoch": 1.5962348474361763, + "grad_norm": 1.4799730802658775, + "learning_rate": 5.31250122598661e-06, + "loss": 0.7823, + "step": 40360 + }, + { + "epoch": 1.5966303466550653, + "grad_norm": 1.3690694929723226, + "learning_rate": 5.310204697884945e-06, + "loss": 0.8095, + "step": 40370 + }, + { + "epoch": 1.5970258458739544, + "grad_norm": 1.3694042457107873, + "learning_rate": 5.307908104087241e-06, + "loss": 0.8236, + "step": 40380 + }, + { + "epoch": 1.5974213450928434, + "grad_norm": 1.2863554246821167, + "learning_rate": 5.305611445079875e-06, + "loss": 0.8159, + "step": 40390 + }, + { + "epoch": 1.5978168443117324, + "grad_norm": 1.418322845417851, + "learning_rate": 5.303314721349242e-06, + "loss": 0.7964, + "step": 40400 + }, + { + "epoch": 1.5982123435306215, + "grad_norm": 1.4567542728239116, + "learning_rate": 5.301017933381745e-06, + "loss": 0.8127, + "step": 40410 + }, + { + "epoch": 1.5986078427495105, + "grad_norm": 1.5296405578181327, + "learning_rate": 5.298721081663809e-06, + "loss": 0.8236, + "step": 40420 + }, + { + "epoch": 1.5990033419683995, + "grad_norm": 1.3380548661247318, + "learning_rate": 5.296424166681865e-06, + "loss": 0.8047, + "step": 40430 + }, + { + "epoch": 1.5993988411872886, + "grad_norm": 1.6403274708926723, + "learning_rate": 5.294127188922361e-06, + "loss": 0.7651, + "step": 40440 + }, + { + "epoch": 1.5997943404061776, + "grad_norm": 1.396002379922597, + "learning_rate": 5.291830148871757e-06, + "loss": 0.8116, + "step": 40450 + }, + { + "epoch": 1.6001898396250667, + "grad_norm": 1.5885888554485912, + "learning_rate": 5.289533047016528e-06, + "loss": 0.8041, + "step": 40460 + }, + { + "epoch": 1.6005853388439557, + "grad_norm": 1.4895143411119116, + "learning_rate": 5.287235883843159e-06, + "loss": 0.802, + "step": 40470 + }, + { + "epoch": 1.6009808380628447, + "grad_norm": 1.4963376550456207, + "learning_rate": 5.2849386598381515e-06, + "loss": 0.8034, + "step": 40480 + }, + { + "epoch": 1.6013763372817338, + "grad_norm": 1.534133783545266, + "learning_rate": 5.282641375488018e-06, + "loss": 0.8292, + "step": 40490 + }, + { + "epoch": 1.6017718365006228, + "grad_norm": 1.3632980225984834, + "learning_rate": 5.280344031279282e-06, + "loss": 0.8215, + "step": 40500 + }, + { + "epoch": 1.6021673357195119, + "grad_norm": 1.4154749144285832, + "learning_rate": 5.278046627698483e-06, + "loss": 0.8128, + "step": 40510 + }, + { + "epoch": 1.602562834938401, + "grad_norm": 1.3333665684610525, + "learning_rate": 5.275749165232173e-06, + "loss": 0.8039, + "step": 40520 + }, + { + "epoch": 1.60295833415729, + "grad_norm": 1.5310751848501931, + "learning_rate": 5.273451644366913e-06, + "loss": 0.7967, + "step": 40530 + }, + { + "epoch": 1.603353833376179, + "grad_norm": 1.6498015241780608, + "learning_rate": 5.27115406558928e-06, + "loss": 0.8103, + "step": 40540 + }, + { + "epoch": 
1.603749332595068, + "grad_norm": 1.4229058599630109, + "learning_rate": 5.2688564293858615e-06, + "loss": 0.8098, + "step": 40550 + }, + { + "epoch": 1.604144831813957, + "grad_norm": 1.531581110951035, + "learning_rate": 5.266558736243257e-06, + "loss": 0.8032, + "step": 40560 + }, + { + "epoch": 1.604540331032846, + "grad_norm": 1.4889568355933234, + "learning_rate": 5.264260986648079e-06, + "loss": 0.8007, + "step": 40570 + }, + { + "epoch": 1.6049358302517351, + "grad_norm": 1.309551517550046, + "learning_rate": 5.261963181086953e-06, + "loss": 0.8256, + "step": 40580 + }, + { + "epoch": 1.6053313294706242, + "grad_norm": 1.2697547210999371, + "learning_rate": 5.259665320046511e-06, + "loss": 0.8112, + "step": 40590 + }, + { + "epoch": 1.6057268286895132, + "grad_norm": 1.5415879654948754, + "learning_rate": 5.257367404013404e-06, + "loss": 0.8025, + "step": 40600 + }, + { + "epoch": 1.6061223279084023, + "grad_norm": 1.3342375567267324, + "learning_rate": 5.255069433474289e-06, + "loss": 0.8105, + "step": 40610 + }, + { + "epoch": 1.6065178271272913, + "grad_norm": 1.4440148664344064, + "learning_rate": 5.252771408915839e-06, + "loss": 0.7837, + "step": 40620 + }, + { + "epoch": 1.6069133263461803, + "grad_norm": 1.406281251365491, + "learning_rate": 5.2504733308247335e-06, + "loss": 0.7984, + "step": 40630 + }, + { + "epoch": 1.6073088255650694, + "grad_norm": 1.4063659622937021, + "learning_rate": 5.248175199687671e-06, + "loss": 0.8093, + "step": 40640 + }, + { + "epoch": 1.6077043247839584, + "grad_norm": 1.6771433662018442, + "learning_rate": 5.24587701599135e-06, + "loss": 0.8092, + "step": 40650 + }, + { + "epoch": 1.6080998240028475, + "grad_norm": 1.2120573884464543, + "learning_rate": 5.24357878022249e-06, + "loss": 0.8054, + "step": 40660 + }, + { + "epoch": 1.6084953232217365, + "grad_norm": 1.2195005934195515, + "learning_rate": 5.2412804928678175e-06, + "loss": 0.7845, + "step": 40670 + }, + { + "epoch": 1.6088908224406255, + "grad_norm": 1.4574078451411203, + "learning_rate": 5.238982154414071e-06, + "loss": 0.8252, + "step": 40680 + }, + { + "epoch": 1.6092863216595146, + "grad_norm": 1.3620365469514852, + "learning_rate": 5.236683765347997e-06, + "loss": 0.7976, + "step": 40690 + }, + { + "epoch": 1.6096818208784036, + "grad_norm": 1.3738660603992838, + "learning_rate": 5.234385326156358e-06, + "loss": 0.8261, + "step": 40700 + }, + { + "epoch": 1.6100773200972927, + "grad_norm": 1.3483139459114069, + "learning_rate": 5.232086837325921e-06, + "loss": 0.7827, + "step": 40710 + }, + { + "epoch": 1.6104728193161817, + "grad_norm": 1.3099654534445502, + "learning_rate": 5.229788299343469e-06, + "loss": 0.8153, + "step": 40720 + }, + { + "epoch": 1.6108683185350707, + "grad_norm": 1.333028879245161, + "learning_rate": 5.227489712695794e-06, + "loss": 0.8052, + "step": 40730 + }, + { + "epoch": 1.6112638177539598, + "grad_norm": 1.189247195036392, + "learning_rate": 5.225191077869692e-06, + "loss": 0.819, + "step": 40740 + }, + { + "epoch": 1.611659316972849, + "grad_norm": 1.4282237240003406, + "learning_rate": 5.222892395351979e-06, + "loss": 0.7846, + "step": 40750 + }, + { + "epoch": 1.612054816191738, + "grad_norm": 1.4333835930506296, + "learning_rate": 5.220593665629476e-06, + "loss": 0.8121, + "step": 40760 + }, + { + "epoch": 1.612450315410627, + "grad_norm": 1.2786809764135276, + "learning_rate": 5.218294889189012e-06, + "loss": 0.8151, + "step": 40770 + }, + { + "epoch": 1.6128458146295162, + "grad_norm": 1.46649829050338, + "learning_rate": 
5.215996066517432e-06, + "loss": 0.8015, + "step": 40780 + }, + { + "epoch": 1.6132413138484052, + "grad_norm": 1.369463227511982, + "learning_rate": 5.213697198101584e-06, + "loss": 0.8091, + "step": 40790 + }, + { + "epoch": 1.6136368130672942, + "grad_norm": 1.6166295093265266, + "learning_rate": 5.211398284428331e-06, + "loss": 0.7918, + "step": 40800 + }, + { + "epoch": 1.6140323122861833, + "grad_norm": 1.4687774927015027, + "learning_rate": 5.2090993259845435e-06, + "loss": 0.7835, + "step": 40810 + }, + { + "epoch": 1.6144278115050723, + "grad_norm": 1.2612010247542904, + "learning_rate": 5.206800323257102e-06, + "loss": 0.7975, + "step": 40820 + }, + { + "epoch": 1.6148233107239613, + "grad_norm": 1.4430852737062814, + "learning_rate": 5.204501276732894e-06, + "loss": 0.8064, + "step": 40830 + }, + { + "epoch": 1.6152188099428504, + "grad_norm": 1.4229175577473279, + "learning_rate": 5.202202186898819e-06, + "loss": 0.8177, + "step": 40840 + }, + { + "epoch": 1.6156143091617394, + "grad_norm": 1.4340347986726354, + "learning_rate": 5.199903054241785e-06, + "loss": 0.8082, + "step": 40850 + }, + { + "epoch": 1.6160098083806285, + "grad_norm": 1.3192594531785078, + "learning_rate": 5.1976038792487086e-06, + "loss": 0.7978, + "step": 40860 + }, + { + "epoch": 1.6164053075995175, + "grad_norm": 1.4188118970579786, + "learning_rate": 5.195304662406516e-06, + "loss": 0.7886, + "step": 40870 + }, + { + "epoch": 1.6168008068184065, + "grad_norm": 1.3508353889334082, + "learning_rate": 5.1930054042021425e-06, + "loss": 0.7818, + "step": 40880 + }, + { + "epoch": 1.6171963060372956, + "grad_norm": 1.4638681954061876, + "learning_rate": 5.1907061051225315e-06, + "loss": 0.8128, + "step": 40890 + }, + { + "epoch": 1.6175918052561846, + "grad_norm": 1.459273468962672, + "learning_rate": 5.188406765654634e-06, + "loss": 0.806, + "step": 40900 + }, + { + "epoch": 1.6179873044750737, + "grad_norm": 1.44578084287467, + "learning_rate": 5.186107386285411e-06, + "loss": 0.7833, + "step": 40910 + }, + { + "epoch": 1.6183828036939627, + "grad_norm": 1.5753975152670685, + "learning_rate": 5.1838079675018315e-06, + "loss": 0.8028, + "step": 40920 + }, + { + "epoch": 1.6187783029128517, + "grad_norm": 1.422009266154339, + "learning_rate": 5.181508509790874e-06, + "loss": 0.8227, + "step": 40930 + }, + { + "epoch": 1.6191738021317408, + "grad_norm": 1.297346489643188, + "learning_rate": 5.179209013639526e-06, + "loss": 0.7827, + "step": 40940 + }, + { + "epoch": 1.6195693013506298, + "grad_norm": 1.183255862240372, + "learning_rate": 5.1769094795347765e-06, + "loss": 0.8028, + "step": 40950 + }, + { + "epoch": 1.6199648005695189, + "grad_norm": 1.3829413149807757, + "learning_rate": 5.174609907963632e-06, + "loss": 0.7969, + "step": 40960 + }, + { + "epoch": 1.620360299788408, + "grad_norm": 1.3219613632842353, + "learning_rate": 5.1723102994130994e-06, + "loss": 0.8132, + "step": 40970 + }, + { + "epoch": 1.620755799007297, + "grad_norm": 1.3746912111761755, + "learning_rate": 5.170010654370197e-06, + "loss": 0.8046, + "step": 40980 + }, + { + "epoch": 1.621151298226186, + "grad_norm": 1.5284944514785503, + "learning_rate": 5.167710973321951e-06, + "loss": 0.771, + "step": 40990 + }, + { + "epoch": 1.621546797445075, + "grad_norm": 1.1941270662955368, + "learning_rate": 5.165411256755394e-06, + "loss": 0.8068, + "step": 41000 + }, + { + "epoch": 1.621942296663964, + "grad_norm": 1.5626063117347866, + "learning_rate": 5.163111505157568e-06, + "loss": 0.7796, + "step": 41010 + }, + { + "epoch": 
1.622337795882853, + "grad_norm": 1.3242677983623634, + "learning_rate": 5.160811719015517e-06, + "loss": 0.819, + "step": 41020 + }, + { + "epoch": 1.6227332951017421, + "grad_norm": 1.4458638941451125, + "learning_rate": 5.1585118988163005e-06, + "loss": 0.7966, + "step": 41030 + }, + { + "epoch": 1.6231287943206312, + "grad_norm": 1.3637171913114798, + "learning_rate": 5.15621204504698e-06, + "loss": 0.8185, + "step": 41040 + }, + { + "epoch": 1.6235242935395202, + "grad_norm": 1.2603522704197314, + "learning_rate": 5.153912158194623e-06, + "loss": 0.8106, + "step": 41050 + }, + { + "epoch": 1.6239197927584093, + "grad_norm": 1.1415050399577653, + "learning_rate": 5.1516122387463085e-06, + "loss": 0.7883, + "step": 41060 + }, + { + "epoch": 1.6243152919772983, + "grad_norm": 1.4815100543142539, + "learning_rate": 5.149312287189121e-06, + "loss": 0.7984, + "step": 41070 + }, + { + "epoch": 1.6247107911961876, + "grad_norm": 1.151794784175854, + "learning_rate": 5.147012304010147e-06, + "loss": 0.7956, + "step": 41080 + }, + { + "epoch": 1.6251062904150766, + "grad_norm": 1.2311399933124467, + "learning_rate": 5.1447122896964865e-06, + "loss": 0.7991, + "step": 41090 + }, + { + "epoch": 1.6255017896339656, + "grad_norm": 1.4993124530336446, + "learning_rate": 5.1424122447352424e-06, + "loss": 0.8159, + "step": 41100 + }, + { + "epoch": 1.6258972888528547, + "grad_norm": 1.2230048244228209, + "learning_rate": 5.1401121696135235e-06, + "loss": 0.7885, + "step": 41110 + }, + { + "epoch": 1.6262927880717437, + "grad_norm": 1.4699198884997282, + "learning_rate": 5.137812064818448e-06, + "loss": 0.7789, + "step": 41120 + }, + { + "epoch": 1.6266882872906328, + "grad_norm": 1.4290286243301284, + "learning_rate": 5.13551193083714e-06, + "loss": 0.8024, + "step": 41130 + }, + { + "epoch": 1.6270837865095218, + "grad_norm": 1.3227082632051894, + "learning_rate": 5.133211768156725e-06, + "loss": 0.8026, + "step": 41140 + }, + { + "epoch": 1.6274792857284108, + "grad_norm": 1.5142721017223066, + "learning_rate": 5.130911577264339e-06, + "loss": 0.8151, + "step": 41150 + }, + { + "epoch": 1.6278747849472999, + "grad_norm": 1.518370424170619, + "learning_rate": 5.128611358647125e-06, + "loss": 0.8026, + "step": 41160 + }, + { + "epoch": 1.628270284166189, + "grad_norm": 1.2265411947058955, + "learning_rate": 5.126311112792229e-06, + "loss": 0.8043, + "step": 41170 + }, + { + "epoch": 1.628665783385078, + "grad_norm": 1.2756620215500682, + "learning_rate": 5.124010840186803e-06, + "loss": 0.794, + "step": 41180 + }, + { + "epoch": 1.629061282603967, + "grad_norm": 1.6144680072120252, + "learning_rate": 5.121710541318005e-06, + "loss": 0.813, + "step": 41190 + }, + { + "epoch": 1.629456781822856, + "grad_norm": 1.1864032667168556, + "learning_rate": 5.119410216673e-06, + "loss": 0.8348, + "step": 41200 + }, + { + "epoch": 1.629852281041745, + "grad_norm": 1.2891299525580995, + "learning_rate": 5.117109866738956e-06, + "loss": 0.8015, + "step": 41210 + }, + { + "epoch": 1.630247780260634, + "grad_norm": 1.4370258261290074, + "learning_rate": 5.11480949200305e-06, + "loss": 0.7943, + "step": 41220 + }, + { + "epoch": 1.6306432794795231, + "grad_norm": 1.2958430551134368, + "learning_rate": 5.112509092952459e-06, + "loss": 0.7808, + "step": 41230 + }, + { + "epoch": 1.6310387786984122, + "grad_norm": 1.4127537081316506, + "learning_rate": 5.1102086700743705e-06, + "loss": 0.8101, + "step": 41240 + }, + { + "epoch": 1.6314342779173012, + "grad_norm": 1.2759522436620703, + "learning_rate": 
5.107908223855974e-06, + "loss": 0.8085, + "step": 41250 + }, + { + "epoch": 1.6318297771361903, + "grad_norm": 1.335467702209931, + "learning_rate": 5.105607754784464e-06, + "loss": 0.7899, + "step": 41260 + }, + { + "epoch": 1.6322252763550793, + "grad_norm": 1.4066176742411196, + "learning_rate": 5.103307263347042e-06, + "loss": 0.7988, + "step": 41270 + }, + { + "epoch": 1.6326207755739683, + "grad_norm": 1.7410660983956376, + "learning_rate": 5.101006750030909e-06, + "loss": 0.8114, + "step": 41280 + }, + { + "epoch": 1.6330162747928574, + "grad_norm": 1.1572830853662208, + "learning_rate": 5.098706215323278e-06, + "loss": 0.7978, + "step": 41290 + }, + { + "epoch": 1.6334117740117464, + "grad_norm": 2.038659243784275, + "learning_rate": 5.096405659711362e-06, + "loss": 0.7752, + "step": 41300 + }, + { + "epoch": 1.6338072732306355, + "grad_norm": 1.1956590106167422, + "learning_rate": 5.09410508368238e-06, + "loss": 0.8269, + "step": 41310 + }, + { + "epoch": 1.6342027724495245, + "grad_norm": 1.4121952198194214, + "learning_rate": 5.091804487723552e-06, + "loss": 0.8079, + "step": 41320 + }, + { + "epoch": 1.6345982716684135, + "grad_norm": 1.484749633932209, + "learning_rate": 5.089503872322106e-06, + "loss": 0.8186, + "step": 41330 + }, + { + "epoch": 1.6349937708873026, + "grad_norm": 1.2017763707371538, + "learning_rate": 5.087203237965274e-06, + "loss": 0.7991, + "step": 41340 + }, + { + "epoch": 1.6353892701061916, + "grad_norm": 1.3291992319748185, + "learning_rate": 5.08490258514029e-06, + "loss": 0.8186, + "step": 41350 + }, + { + "epoch": 1.6357847693250807, + "grad_norm": 1.3216493155035252, + "learning_rate": 5.082601914334392e-06, + "loss": 0.7812, + "step": 41360 + }, + { + "epoch": 1.6361802685439697, + "grad_norm": 1.2400719889180203, + "learning_rate": 5.080301226034826e-06, + "loss": 0.7867, + "step": 41370 + }, + { + "epoch": 1.6365757677628587, + "grad_norm": 1.5642923134596862, + "learning_rate": 5.078000520728835e-06, + "loss": 0.8107, + "step": 41380 + }, + { + "epoch": 1.6369712669817478, + "grad_norm": 1.3374933689334152, + "learning_rate": 5.07569979890367e-06, + "loss": 0.8052, + "step": 41390 + }, + { + "epoch": 1.6373667662006368, + "grad_norm": 1.1892639563490934, + "learning_rate": 5.073399061046584e-06, + "loss": 0.792, + "step": 41400 + }, + { + "epoch": 1.6377622654195259, + "grad_norm": 1.6774346530220643, + "learning_rate": 5.071098307644835e-06, + "loss": 0.7915, + "step": 41410 + }, + { + "epoch": 1.638157764638415, + "grad_norm": 1.3469751914426114, + "learning_rate": 5.0687975391856825e-06, + "loss": 0.7943, + "step": 41420 + }, + { + "epoch": 1.638553263857304, + "grad_norm": 1.3360890070486067, + "learning_rate": 5.06649675615639e-06, + "loss": 0.8082, + "step": 41430 + }, + { + "epoch": 1.638948763076193, + "grad_norm": 1.4637269478159602, + "learning_rate": 5.064195959044224e-06, + "loss": 0.8404, + "step": 41440 + }, + { + "epoch": 1.639344262295082, + "grad_norm": 1.5566142242572265, + "learning_rate": 5.061895148336452e-06, + "loss": 0.8174, + "step": 41450 + }, + { + "epoch": 1.639739761513971, + "grad_norm": 1.4233634252833747, + "learning_rate": 5.059594324520348e-06, + "loss": 0.8062, + "step": 41460 + }, + { + "epoch": 1.64013526073286, + "grad_norm": 1.5718870081810636, + "learning_rate": 5.057293488083188e-06, + "loss": 0.8108, + "step": 41470 + }, + { + "epoch": 1.6405307599517491, + "grad_norm": 1.3328920880261494, + "learning_rate": 5.05499263951225e-06, + "loss": 0.7941, + "step": 41480 + }, + { + "epoch": 1.6409262591706382, 
+ "grad_norm": 1.5310802655594322, + "learning_rate": 5.0526917792948105e-06, + "loss": 0.8004, + "step": 41490 + }, + { + "epoch": 1.6413217583895272, + "grad_norm": 1.3720082816217016, + "learning_rate": 5.0503909079181576e-06, + "loss": 0.8185, + "step": 41500 + }, + { + "epoch": 1.6417172576084162, + "grad_norm": 1.3001261214761737, + "learning_rate": 5.048090025869572e-06, + "loss": 0.7769, + "step": 41510 + }, + { + "epoch": 1.6421127568273053, + "grad_norm": 1.459594603957872, + "learning_rate": 5.045789133636343e-06, + "loss": 0.8207, + "step": 41520 + }, + { + "epoch": 1.6425082560461943, + "grad_norm": 1.528547742547802, + "learning_rate": 5.04348823170576e-06, + "loss": 0.8178, + "step": 41530 + }, + { + "epoch": 1.6429037552650834, + "grad_norm": 1.6085092021760932, + "learning_rate": 5.041187320565115e-06, + "loss": 0.8093, + "step": 41540 + }, + { + "epoch": 1.6432992544839724, + "grad_norm": 1.5470087310200578, + "learning_rate": 5.0388864007017e-06, + "loss": 0.7922, + "step": 41550 + }, + { + "epoch": 1.6436947537028614, + "grad_norm": 1.6178933920682441, + "learning_rate": 5.036585472602814e-06, + "loss": 0.7933, + "step": 41560 + }, + { + "epoch": 1.6440902529217505, + "grad_norm": 1.2731993922742746, + "learning_rate": 5.03428453675575e-06, + "loss": 0.8038, + "step": 41570 + }, + { + "epoch": 1.6444857521406395, + "grad_norm": 1.274779109720121, + "learning_rate": 5.031983593647808e-06, + "loss": 0.8127, + "step": 41580 + }, + { + "epoch": 1.6448812513595286, + "grad_norm": 1.4217855981866225, + "learning_rate": 5.02968264376629e-06, + "loss": 0.8063, + "step": 41590 + }, + { + "epoch": 1.6452767505784176, + "grad_norm": 1.1475518373650064, + "learning_rate": 5.0273816875984945e-06, + "loss": 0.8292, + "step": 41600 + }, + { + "epoch": 1.6456722497973066, + "grad_norm": 1.4813545809653703, + "learning_rate": 5.0250807256317305e-06, + "loss": 0.8053, + "step": 41610 + }, + { + "epoch": 1.6460677490161957, + "grad_norm": 1.5924575905158627, + "learning_rate": 5.022779758353296e-06, + "loss": 0.787, + "step": 41620 + }, + { + "epoch": 1.6464632482350847, + "grad_norm": 1.6631214353540575, + "learning_rate": 5.020478786250498e-06, + "loss": 0.7841, + "step": 41630 + }, + { + "epoch": 1.6468587474539738, + "grad_norm": 1.361685365546339, + "learning_rate": 5.018177809810646e-06, + "loss": 0.7766, + "step": 41640 + }, + { + "epoch": 1.6472542466728628, + "grad_norm": 1.51754234697285, + "learning_rate": 5.015876829521045e-06, + "loss": 0.792, + "step": 41650 + }, + { + "epoch": 1.6476497458917518, + "grad_norm": 1.2264930998008845, + "learning_rate": 5.013575845869002e-06, + "loss": 0.8055, + "step": 41660 + }, + { + "epoch": 1.6480452451106409, + "grad_norm": 1.3978468973402076, + "learning_rate": 5.011274859341828e-06, + "loss": 0.7912, + "step": 41670 + }, + { + "epoch": 1.64844074432953, + "grad_norm": 1.307692278637964, + "learning_rate": 5.0089738704268336e-06, + "loss": 0.7927, + "step": 41680 + }, + { + "epoch": 1.648836243548419, + "grad_norm": 1.3138834727499307, + "learning_rate": 5.006672879611325e-06, + "loss": 0.8048, + "step": 41690 + }, + { + "epoch": 1.649231742767308, + "grad_norm": 1.4437893592989504, + "learning_rate": 5.004371887382615e-06, + "loss": 0.7721, + "step": 41700 + }, + { + "epoch": 1.649627241986197, + "grad_norm": 1.3997530085345107, + "learning_rate": 5.002070894228015e-06, + "loss": 0.8021, + "step": 41710 + }, + { + "epoch": 1.650022741205086, + "grad_norm": 1.4303570839058444, + "learning_rate": 4.999769900634835e-06, + "loss": 0.8109, + 
"step": 41720 + }, + { + "epoch": 1.6504182404239751, + "grad_norm": 1.3448357092680352, + "learning_rate": 4.997468907090385e-06, + "loss": 0.7735, + "step": 41730 + }, + { + "epoch": 1.6508137396428642, + "grad_norm": 1.3548187282963036, + "learning_rate": 4.995167914081979e-06, + "loss": 0.8075, + "step": 41740 + }, + { + "epoch": 1.6512092388617532, + "grad_norm": 1.4015841055562521, + "learning_rate": 4.9928669220969245e-06, + "loss": 0.7894, + "step": 41750 + }, + { + "epoch": 1.6516047380806422, + "grad_norm": 1.4101541375930886, + "learning_rate": 4.990565931622534e-06, + "loss": 0.8233, + "step": 41760 + }, + { + "epoch": 1.6520002372995313, + "grad_norm": 1.4053939714558688, + "learning_rate": 4.98826494314612e-06, + "loss": 0.8151, + "step": 41770 + }, + { + "epoch": 1.6523957365184203, + "grad_norm": 1.5853674425593267, + "learning_rate": 4.985963957154988e-06, + "loss": 0.8015, + "step": 41780 + }, + { + "epoch": 1.6527912357373093, + "grad_norm": 1.279673576255129, + "learning_rate": 4.9836629741364485e-06, + "loss": 0.7916, + "step": 41790 + }, + { + "epoch": 1.6531867349561984, + "grad_norm": 1.6211416265648426, + "learning_rate": 4.981361994577812e-06, + "loss": 0.7682, + "step": 41800 + }, + { + "epoch": 1.6535822341750874, + "grad_norm": 1.2474906969955124, + "learning_rate": 4.979061018966385e-06, + "loss": 0.7821, + "step": 41810 + }, + { + "epoch": 1.6539777333939765, + "grad_norm": 1.4355222449291372, + "learning_rate": 4.976760047789476e-06, + "loss": 0.7989, + "step": 41820 + }, + { + "epoch": 1.6543732326128655, + "grad_norm": 1.335955536813259, + "learning_rate": 4.974459081534391e-06, + "loss": 0.806, + "step": 41830 + }, + { + "epoch": 1.6547687318317545, + "grad_norm": 1.2633356330201375, + "learning_rate": 4.972158120688435e-06, + "loss": 0.8147, + "step": 41840 + }, + { + "epoch": 1.6551642310506436, + "grad_norm": 1.3893167873568655, + "learning_rate": 4.9698571657389126e-06, + "loss": 0.8093, + "step": 41850 + }, + { + "epoch": 1.6555597302695326, + "grad_norm": 1.1974974826775149, + "learning_rate": 4.9675562171731255e-06, + "loss": 0.8065, + "step": 41860 + }, + { + "epoch": 1.6559552294884217, + "grad_norm": 1.459057806211302, + "learning_rate": 4.965255275478375e-06, + "loss": 0.7989, + "step": 41870 + }, + { + "epoch": 1.6563507287073107, + "grad_norm": 1.3100690740574463, + "learning_rate": 4.962954341141962e-06, + "loss": 0.7968, + "step": 41880 + }, + { + "epoch": 1.6567462279261997, + "grad_norm": 1.2798098844745645, + "learning_rate": 4.960653414651185e-06, + "loss": 0.8228, + "step": 41890 + }, + { + "epoch": 1.6571417271450888, + "grad_norm": 1.284988533619987, + "learning_rate": 4.958352496493344e-06, + "loss": 0.8234, + "step": 41900 + }, + { + "epoch": 1.6575372263639778, + "grad_norm": 1.6586809908788955, + "learning_rate": 4.9560515871557275e-06, + "loss": 0.8053, + "step": 41910 + }, + { + "epoch": 1.6579327255828669, + "grad_norm": 1.3544940087804016, + "learning_rate": 4.953750687125632e-06, + "loss": 0.8153, + "step": 41920 + }, + { + "epoch": 1.658328224801756, + "grad_norm": 1.3540182816989457, + "learning_rate": 4.951449796890349e-06, + "loss": 0.8046, + "step": 41930 + }, + { + "epoch": 1.658723724020645, + "grad_norm": 1.2846223969690125, + "learning_rate": 4.949148916937166e-06, + "loss": 0.7978, + "step": 41940 + }, + { + "epoch": 1.659119223239534, + "grad_norm": 1.1978172717837627, + "learning_rate": 4.946848047753372e-06, + "loss": 0.7793, + "step": 41950 + }, + { + "epoch": 1.659514722458423, + "grad_norm": 
1.6702083592952615, + "learning_rate": 4.944547189826252e-06, + "loss": 0.791, + "step": 41960 + }, + { + "epoch": 1.659910221677312, + "grad_norm": 1.3200634483638447, + "learning_rate": 4.942246343643086e-06, + "loss": 0.7974, + "step": 41970 + }, + { + "epoch": 1.660305720896201, + "grad_norm": 1.3251496709118895, + "learning_rate": 4.939945509691155e-06, + "loss": 0.7787, + "step": 41980 + }, + { + "epoch": 1.6607012201150901, + "grad_norm": 1.4667522010162886, + "learning_rate": 4.937644688457735e-06, + "loss": 0.8195, + "step": 41990 + }, + { + "epoch": 1.6610967193339792, + "grad_norm": 1.572741584792104, + "learning_rate": 4.935343880430104e-06, + "loss": 0.788, + "step": 42000 + }, + { + "epoch": 1.6614922185528682, + "grad_norm": 1.5311403619254402, + "learning_rate": 4.93304308609553e-06, + "loss": 0.7796, + "step": 42010 + }, + { + "epoch": 1.6618877177717573, + "grad_norm": 1.1239757199185472, + "learning_rate": 4.930742305941286e-06, + "loss": 0.8199, + "step": 42020 + }, + { + "epoch": 1.6622832169906463, + "grad_norm": 1.4821709652376787, + "learning_rate": 4.928441540454633e-06, + "loss": 0.8008, + "step": 42030 + }, + { + "epoch": 1.6626787162095353, + "grad_norm": 1.3214292561972814, + "learning_rate": 4.926140790122835e-06, + "loss": 0.7966, + "step": 42040 + }, + { + "epoch": 1.6630742154284244, + "grad_norm": 1.3409218031743169, + "learning_rate": 4.923840055433153e-06, + "loss": 0.7605, + "step": 42050 + }, + { + "epoch": 1.6634697146473134, + "grad_norm": 1.470378145593192, + "learning_rate": 4.921539336872843e-06, + "loss": 0.8095, + "step": 42060 + }, + { + "epoch": 1.6638652138662025, + "grad_norm": 1.211922381443777, + "learning_rate": 4.919238634929156e-06, + "loss": 0.7845, + "step": 42070 + }, + { + "epoch": 1.6642607130850915, + "grad_norm": 1.4309073000503374, + "learning_rate": 4.9169379500893435e-06, + "loss": 0.8018, + "step": 42080 + }, + { + "epoch": 1.6646562123039808, + "grad_norm": 1.2459282904481337, + "learning_rate": 4.91463728284065e-06, + "loss": 0.8244, + "step": 42090 + }, + { + "epoch": 1.6650517115228698, + "grad_norm": 1.300560462748635, + "learning_rate": 4.912336633670317e-06, + "loss": 0.8191, + "step": 42100 + }, + { + "epoch": 1.6654472107417588, + "grad_norm": 1.5664068497204677, + "learning_rate": 4.910036003065584e-06, + "loss": 0.7726, + "step": 42110 + }, + { + "epoch": 1.6658427099606479, + "grad_norm": 1.4343503619714235, + "learning_rate": 4.907735391513683e-06, + "loss": 0.8152, + "step": 42120 + }, + { + "epoch": 1.666238209179537, + "grad_norm": 1.3538463598837878, + "learning_rate": 4.905434799501846e-06, + "loss": 0.8081, + "step": 42130 + }, + { + "epoch": 1.666633708398426, + "grad_norm": 1.4546245671485367, + "learning_rate": 4.9031342275173e-06, + "loss": 0.7889, + "step": 42140 + }, + { + "epoch": 1.667029207617315, + "grad_norm": 1.460979398214131, + "learning_rate": 4.900833676047264e-06, + "loss": 0.816, + "step": 42150 + }, + { + "epoch": 1.667424706836204, + "grad_norm": 1.4005379869488497, + "learning_rate": 4.8985331455789555e-06, + "loss": 0.8093, + "step": 42160 + }, + { + "epoch": 1.667820206055093, + "grad_norm": 1.461123299834793, + "learning_rate": 4.896232636599589e-06, + "loss": 0.8049, + "step": 42170 + }, + { + "epoch": 1.668215705273982, + "grad_norm": 1.498751814254091, + "learning_rate": 4.8939321495963725e-06, + "loss": 0.8109, + "step": 42180 + }, + { + "epoch": 1.6686112044928711, + "grad_norm": 1.3214552818503533, + "learning_rate": 4.8916316850565085e-06, + "loss": 0.7971, + "step": 42190 + 
}, + { + "epoch": 1.6690067037117602, + "grad_norm": 1.2904600340977768, + "learning_rate": 4.889331243467198e-06, + "loss": 0.8199, + "step": 42200 + }, + { + "epoch": 1.6694022029306492, + "grad_norm": 1.8066773438161514, + "learning_rate": 4.887030825315634e-06, + "loss": 0.793, + "step": 42210 + }, + { + "epoch": 1.6697977021495383, + "grad_norm": 1.4727567463500029, + "learning_rate": 4.884730431089005e-06, + "loss": 0.8071, + "step": 42220 + }, + { + "epoch": 1.6701932013684273, + "grad_norm": 1.328195378699885, + "learning_rate": 4.882430061274497e-06, + "loss": 0.7843, + "step": 42230 + }, + { + "epoch": 1.6705887005873163, + "grad_norm": 1.418817685555987, + "learning_rate": 4.880129716359287e-06, + "loss": 0.7891, + "step": 42240 + }, + { + "epoch": 1.6709841998062054, + "grad_norm": 1.3605698534039914, + "learning_rate": 4.87782939683055e-06, + "loss": 0.8008, + "step": 42250 + }, + { + "epoch": 1.6713796990250944, + "grad_norm": 1.7000954090105216, + "learning_rate": 4.875529103175456e-06, + "loss": 0.7688, + "step": 42260 + }, + { + "epoch": 1.6717751982439835, + "grad_norm": 1.4566491742786827, + "learning_rate": 4.873228835881162e-06, + "loss": 0.7561, + "step": 42270 + }, + { + "epoch": 1.6721706974628725, + "grad_norm": 1.3160028803144155, + "learning_rate": 4.87092859543483e-06, + "loss": 0.7863, + "step": 42280 + }, + { + "epoch": 1.6725661966817615, + "grad_norm": 1.2901877704331015, + "learning_rate": 4.86862838232361e-06, + "loss": 0.797, + "step": 42290 + }, + { + "epoch": 1.6729616959006506, + "grad_norm": 1.2041622522828366, + "learning_rate": 4.866328197034648e-06, + "loss": 0.8062, + "step": 42300 + }, + { + "epoch": 1.6733571951195396, + "grad_norm": 1.467281123581637, + "learning_rate": 4.864028040055083e-06, + "loss": 0.7804, + "step": 42310 + }, + { + "epoch": 1.6737526943384287, + "grad_norm": 1.5623391677438745, + "learning_rate": 4.8617279118720514e-06, + "loss": 0.7746, + "step": 42320 + }, + { + "epoch": 1.6741481935573177, + "grad_norm": 1.2405064822533243, + "learning_rate": 4.859427812972678e-06, + "loss": 0.8185, + "step": 42330 + }, + { + "epoch": 1.6745436927762067, + "grad_norm": 1.427040683576127, + "learning_rate": 4.857127743844085e-06, + "loss": 0.7971, + "step": 42340 + }, + { + "epoch": 1.6749391919950958, + "grad_norm": 1.3788531581757955, + "learning_rate": 4.8548277049733885e-06, + "loss": 0.7875, + "step": 42350 + }, + { + "epoch": 1.6753346912139848, + "grad_norm": 1.3577203505759141, + "learning_rate": 4.852527696847697e-06, + "loss": 0.7904, + "step": 42360 + }, + { + "epoch": 1.6757301904328739, + "grad_norm": 1.3937128443814712, + "learning_rate": 4.850227719954113e-06, + "loss": 0.8197, + "step": 42370 + }, + { + "epoch": 1.676125689651763, + "grad_norm": 1.501564988224279, + "learning_rate": 4.847927774779732e-06, + "loss": 0.786, + "step": 42380 + }, + { + "epoch": 1.676521188870652, + "grad_norm": 1.4957662122876973, + "learning_rate": 4.845627861811645e-06, + "loss": 0.7608, + "step": 42390 + }, + { + "epoch": 1.676916688089541, + "grad_norm": 1.4240772072539798, + "learning_rate": 4.8433279815369296e-06, + "loss": 0.7822, + "step": 42400 + }, + { + "epoch": 1.67731218730843, + "grad_norm": 1.2154685029738814, + "learning_rate": 4.841028134442664e-06, + "loss": 0.7841, + "step": 42410 + }, + { + "epoch": 1.677707686527319, + "grad_norm": 1.3227397609642766, + "learning_rate": 4.838728321015916e-06, + "loss": 0.7734, + "step": 42420 + }, + { + "epoch": 1.6781031857462083, + "grad_norm": 1.266335843013026, + "learning_rate": 
4.836428541743746e-06, + "loss": 0.7721, + "step": 42430 + }, + { + "epoch": 1.6784986849650974, + "grad_norm": 1.443548428620844, + "learning_rate": 4.834128797113209e-06, + "loss": 0.7606, + "step": 42440 + }, + { + "epoch": 1.6788941841839864, + "grad_norm": 1.5556808427805768, + "learning_rate": 4.831829087611351e-06, + "loss": 0.7987, + "step": 42450 + }, + { + "epoch": 1.6792896834028754, + "grad_norm": 1.2562177980775426, + "learning_rate": 4.829529413725211e-06, + "loss": 0.7999, + "step": 42460 + }, + { + "epoch": 1.6796851826217645, + "grad_norm": 1.437104020335207, + "learning_rate": 4.827229775941821e-06, + "loss": 0.7979, + "step": 42470 + }, + { + "epoch": 1.6800806818406535, + "grad_norm": 1.4806773656333854, + "learning_rate": 4.824930174748205e-06, + "loss": 0.797, + "step": 42480 + }, + { + "epoch": 1.6804761810595426, + "grad_norm": 1.362515207104312, + "learning_rate": 4.822630610631378e-06, + "loss": 0.7733, + "step": 42490 + }, + { + "epoch": 1.6808716802784316, + "grad_norm": 1.405912697625109, + "learning_rate": 4.82033108407835e-06, + "loss": 0.7945, + "step": 42500 + }, + { + "epoch": 1.6812671794973206, + "grad_norm": 1.2248690592419256, + "learning_rate": 4.8180315955761194e-06, + "loss": 0.8054, + "step": 42510 + }, + { + "epoch": 1.6816626787162097, + "grad_norm": 1.3441265232365305, + "learning_rate": 4.815732145611679e-06, + "loss": 0.8001, + "step": 42520 + }, + { + "epoch": 1.6820581779350987, + "grad_norm": 1.4068201423342177, + "learning_rate": 4.813432734672014e-06, + "loss": 0.8026, + "step": 42530 + }, + { + "epoch": 1.6824536771539877, + "grad_norm": 1.6335093188431398, + "learning_rate": 4.811133363244098e-06, + "loss": 0.7829, + "step": 42540 + }, + { + "epoch": 1.6828491763728768, + "grad_norm": 1.6361973567873165, + "learning_rate": 4.808834031814903e-06, + "loss": 0.7963, + "step": 42550 + }, + { + "epoch": 1.6832446755917658, + "grad_norm": 1.6063928983817761, + "learning_rate": 4.8065347408713825e-06, + "loss": 0.8293, + "step": 42560 + }, + { + "epoch": 1.6836401748106549, + "grad_norm": 1.189586260288003, + "learning_rate": 4.80423549090049e-06, + "loss": 0.8113, + "step": 42570 + }, + { + "epoch": 1.684035674029544, + "grad_norm": 1.4326924850284528, + "learning_rate": 4.8019362823891666e-06, + "loss": 0.803, + "step": 42580 + }, + { + "epoch": 1.684431173248433, + "grad_norm": 1.3667483940799752, + "learning_rate": 4.799637115824345e-06, + "loss": 0.798, + "step": 42590 + }, + { + "epoch": 1.684826672467322, + "grad_norm": 1.5016465200063276, + "learning_rate": 4.797337991692949e-06, + "loss": 0.7738, + "step": 42600 + }, + { + "epoch": 1.685222171686211, + "grad_norm": 1.6018033297555405, + "learning_rate": 4.795038910481895e-06, + "loss": 0.8036, + "step": 42610 + }, + { + "epoch": 1.6856176709051, + "grad_norm": 1.471060850050138, + "learning_rate": 4.7927398726780885e-06, + "loss": 0.8051, + "step": 42620 + }, + { + "epoch": 1.686013170123989, + "grad_norm": 1.5916982701656708, + "learning_rate": 4.7904408787684285e-06, + "loss": 0.7977, + "step": 42630 + }, + { + "epoch": 1.6864086693428781, + "grad_norm": 1.3876508928192102, + "learning_rate": 4.788141929239798e-06, + "loss": 0.8016, + "step": 42640 + }, + { + "epoch": 1.6868041685617672, + "grad_norm": 1.6229604411871494, + "learning_rate": 4.785843024579077e-06, + "loss": 0.7928, + "step": 42650 + }, + { + "epoch": 1.6871996677806562, + "grad_norm": 1.2848397455570024, + "learning_rate": 4.783544165273134e-06, + "loss": 0.7872, + "step": 42660 + }, + { + "epoch": 
1.6875951669995453, + "grad_norm": 1.8445795440823296, + "learning_rate": 4.781245351808829e-06, + "loss": 0.7803, + "step": 42670 + }, + { + "epoch": 1.6879906662184343, + "grad_norm": 1.5040229146897743, + "learning_rate": 4.7789465846730106e-06, + "loss": 0.7975, + "step": 42680 + }, + { + "epoch": 1.6883861654373233, + "grad_norm": 1.2624642923813387, + "learning_rate": 4.776647864352518e-06, + "loss": 0.8116, + "step": 42690 + }, + { + "epoch": 1.6887816646562124, + "grad_norm": 1.4422981083863158, + "learning_rate": 4.774349191334182e-06, + "loss": 0.8156, + "step": 42700 + }, + { + "epoch": 1.6891771638751014, + "grad_norm": 1.2807413259462608, + "learning_rate": 4.772050566104821e-06, + "loss": 0.8165, + "step": 42710 + }, + { + "epoch": 1.6895726630939905, + "grad_norm": 1.4337090275460487, + "learning_rate": 4.769751989151244e-06, + "loss": 0.7953, + "step": 42720 + }, + { + "epoch": 1.6899681623128795, + "grad_norm": 1.4730448060629187, + "learning_rate": 4.767453460960253e-06, + "loss": 0.7904, + "step": 42730 + }, + { + "epoch": 1.6903636615317685, + "grad_norm": 1.3122743484906407, + "learning_rate": 4.765154982018634e-06, + "loss": 0.7579, + "step": 42740 + }, + { + "epoch": 1.6907591607506576, + "grad_norm": 1.464506022989358, + "learning_rate": 4.762856552813167e-06, + "loss": 0.7927, + "step": 42750 + }, + { + "epoch": 1.6911546599695466, + "grad_norm": 1.2175133867313277, + "learning_rate": 4.7605581738306196e-06, + "loss": 0.7908, + "step": 42760 + }, + { + "epoch": 1.6915501591884357, + "grad_norm": 1.304249314924589, + "learning_rate": 4.758259845557748e-06, + "loss": 0.7788, + "step": 42770 + }, + { + "epoch": 1.6919456584073247, + "grad_norm": 1.5389999111391783, + "learning_rate": 4.755961568481299e-06, + "loss": 0.7893, + "step": 42780 + }, + { + "epoch": 1.6923411576262137, + "grad_norm": 1.283603829244678, + "learning_rate": 4.7536633430880106e-06, + "loss": 0.8001, + "step": 42790 + }, + { + "epoch": 1.6927366568451028, + "grad_norm": 1.533734508752668, + "learning_rate": 4.751365169864604e-06, + "loss": 0.7931, + "step": 42800 + }, + { + "epoch": 1.6931321560639918, + "grad_norm": 1.3646039861787496, + "learning_rate": 4.749067049297795e-06, + "loss": 0.7672, + "step": 42810 + }, + { + "epoch": 1.6935276552828809, + "grad_norm": 1.4328315251127925, + "learning_rate": 4.746768981874286e-06, + "loss": 0.7696, + "step": 42820 + }, + { + "epoch": 1.69392315450177, + "grad_norm": 1.3241302899423866, + "learning_rate": 4.744470968080769e-06, + "loss": 0.7965, + "step": 42830 + }, + { + "epoch": 1.694318653720659, + "grad_norm": 1.2411850828009556, + "learning_rate": 4.7421730084039225e-06, + "loss": 0.8008, + "step": 42840 + }, + { + "epoch": 1.694714152939548, + "grad_norm": 1.1850419218779245, + "learning_rate": 4.739875103330416e-06, + "loss": 0.7838, + "step": 42850 + }, + { + "epoch": 1.695109652158437, + "grad_norm": 1.477033790672088, + "learning_rate": 4.7375772533469055e-06, + "loss": 0.8107, + "step": 42860 + }, + { + "epoch": 1.695505151377326, + "grad_norm": 1.2723717474790426, + "learning_rate": 4.735279458940037e-06, + "loss": 0.7758, + "step": 42870 + }, + { + "epoch": 1.695900650596215, + "grad_norm": 1.5846393257758138, + "learning_rate": 4.732981720596447e-06, + "loss": 0.7926, + "step": 42880 + }, + { + "epoch": 1.6962961498151041, + "grad_norm": 1.2630931814888893, + "learning_rate": 4.73068403880275e-06, + "loss": 0.78, + "step": 42890 + }, + { + "epoch": 1.6966916490339932, + "grad_norm": 1.5360216483570945, + "learning_rate": 
4.728386414045561e-06, + "loss": 0.7923, + "step": 42900 + }, + { + "epoch": 1.6970871482528822, + "grad_norm": 1.4776253090034164, + "learning_rate": 4.726088846811476e-06, + "loss": 0.7844, + "step": 42910 + }, + { + "epoch": 1.6974826474717712, + "grad_norm": 1.1139686598064495, + "learning_rate": 4.72379133758708e-06, + "loss": 0.8108, + "step": 42920 + }, + { + "epoch": 1.6978781466906603, + "grad_norm": 1.3942163650179535, + "learning_rate": 4.721493886858947e-06, + "loss": 0.7758, + "step": 42930 + }, + { + "epoch": 1.6982736459095493, + "grad_norm": 1.233318492107555, + "learning_rate": 4.719196495113637e-06, + "loss": 0.7929, + "step": 42940 + }, + { + "epoch": 1.6986691451284384, + "grad_norm": 1.5815785738343369, + "learning_rate": 4.7168991628377e-06, + "loss": 0.7814, + "step": 42950 + }, + { + "epoch": 1.6990646443473274, + "grad_norm": 1.2812676881165932, + "learning_rate": 4.714601890517669e-06, + "loss": 0.7782, + "step": 42960 + }, + { + "epoch": 1.6994601435662164, + "grad_norm": 1.65062343339854, + "learning_rate": 4.712304678640069e-06, + "loss": 0.8127, + "step": 42970 + }, + { + "epoch": 1.6998556427851055, + "grad_norm": 1.2545488390976887, + "learning_rate": 4.710007527691409e-06, + "loss": 0.8155, + "step": 42980 + }, + { + "epoch": 1.7002511420039945, + "grad_norm": 1.331153033309248, + "learning_rate": 4.707710438158185e-06, + "loss": 0.791, + "step": 42990 + }, + { + "epoch": 1.7006466412228836, + "grad_norm": 1.45002074119932, + "learning_rate": 4.705413410526885e-06, + "loss": 0.8064, + "step": 43000 + }, + { + "epoch": 1.7010421404417726, + "grad_norm": 1.4229274728152188, + "learning_rate": 4.7031164452839764e-06, + "loss": 0.7802, + "step": 43010 + }, + { + "epoch": 1.7014376396606616, + "grad_norm": 1.5195023053378005, + "learning_rate": 4.700819542915919e-06, + "loss": 0.7878, + "step": 43020 + }, + { + "epoch": 1.7018331388795507, + "grad_norm": 1.1286717055297824, + "learning_rate": 4.698522703909156e-06, + "loss": 0.7924, + "step": 43030 + }, + { + "epoch": 1.7022286380984397, + "grad_norm": 1.3208356068146982, + "learning_rate": 4.69622592875012e-06, + "loss": 0.7788, + "step": 43040 + }, + { + "epoch": 1.7026241373173288, + "grad_norm": 1.4236047452016296, + "learning_rate": 4.6939292179252264e-06, + "loss": 0.7834, + "step": 43050 + }, + { + "epoch": 1.7030196365362178, + "grad_norm": 1.4213020799472975, + "learning_rate": 4.691632571920882e-06, + "loss": 0.7594, + "step": 43060 + }, + { + "epoch": 1.7034151357551068, + "grad_norm": 1.2990758698231542, + "learning_rate": 4.689335991223475e-06, + "loss": 0.7946, + "step": 43070 + }, + { + "epoch": 1.7038106349739959, + "grad_norm": 1.079892919526166, + "learning_rate": 4.687039476319384e-06, + "loss": 0.7934, + "step": 43080 + }, + { + "epoch": 1.704206134192885, + "grad_norm": 1.368650688183977, + "learning_rate": 4.68474302769497e-06, + "loss": 0.7947, + "step": 43090 + }, + { + "epoch": 1.704601633411774, + "grad_norm": 1.1723317519407415, + "learning_rate": 4.6824466458365805e-06, + "loss": 0.7928, + "step": 43100 + }, + { + "epoch": 1.704997132630663, + "grad_norm": 1.3812258058757154, + "learning_rate": 4.680150331230552e-06, + "loss": 0.7954, + "step": 43110 + }, + { + "epoch": 1.705392631849552, + "grad_norm": 1.461383247629719, + "learning_rate": 4.677854084363206e-06, + "loss": 0.7851, + "step": 43120 + }, + { + "epoch": 1.705788131068441, + "grad_norm": 1.3002946754519833, + "learning_rate": 4.675557905720842e-06, + "loss": 0.804, + "step": 43130 + }, + { + "epoch": 1.70618363028733, + 
"grad_norm": 1.6365299241906293, + "learning_rate": 4.673261795789757e-06, + "loss": 0.7856, + "step": 43140 + }, + { + "epoch": 1.7065791295062192, + "grad_norm": 1.2647620869391314, + "learning_rate": 4.6709657550562254e-06, + "loss": 0.7752, + "step": 43150 + }, + { + "epoch": 1.7069746287251082, + "grad_norm": 1.3916516294522465, + "learning_rate": 4.668669784006509e-06, + "loss": 0.7706, + "step": 43160 + }, + { + "epoch": 1.7073701279439972, + "grad_norm": 1.4425852227050018, + "learning_rate": 4.666373883126857e-06, + "loss": 0.7674, + "step": 43170 + }, + { + "epoch": 1.7077656271628863, + "grad_norm": 1.247225327649516, + "learning_rate": 4.664078052903501e-06, + "loss": 0.7791, + "step": 43180 + }, + { + "epoch": 1.7081611263817753, + "grad_norm": 1.5542956589994794, + "learning_rate": 4.661782293822657e-06, + "loss": 0.7868, + "step": 43190 + }, + { + "epoch": 1.7085566256006643, + "grad_norm": 1.262597111656027, + "learning_rate": 4.659486606370531e-06, + "loss": 0.8086, + "step": 43200 + }, + { + "epoch": 1.7089521248195534, + "grad_norm": 1.562777807705374, + "learning_rate": 4.657190991033306e-06, + "loss": 0.8009, + "step": 43210 + }, + { + "epoch": 1.7093476240384424, + "grad_norm": 1.4206171746031013, + "learning_rate": 4.654895448297157e-06, + "loss": 0.814, + "step": 43220 + }, + { + "epoch": 1.7097431232573315, + "grad_norm": 1.3357640746080472, + "learning_rate": 4.652599978648239e-06, + "loss": 0.7768, + "step": 43230 + }, + { + "epoch": 1.7101386224762205, + "grad_norm": 1.4507705043982013, + "learning_rate": 4.650304582572696e-06, + "loss": 0.7918, + "step": 43240 + }, + { + "epoch": 1.7105341216951095, + "grad_norm": 1.555565836525127, + "learning_rate": 4.648009260556648e-06, + "loss": 0.7431, + "step": 43250 + }, + { + "epoch": 1.7109296209139986, + "grad_norm": 1.4793011784054226, + "learning_rate": 4.645714013086212e-06, + "loss": 0.7626, + "step": 43260 + }, + { + "epoch": 1.7113251201328876, + "grad_norm": 1.3249260777705645, + "learning_rate": 4.643418840647475e-06, + "loss": 0.7873, + "step": 43270 + }, + { + "epoch": 1.7117206193517767, + "grad_norm": 1.141516036502993, + "learning_rate": 4.64112374372652e-06, + "loss": 0.7695, + "step": 43280 + }, + { + "epoch": 1.7121161185706657, + "grad_norm": 1.2682986606771651, + "learning_rate": 4.638828722809407e-06, + "loss": 0.8331, + "step": 43290 + }, + { + "epoch": 1.7125116177895547, + "grad_norm": 1.7800288765457992, + "learning_rate": 4.636533778382183e-06, + "loss": 0.7892, + "step": 43300 + }, + { + "epoch": 1.7129071170084438, + "grad_norm": 1.1573003314148935, + "learning_rate": 4.634238910930878e-06, + "loss": 0.7974, + "step": 43310 + }, + { + "epoch": 1.7133026162273328, + "grad_norm": 1.3140613896474784, + "learning_rate": 4.631944120941505e-06, + "loss": 0.768, + "step": 43320 + }, + { + "epoch": 1.7136981154462219, + "grad_norm": 1.242059835084353, + "learning_rate": 4.6296494089000625e-06, + "loss": 0.7981, + "step": 43330 + }, + { + "epoch": 1.714093614665111, + "grad_norm": 1.3520625891250195, + "learning_rate": 4.627354775292529e-06, + "loss": 0.8124, + "step": 43340 + }, + { + "epoch": 1.714489113884, + "grad_norm": 1.3938212345846235, + "learning_rate": 4.6250602206048706e-06, + "loss": 0.7992, + "step": 43350 + }, + { + "epoch": 1.714884613102889, + "grad_norm": 1.540168213658307, + "learning_rate": 4.6227657453230335e-06, + "loss": 0.7787, + "step": 43360 + }, + { + "epoch": 1.715280112321778, + "grad_norm": 1.5187184457938918, + "learning_rate": 4.620471349932951e-06, + "loss": 0.7827, + 
"step": 43370 + }, + { + "epoch": 1.715675611540667, + "grad_norm": 1.3365967903633527, + "learning_rate": 4.618177034920533e-06, + "loss": 0.784, + "step": 43380 + }, + { + "epoch": 1.716071110759556, + "grad_norm": 1.2613601465845692, + "learning_rate": 4.615882800771676e-06, + "loss": 0.7984, + "step": 43390 + }, + { + "epoch": 1.7164666099784451, + "grad_norm": 1.5110117544441157, + "learning_rate": 4.613588647972263e-06, + "loss": 0.8014, + "step": 43400 + }, + { + "epoch": 1.7168621091973342, + "grad_norm": 1.4515443369380563, + "learning_rate": 4.611294577008153e-06, + "loss": 0.7983, + "step": 43410 + }, + { + "epoch": 1.7172576084162232, + "grad_norm": 1.3116019499031484, + "learning_rate": 4.609000588365193e-06, + "loss": 0.7955, + "step": 43420 + }, + { + "epoch": 1.7176531076351125, + "grad_norm": 1.5715744245793175, + "learning_rate": 4.606706682529209e-06, + "loss": 0.8038, + "step": 43430 + }, + { + "epoch": 1.7180486068540015, + "grad_norm": 1.4233011934503232, + "learning_rate": 4.604412859986013e-06, + "loss": 0.8005, + "step": 43440 + }, + { + "epoch": 1.7184441060728906, + "grad_norm": 1.5406127604454116, + "learning_rate": 4.602119121221395e-06, + "loss": 0.7839, + "step": 43450 + }, + { + "epoch": 1.7188396052917796, + "grad_norm": 1.2969103620457323, + "learning_rate": 4.59982546672113e-06, + "loss": 0.8304, + "step": 43460 + }, + { + "epoch": 1.7192351045106686, + "grad_norm": 1.2168489023920228, + "learning_rate": 4.597531896970975e-06, + "loss": 0.8121, + "step": 43470 + }, + { + "epoch": 1.7196306037295577, + "grad_norm": 1.5503270882288969, + "learning_rate": 4.59523841245667e-06, + "loss": 0.7793, + "step": 43480 + }, + { + "epoch": 1.7200261029484467, + "grad_norm": 1.3386067126107042, + "learning_rate": 4.592945013663937e-06, + "loss": 0.7911, + "step": 43490 + }, + { + "epoch": 1.7204216021673358, + "grad_norm": 1.648293225110738, + "learning_rate": 4.590651701078474e-06, + "loss": 0.8038, + "step": 43500 + }, + { + "epoch": 1.7208171013862248, + "grad_norm": 1.411665150837201, + "learning_rate": 4.588358475185968e-06, + "loss": 0.7898, + "step": 43510 + }, + { + "epoch": 1.7212126006051138, + "grad_norm": 1.3745587267809554, + "learning_rate": 4.586065336472083e-06, + "loss": 0.7736, + "step": 43520 + }, + { + "epoch": 1.7216080998240029, + "grad_norm": 1.2600581481452702, + "learning_rate": 4.58377228542247e-06, + "loss": 0.773, + "step": 43530 + }, + { + "epoch": 1.722003599042892, + "grad_norm": 1.3379460524623126, + "learning_rate": 4.581479322522755e-06, + "loss": 0.8021, + "step": 43540 + }, + { + "epoch": 1.722399098261781, + "grad_norm": 1.5224938196495796, + "learning_rate": 4.57918644825855e-06, + "loss": 0.8054, + "step": 43550 + }, + { + "epoch": 1.72279459748067, + "grad_norm": 1.3272093623988706, + "learning_rate": 4.576893663115445e-06, + "loss": 0.7815, + "step": 43560 + }, + { + "epoch": 1.723190096699559, + "grad_norm": 1.189088259580658, + "learning_rate": 4.574600967579014e-06, + "loss": 0.7719, + "step": 43570 + }, + { + "epoch": 1.723585595918448, + "grad_norm": 1.804509097110049, + "learning_rate": 4.57230836213481e-06, + "loss": 0.7912, + "step": 43580 + }, + { + "epoch": 1.723981095137337, + "grad_norm": 1.3525131534672215, + "learning_rate": 4.570015847268368e-06, + "loss": 0.7626, + "step": 43590 + }, + { + "epoch": 1.7243765943562261, + "grad_norm": 1.5868380595371219, + "learning_rate": 4.567723423465203e-06, + "loss": 0.7899, + "step": 43600 + }, + { + "epoch": 1.7247720935751152, + "grad_norm": 1.1700013146166923, + 
"learning_rate": 4.565431091210813e-06, + "loss": 0.7999, + "step": 43610 + }, + { + "epoch": 1.7251675927940042, + "grad_norm": 1.5595369260555179, + "learning_rate": 4.56313885099067e-06, + "loss": 0.8083, + "step": 43620 + }, + { + "epoch": 1.7255630920128933, + "grad_norm": 1.3654884814429953, + "learning_rate": 4.560846703290234e-06, + "loss": 0.7943, + "step": 43630 + }, + { + "epoch": 1.7259585912317823, + "grad_norm": 1.264563670134767, + "learning_rate": 4.558554648594943e-06, + "loss": 0.7834, + "step": 43640 + }, + { + "epoch": 1.7263540904506713, + "grad_norm": 1.4524405450710334, + "learning_rate": 4.556262687390214e-06, + "loss": 0.7846, + "step": 43650 + }, + { + "epoch": 1.7267495896695604, + "grad_norm": 1.2135123442114277, + "learning_rate": 4.5539708201614466e-06, + "loss": 0.7732, + "step": 43660 + }, + { + "epoch": 1.7271450888884494, + "grad_norm": 1.3724194130354423, + "learning_rate": 4.5516790473940184e-06, + "loss": 0.799, + "step": 43670 + }, + { + "epoch": 1.7275405881073385, + "grad_norm": 1.2973309513581168, + "learning_rate": 4.5493873695732866e-06, + "loss": 0.8104, + "step": 43680 + }, + { + "epoch": 1.7279360873262275, + "grad_norm": 1.6248574289962425, + "learning_rate": 4.54709578718459e-06, + "loss": 0.7739, + "step": 43690 + }, + { + "epoch": 1.7283315865451165, + "grad_norm": 1.4590613801731587, + "learning_rate": 4.544804300713246e-06, + "loss": 0.7579, + "step": 43700 + }, + { + "epoch": 1.7287270857640056, + "grad_norm": 1.5067340878685358, + "learning_rate": 4.542512910644553e-06, + "loss": 0.7976, + "step": 43710 + }, + { + "epoch": 1.7291225849828946, + "grad_norm": 1.6010654889579239, + "learning_rate": 4.540221617463787e-06, + "loss": 0.7899, + "step": 43720 + }, + { + "epoch": 1.7295180842017837, + "grad_norm": 1.3715262231363263, + "learning_rate": 4.537930421656208e-06, + "loss": 0.8217, + "step": 43730 + }, + { + "epoch": 1.7299135834206727, + "grad_norm": 1.3163594482149317, + "learning_rate": 4.535639323707047e-06, + "loss": 0.7716, + "step": 43740 + }, + { + "epoch": 1.7303090826395617, + "grad_norm": 1.3055859024999616, + "learning_rate": 4.533348324101523e-06, + "loss": 0.7978, + "step": 43750 + }, + { + "epoch": 1.7307045818584508, + "grad_norm": 1.3722285625144957, + "learning_rate": 4.531057423324828e-06, + "loss": 0.7846, + "step": 43760 + }, + { + "epoch": 1.73110008107734, + "grad_norm": 1.2916459077755589, + "learning_rate": 4.528766621862137e-06, + "loss": 0.7907, + "step": 43770 + }, + { + "epoch": 1.731495580296229, + "grad_norm": 1.14407030889974, + "learning_rate": 4.526475920198602e-06, + "loss": 0.7888, + "step": 43780 + }, + { + "epoch": 1.7318910795151181, + "grad_norm": 1.2394495756314408, + "learning_rate": 4.524185318819355e-06, + "loss": 0.8048, + "step": 43790 + }, + { + "epoch": 1.7322865787340072, + "grad_norm": 1.3053063977768904, + "learning_rate": 4.5218948182095055e-06, + "loss": 0.7887, + "step": 43800 + }, + { + "epoch": 1.7326820779528962, + "grad_norm": 1.713932736485177, + "learning_rate": 4.5196044188541426e-06, + "loss": 0.7732, + "step": 43810 + }, + { + "epoch": 1.7330775771717852, + "grad_norm": 1.4351614371987402, + "learning_rate": 4.517314121238333e-06, + "loss": 0.7694, + "step": 43820 + }, + { + "epoch": 1.7334730763906743, + "grad_norm": 1.5428394603274558, + "learning_rate": 4.515023925847124e-06, + "loss": 0.7642, + "step": 43830 + }, + { + "epoch": 1.7338685756095633, + "grad_norm": 1.3262905067355322, + "learning_rate": 4.512733833165538e-06, + "loss": 0.8094, + "step": 43840 + }, + { + 
"epoch": 1.7342640748284524, + "grad_norm": 1.0615067206404898, + "learning_rate": 4.510443843678578e-06, + "loss": 0.7727, + "step": 43850 + }, + { + "epoch": 1.7346595740473414, + "grad_norm": 1.253974618679905, + "learning_rate": 4.508153957871228e-06, + "loss": 0.772, + "step": 43860 + }, + { + "epoch": 1.7350550732662304, + "grad_norm": 1.363532089707463, + "learning_rate": 4.5058641762284405e-06, + "loss": 0.7883, + "step": 43870 + }, + { + "epoch": 1.7354505724851195, + "grad_norm": 1.4190352078639084, + "learning_rate": 4.503574499235155e-06, + "loss": 0.7971, + "step": 43880 + }, + { + "epoch": 1.7358460717040085, + "grad_norm": 1.282005254832603, + "learning_rate": 4.5012849273762856e-06, + "loss": 0.7702, + "step": 43890 + }, + { + "epoch": 1.7362415709228975, + "grad_norm": 1.5165821503367798, + "learning_rate": 4.498995461136725e-06, + "loss": 0.7646, + "step": 43900 + }, + { + "epoch": 1.7366370701417866, + "grad_norm": 1.4966807550285086, + "learning_rate": 4.496706101001343e-06, + "loss": 0.7793, + "step": 43910 + }, + { + "epoch": 1.7370325693606756, + "grad_norm": 1.3873163639745776, + "learning_rate": 4.494416847454986e-06, + "loss": 0.7793, + "step": 43920 + }, + { + "epoch": 1.7374280685795647, + "grad_norm": 1.368106939601308, + "learning_rate": 4.492127700982477e-06, + "loss": 0.7856, + "step": 43930 + }, + { + "epoch": 1.7378235677984537, + "grad_norm": 1.3820321702043639, + "learning_rate": 4.489838662068622e-06, + "loss": 0.7859, + "step": 43940 + }, + { + "epoch": 1.7382190670173427, + "grad_norm": 1.3247565738913807, + "learning_rate": 4.487549731198197e-06, + "loss": 0.7712, + "step": 43950 + }, + { + "epoch": 1.7386145662362318, + "grad_norm": 1.60642127770446, + "learning_rate": 4.48526090885596e-06, + "loss": 0.7964, + "step": 43960 + }, + { + "epoch": 1.7390100654551208, + "grad_norm": 1.519598684598891, + "learning_rate": 4.482972195526644e-06, + "loss": 0.7534, + "step": 43970 + }, + { + "epoch": 1.7394055646740099, + "grad_norm": 1.3630472960650235, + "learning_rate": 4.480683591694961e-06, + "loss": 0.7667, + "step": 43980 + }, + { + "epoch": 1.739801063892899, + "grad_norm": 1.6283119330185185, + "learning_rate": 4.478395097845594e-06, + "loss": 0.7796, + "step": 43990 + }, + { + "epoch": 1.740196563111788, + "grad_norm": 1.1730907853344723, + "learning_rate": 4.47610671446321e-06, + "loss": 0.8033, + "step": 44000 + }, + { + "epoch": 1.740592062330677, + "grad_norm": 1.3834211017168112, + "learning_rate": 4.473818442032447e-06, + "loss": 0.8094, + "step": 44010 + }, + { + "epoch": 1.740987561549566, + "grad_norm": 1.5383465385946293, + "learning_rate": 4.4715302810379246e-06, + "loss": 0.8084, + "step": 44020 + }, + { + "epoch": 1.741383060768455, + "grad_norm": 1.5515370420499288, + "learning_rate": 4.469242231964234e-06, + "loss": 0.7819, + "step": 44030 + }, + { + "epoch": 1.741778559987344, + "grad_norm": 1.3522124000769695, + "learning_rate": 4.466954295295946e-06, + "loss": 0.7947, + "step": 44040 + }, + { + "epoch": 1.7421740592062331, + "grad_norm": 1.477866666677567, + "learning_rate": 4.4646664715176056e-06, + "loss": 0.8017, + "step": 44050 + }, + { + "epoch": 1.7425695584251222, + "grad_norm": 1.2171213913733678, + "learning_rate": 4.4623787611137355e-06, + "loss": 0.7637, + "step": 44060 + }, + { + "epoch": 1.7429650576440112, + "grad_norm": 1.4214546469646214, + "learning_rate": 4.460091164568833e-06, + "loss": 0.789, + "step": 44070 + }, + { + "epoch": 1.7433605568629003, + "grad_norm": 1.5340306748071701, + "learning_rate": 
4.457803682367373e-06, + "loss": 0.7907, + "step": 44080 + }, + { + "epoch": 1.7437560560817893, + "grad_norm": 1.6173509563536357, + "learning_rate": 4.455516314993804e-06, + "loss": 0.7548, + "step": 44090 + }, + { + "epoch": 1.7441515553006783, + "grad_norm": 1.5526486560273582, + "learning_rate": 4.453229062932552e-06, + "loss": 0.758, + "step": 44100 + }, + { + "epoch": 1.7445470545195674, + "grad_norm": 1.3342344683731149, + "learning_rate": 4.450941926668015e-06, + "loss": 0.8055, + "step": 44110 + }, + { + "epoch": 1.7449425537384564, + "grad_norm": 1.4338919841988385, + "learning_rate": 4.448654906684572e-06, + "loss": 0.7829, + "step": 44120 + }, + { + "epoch": 1.7453380529573455, + "grad_norm": 1.547178784383168, + "learning_rate": 4.446368003466574e-06, + "loss": 0.7892, + "step": 44130 + }, + { + "epoch": 1.7457335521762345, + "grad_norm": 1.6062509247795729, + "learning_rate": 4.444081217498349e-06, + "loss": 0.8044, + "step": 44140 + }, + { + "epoch": 1.7461290513951235, + "grad_norm": 1.4141914559416404, + "learning_rate": 4.441794549264196e-06, + "loss": 0.7774, + "step": 44150 + }, + { + "epoch": 1.7465245506140126, + "grad_norm": 1.392607812214566, + "learning_rate": 4.439507999248396e-06, + "loss": 0.7773, + "step": 44160 + }, + { + "epoch": 1.7469200498329016, + "grad_norm": 1.443839498310404, + "learning_rate": 4.4372215679351985e-06, + "loss": 0.8011, + "step": 44170 + }, + { + "epoch": 1.7473155490517907, + "grad_norm": 1.6681270048610946, + "learning_rate": 4.434935255808831e-06, + "loss": 0.7649, + "step": 44180 + }, + { + "epoch": 1.7477110482706797, + "grad_norm": 1.3821515146973142, + "learning_rate": 4.432649063353496e-06, + "loss": 0.7853, + "step": 44190 + }, + { + "epoch": 1.7481065474895687, + "grad_norm": 1.2105415917711317, + "learning_rate": 4.430362991053369e-06, + "loss": 0.788, + "step": 44200 + }, + { + "epoch": 1.7485020467084578, + "grad_norm": 1.3506672361945162, + "learning_rate": 4.428077039392602e-06, + "loss": 0.7769, + "step": 44210 + }, + { + "epoch": 1.7488975459273468, + "grad_norm": 1.3125742732995098, + "learning_rate": 4.4257912088553215e-06, + "loss": 0.8036, + "step": 44220 + }, + { + "epoch": 1.7492930451462358, + "grad_norm": 1.5724996332375645, + "learning_rate": 4.423505499925623e-06, + "loss": 0.7918, + "step": 44230 + }, + { + "epoch": 1.7496885443651249, + "grad_norm": 1.3996903588212173, + "learning_rate": 4.421219913087583e-06, + "loss": 0.8216, + "step": 44240 + }, + { + "epoch": 1.750084043584014, + "grad_norm": 1.5267959490567566, + "learning_rate": 4.41893444882525e-06, + "loss": 0.7713, + "step": 44250 + }, + { + "epoch": 1.750479542802903, + "grad_norm": 1.4311818426161296, + "learning_rate": 4.416649107622646e-06, + "loss": 0.7898, + "step": 44260 + }, + { + "epoch": 1.750875042021792, + "grad_norm": 1.3332990181789384, + "learning_rate": 4.414363889963766e-06, + "loss": 0.8003, + "step": 44270 + }, + { + "epoch": 1.751270541240681, + "grad_norm": 1.4080000551555123, + "learning_rate": 4.412078796332582e-06, + "loss": 0.8085, + "step": 44280 + }, + { + "epoch": 1.75166604045957, + "grad_norm": 1.4249913072448293, + "learning_rate": 4.409793827213036e-06, + "loss": 0.8044, + "step": 44290 + }, + { + "epoch": 1.7520615396784591, + "grad_norm": 1.4895033989886426, + "learning_rate": 4.407508983089046e-06, + "loss": 0.7982, + "step": 44300 + }, + { + "epoch": 1.7524570388973482, + "grad_norm": 1.2921987129638182, + "learning_rate": 4.405224264444502e-06, + "loss": 0.8082, + "step": 44310 + }, + { + "epoch": 
1.7528525381162372, + "grad_norm": 1.444011915566668, + "learning_rate": 4.40293967176327e-06, + "loss": 0.7632, + "step": 44320 + }, + { + "epoch": 1.7532480373351262, + "grad_norm": 1.2908306617089407, + "learning_rate": 4.400655205529187e-06, + "loss": 0.8017, + "step": 44330 + }, + { + "epoch": 1.7536435365540153, + "grad_norm": 1.3096535754533938, + "learning_rate": 4.398370866226065e-06, + "loss": 0.7632, + "step": 44340 + }, + { + "epoch": 1.7540390357729043, + "grad_norm": 1.3461796811902251, + "learning_rate": 4.3960866543376835e-06, + "loss": 0.7872, + "step": 44350 + }, + { + "epoch": 1.7544345349917934, + "grad_norm": 1.2807883025604567, + "learning_rate": 4.393802570347803e-06, + "loss": 0.7953, + "step": 44360 + }, + { + "epoch": 1.7548300342106824, + "grad_norm": 1.6433221941781169, + "learning_rate": 4.391518614740152e-06, + "loss": 0.7721, + "step": 44370 + }, + { + "epoch": 1.7552255334295714, + "grad_norm": 1.408821212382581, + "learning_rate": 4.3892347879984345e-06, + "loss": 0.7848, + "step": 44380 + }, + { + "epoch": 1.7556210326484605, + "grad_norm": 1.3192214630163677, + "learning_rate": 4.386951090606325e-06, + "loss": 0.7818, + "step": 44390 + }, + { + "epoch": 1.7560165318673495, + "grad_norm": 1.4823118378180073, + "learning_rate": 4.384667523047472e-06, + "loss": 0.7867, + "step": 44400 + }, + { + "epoch": 1.7564120310862386, + "grad_norm": 1.6090004865722174, + "learning_rate": 4.382384085805495e-06, + "loss": 0.7893, + "step": 44410 + }, + { + "epoch": 1.7568075303051276, + "grad_norm": 1.3019361148439694, + "learning_rate": 4.380100779363987e-06, + "loss": 0.773, + "step": 44420 + }, + { + "epoch": 1.7572030295240166, + "grad_norm": 1.3781399188672865, + "learning_rate": 4.377817604206514e-06, + "loss": 0.8034, + "step": 44430 + }, + { + "epoch": 1.7575985287429057, + "grad_norm": 1.1935781056824448, + "learning_rate": 4.375534560816613e-06, + "loss": 0.7914, + "step": 44440 + }, + { + "epoch": 1.7579940279617947, + "grad_norm": 1.421751383170964, + "learning_rate": 4.3732516496777945e-06, + "loss": 0.7645, + "step": 44450 + }, + { + "epoch": 1.7583895271806838, + "grad_norm": 1.4071668704830689, + "learning_rate": 4.370968871273538e-06, + "loss": 0.7902, + "step": 44460 + }, + { + "epoch": 1.7587850263995728, + "grad_norm": 1.3532918776688694, + "learning_rate": 4.368686226087301e-06, + "loss": 0.8074, + "step": 44470 + }, + { + "epoch": 1.7591805256184618, + "grad_norm": 1.2007198128694068, + "learning_rate": 4.366403714602502e-06, + "loss": 0.7635, + "step": 44480 + }, + { + "epoch": 1.7595760248373509, + "grad_norm": 1.3450244739720503, + "learning_rate": 4.3641213373025425e-06, + "loss": 0.7836, + "step": 44490 + }, + { + "epoch": 1.75997152405624, + "grad_norm": 1.5882583966104258, + "learning_rate": 4.361839094670789e-06, + "loss": 0.7967, + "step": 44500 + }, + { + "epoch": 1.760367023275129, + "grad_norm": 1.2176379769436472, + "learning_rate": 4.359556987190583e-06, + "loss": 0.7777, + "step": 44510 + }, + { + "epoch": 1.760762522494018, + "grad_norm": 1.3298017266261861, + "learning_rate": 4.357275015345235e-06, + "loss": 0.7946, + "step": 44520 + }, + { + "epoch": 1.761158021712907, + "grad_norm": 1.4489843147698926, + "learning_rate": 4.354993179618026e-06, + "loss": 0.779, + "step": 44530 + }, + { + "epoch": 1.761553520931796, + "grad_norm": 1.5955900570171213, + "learning_rate": 4.3527114804922125e-06, + "loss": 0.7654, + "step": 44540 + }, + { + "epoch": 1.761949020150685, + "grad_norm": 1.3389699944648392, + "learning_rate": 
4.350429918451018e-06, + "loss": 0.7955, + "step": 44550 + }, + { + "epoch": 1.7623445193695741, + "grad_norm": 1.64525477102138, + "learning_rate": 4.348148493977638e-06, + "loss": 0.7649, + "step": 44560 + }, + { + "epoch": 1.7627400185884632, + "grad_norm": 1.556791328967586, + "learning_rate": 4.34586720755524e-06, + "loss": 0.7423, + "step": 44570 + }, + { + "epoch": 1.7631355178073522, + "grad_norm": 1.6112608140121207, + "learning_rate": 4.343586059666959e-06, + "loss": 0.7672, + "step": 44580 + }, + { + "epoch": 1.7635310170262413, + "grad_norm": 1.4049003769288613, + "learning_rate": 4.341305050795907e-06, + "loss": 0.797, + "step": 44590 + }, + { + "epoch": 1.7639265162451303, + "grad_norm": 1.4151072328398027, + "learning_rate": 4.339024181425159e-06, + "loss": 0.7824, + "step": 44600 + }, + { + "epoch": 1.7643220154640193, + "grad_norm": 1.4105712582019052, + "learning_rate": 4.336743452037767e-06, + "loss": 0.7787, + "step": 44610 + }, + { + "epoch": 1.7647175146829084, + "grad_norm": 1.3403168130991987, + "learning_rate": 4.334462863116747e-06, + "loss": 0.7756, + "step": 44620 + }, + { + "epoch": 1.7651130139017974, + "grad_norm": 1.4747284421853857, + "learning_rate": 4.33218241514509e-06, + "loss": 0.7773, + "step": 44630 + }, + { + "epoch": 1.7655085131206865, + "grad_norm": 1.4345108874032895, + "learning_rate": 4.329902108605758e-06, + "loss": 0.7693, + "step": 44640 + }, + { + "epoch": 1.7659040123395755, + "grad_norm": 1.6038288444856101, + "learning_rate": 4.327621943981678e-06, + "loss": 0.8043, + "step": 44650 + }, + { + "epoch": 1.7662995115584645, + "grad_norm": 1.4731335139835002, + "learning_rate": 4.3253419217557506e-06, + "loss": 0.796, + "step": 44660 + }, + { + "epoch": 1.7666950107773536, + "grad_norm": 1.613875442187249, + "learning_rate": 4.323062042410846e-06, + "loss": 0.7654, + "step": 44670 + }, + { + "epoch": 1.7670905099962426, + "grad_norm": 1.4677278660609412, + "learning_rate": 4.320782306429804e-06, + "loss": 0.7775, + "step": 44680 + }, + { + "epoch": 1.7674860092151317, + "grad_norm": 1.7639145973226702, + "learning_rate": 4.318502714295433e-06, + "loss": 0.8005, + "step": 44690 + }, + { + "epoch": 1.7678815084340207, + "grad_norm": 1.6021415871617222, + "learning_rate": 4.316223266490511e-06, + "loss": 0.7929, + "step": 44700 + }, + { + "epoch": 1.7682770076529097, + "grad_norm": 1.362740997156845, + "learning_rate": 4.313943963497788e-06, + "loss": 0.7936, + "step": 44710 + }, + { + "epoch": 1.7686725068717988, + "grad_norm": 1.292511416699964, + "learning_rate": 4.31166480579998e-06, + "loss": 0.7669, + "step": 44720 + }, + { + "epoch": 1.7690680060906878, + "grad_norm": 1.3835456305885425, + "learning_rate": 4.309385793879772e-06, + "loss": 0.7744, + "step": 44730 + }, + { + "epoch": 1.7694635053095769, + "grad_norm": 1.819094092993542, + "learning_rate": 4.307106928219821e-06, + "loss": 0.7483, + "step": 44740 + }, + { + "epoch": 1.769859004528466, + "grad_norm": 1.452203152532573, + "learning_rate": 4.3048282093027524e-06, + "loss": 0.7768, + "step": 44750 + }, + { + "epoch": 1.770254503747355, + "grad_norm": 1.363704627632019, + "learning_rate": 4.302549637611158e-06, + "loss": 0.7643, + "step": 44760 + }, + { + "epoch": 1.7706500029662442, + "grad_norm": 1.4401428913720895, + "learning_rate": 4.300271213627603e-06, + "loss": 0.782, + "step": 44770 + }, + { + "epoch": 1.7710455021851332, + "grad_norm": 1.35899039954038, + "learning_rate": 4.297992937834617e-06, + "loss": 0.7813, + "step": 44780 + }, + { + "epoch": 1.7714410014040223, + 
"grad_norm": 1.3956822551876829, + "learning_rate": 4.295714810714699e-06, + "loss": 0.7504, + "step": 44790 + }, + { + "epoch": 1.7718365006229113, + "grad_norm": 1.4914086598701293, + "learning_rate": 4.29343683275032e-06, + "loss": 0.7586, + "step": 44800 + }, + { + "epoch": 1.7722319998418004, + "grad_norm": 1.623809994117821, + "learning_rate": 4.2911590044239125e-06, + "loss": 0.808, + "step": 44810 + }, + { + "epoch": 1.7726274990606894, + "grad_norm": 1.6039147208215316, + "learning_rate": 4.288881326217886e-06, + "loss": 0.7624, + "step": 44820 + }, + { + "epoch": 1.7730229982795784, + "grad_norm": 1.5259344298884712, + "learning_rate": 4.286603798614611e-06, + "loss": 0.7704, + "step": 44830 + }, + { + "epoch": 1.7734184974984675, + "grad_norm": 1.3225292504284039, + "learning_rate": 4.284326422096428e-06, + "loss": 0.7863, + "step": 44840 + }, + { + "epoch": 1.7738139967173565, + "grad_norm": 1.408858905221673, + "learning_rate": 4.28204919714565e-06, + "loss": 0.7612, + "step": 44850 + }, + { + "epoch": 1.7742094959362456, + "grad_norm": 1.4230568037531317, + "learning_rate": 4.27977212424455e-06, + "loss": 0.7788, + "step": 44860 + }, + { + "epoch": 1.7746049951551346, + "grad_norm": 1.2483437728124192, + "learning_rate": 4.277495203875376e-06, + "loss": 0.7886, + "step": 44870 + }, + { + "epoch": 1.7750004943740236, + "grad_norm": 1.4672133290171747, + "learning_rate": 4.275218436520339e-06, + "loss": 0.7784, + "step": 44880 + }, + { + "epoch": 1.7753959935929127, + "grad_norm": 1.393363646562001, + "learning_rate": 4.2729418226616205e-06, + "loss": 0.7832, + "step": 44890 + }, + { + "epoch": 1.7757914928118017, + "grad_norm": 1.478880340926508, + "learning_rate": 4.2706653627813675e-06, + "loss": 0.7852, + "step": 44900 + }, + { + "epoch": 1.7761869920306907, + "grad_norm": 1.568129819601717, + "learning_rate": 4.268389057361695e-06, + "loss": 0.7781, + "step": 44910 + }, + { + "epoch": 1.7765824912495798, + "grad_norm": 1.4193852075004723, + "learning_rate": 4.2661129068846875e-06, + "loss": 0.7893, + "step": 44920 + }, + { + "epoch": 1.7769779904684688, + "grad_norm": 1.5068973474620326, + "learning_rate": 4.263836911832392e-06, + "loss": 0.7533, + "step": 44930 + }, + { + "epoch": 1.7773734896873579, + "grad_norm": 1.3691205031676723, + "learning_rate": 4.261561072686827e-06, + "loss": 0.7648, + "step": 44940 + }, + { + "epoch": 1.777768988906247, + "grad_norm": 1.468170859456608, + "learning_rate": 4.2592853899299755e-06, + "loss": 0.7985, + "step": 44950 + }, + { + "epoch": 1.778164488125136, + "grad_norm": 1.450527616166147, + "learning_rate": 4.257009864043791e-06, + "loss": 0.7933, + "step": 44960 + }, + { + "epoch": 1.778559987344025, + "grad_norm": 1.461212450557013, + "learning_rate": 4.254734495510185e-06, + "loss": 0.7813, + "step": 44970 + }, + { + "epoch": 1.778955486562914, + "grad_norm": 1.4166972287102884, + "learning_rate": 4.252459284811046e-06, + "loss": 0.7764, + "step": 44980 + }, + { + "epoch": 1.779350985781803, + "grad_norm": 1.0924556310868954, + "learning_rate": 4.250184232428223e-06, + "loss": 0.803, + "step": 44990 + }, + { + "epoch": 1.779746485000692, + "grad_norm": 1.2439774206128422, + "learning_rate": 4.247909338843534e-06, + "loss": 0.7875, + "step": 45000 + }, + { + "epoch": 1.7801419842195811, + "grad_norm": 1.827258645312475, + "learning_rate": 4.2456346045387615e-06, + "loss": 0.7703, + "step": 45010 + }, + { + "epoch": 1.7805374834384702, + "grad_norm": 1.1506913522004274, + "learning_rate": 4.243360029995656e-06, + "loss": 0.7682, + 
"step": 45020 + }, + { + "epoch": 1.7809329826573592, + "grad_norm": 1.4542294363594457, + "learning_rate": 4.241085615695935e-06, + "loss": 0.7699, + "step": 45030 + }, + { + "epoch": 1.7813284818762483, + "grad_norm": 1.3816438258849228, + "learning_rate": 4.238811362121277e-06, + "loss": 0.7842, + "step": 45040 + }, + { + "epoch": 1.7817239810951373, + "grad_norm": 1.7282756274413493, + "learning_rate": 4.236537269753331e-06, + "loss": 0.7568, + "step": 45050 + }, + { + "epoch": 1.7821194803140263, + "grad_norm": 1.494841111415543, + "learning_rate": 4.2342633390737126e-06, + "loss": 0.775, + "step": 45060 + }, + { + "epoch": 1.7825149795329154, + "grad_norm": 1.5946477635242504, + "learning_rate": 4.231989570564e-06, + "loss": 0.7559, + "step": 45070 + }, + { + "epoch": 1.7829104787518044, + "grad_norm": 1.2593815456816662, + "learning_rate": 4.2297159647057405e-06, + "loss": 0.7781, + "step": 45080 + }, + { + "epoch": 1.7833059779706935, + "grad_norm": 1.2234962134948866, + "learning_rate": 4.227442521980441e-06, + "loss": 0.7753, + "step": 45090 + }, + { + "epoch": 1.7837014771895825, + "grad_norm": 1.6497901789470792, + "learning_rate": 4.22516924286958e-06, + "loss": 0.7817, + "step": 45100 + }, + { + "epoch": 1.7840969764084718, + "grad_norm": 1.3971012132818765, + "learning_rate": 4.222896127854598e-06, + "loss": 0.7688, + "step": 45110 + }, + { + "epoch": 1.7844924756273608, + "grad_norm": 1.6813738699880654, + "learning_rate": 4.220623177416903e-06, + "loss": 0.7732, + "step": 45120 + }, + { + "epoch": 1.7848879748462498, + "grad_norm": 1.630597845864978, + "learning_rate": 4.218350392037866e-06, + "loss": 0.733, + "step": 45130 + }, + { + "epoch": 1.7852834740651389, + "grad_norm": 1.157999637773178, + "learning_rate": 4.216077772198826e-06, + "loss": 0.7734, + "step": 45140 + }, + { + "epoch": 1.785678973284028, + "grad_norm": 1.215136790667082, + "learning_rate": 4.213805318381084e-06, + "loss": 0.7977, + "step": 45150 + }, + { + "epoch": 1.786074472502917, + "grad_norm": 1.6018416144309322, + "learning_rate": 4.2115330310659046e-06, + "loss": 0.7538, + "step": 45160 + }, + { + "epoch": 1.786469971721806, + "grad_norm": 1.4296328880052969, + "learning_rate": 4.209260910734522e-06, + "loss": 0.7673, + "step": 45170 + }, + { + "epoch": 1.786865470940695, + "grad_norm": 1.289735164772647, + "learning_rate": 4.20698895786813e-06, + "loss": 0.795, + "step": 45180 + }, + { + "epoch": 1.787260970159584, + "grad_norm": 1.4003928626152649, + "learning_rate": 4.204717172947892e-06, + "loss": 0.7675, + "step": 45190 + }, + { + "epoch": 1.7876564693784731, + "grad_norm": 1.435218394120719, + "learning_rate": 4.202445556454934e-06, + "loss": 0.7873, + "step": 45200 + }, + { + "epoch": 1.7880519685973622, + "grad_norm": 1.6299015380454278, + "learning_rate": 4.200174108870341e-06, + "loss": 0.7339, + "step": 45210 + }, + { + "epoch": 1.7884474678162512, + "grad_norm": 1.5999887640432664, + "learning_rate": 4.197902830675169e-06, + "loss": 0.787, + "step": 45220 + }, + { + "epoch": 1.7888429670351402, + "grad_norm": 1.1896054718627778, + "learning_rate": 4.195631722350436e-06, + "loss": 0.7945, + "step": 45230 + }, + { + "epoch": 1.7892384662540293, + "grad_norm": 1.3360387219716665, + "learning_rate": 4.193360784377123e-06, + "loss": 0.7504, + "step": 45240 + }, + { + "epoch": 1.7896339654729183, + "grad_norm": 1.461943732480947, + "learning_rate": 4.191090017236177e-06, + "loss": 0.7715, + "step": 45250 + }, + { + "epoch": 1.7900294646918073, + "grad_norm": 1.5371448898378919, + 
"learning_rate": 4.188819421408507e-06, + "loss": 0.7535, + "step": 45260 + }, + { + "epoch": 1.7904249639106964, + "grad_norm": 1.4208965498047244, + "learning_rate": 4.186548997374986e-06, + "loss": 0.7697, + "step": 45270 + }, + { + "epoch": 1.7908204631295854, + "grad_norm": 1.4778972438745934, + "learning_rate": 4.184278745616451e-06, + "loss": 0.7745, + "step": 45280 + }, + { + "epoch": 1.7912159623484745, + "grad_norm": 1.4165928337881941, + "learning_rate": 4.182008666613703e-06, + "loss": 0.7566, + "step": 45290 + }, + { + "epoch": 1.7916114615673635, + "grad_norm": 1.7572655586233457, + "learning_rate": 4.179738760847505e-06, + "loss": 0.7856, + "step": 45300 + }, + { + "epoch": 1.7920069607862525, + "grad_norm": 1.6129481366580611, + "learning_rate": 4.1774690287985845e-06, + "loss": 0.7325, + "step": 45310 + }, + { + "epoch": 1.7924024600051416, + "grad_norm": 1.2045356151529507, + "learning_rate": 4.1751994709476345e-06, + "loss": 0.7645, + "step": 45320 + }, + { + "epoch": 1.7927979592240306, + "grad_norm": 1.468682648123263, + "learning_rate": 4.1729300877753035e-06, + "loss": 0.771, + "step": 45330 + }, + { + "epoch": 1.7931934584429197, + "grad_norm": 1.5377179201316538, + "learning_rate": 4.170660879762211e-06, + "loss": 0.7721, + "step": 45340 + }, + { + "epoch": 1.7935889576618087, + "grad_norm": 1.8154945244252436, + "learning_rate": 4.168391847388934e-06, + "loss": 0.7695, + "step": 45350 + }, + { + "epoch": 1.7939844568806977, + "grad_norm": 1.3895031709559718, + "learning_rate": 4.166122991136018e-06, + "loss": 0.781, + "step": 45360 + }, + { + "epoch": 1.7943799560995868, + "grad_norm": 1.239612160982945, + "learning_rate": 4.163854311483966e-06, + "loss": 0.8003, + "step": 45370 + }, + { + "epoch": 1.7947754553184758, + "grad_norm": 1.5197095804328187, + "learning_rate": 4.161585808913246e-06, + "loss": 0.7787, + "step": 45380 + }, + { + "epoch": 1.7951709545373649, + "grad_norm": 1.5401817467103172, + "learning_rate": 4.1593174839042874e-06, + "loss": 0.7719, + "step": 45390 + }, + { + "epoch": 1.795566453756254, + "grad_norm": 1.349350293611061, + "learning_rate": 4.157049336937483e-06, + "loss": 0.759, + "step": 45400 + }, + { + "epoch": 1.795961952975143, + "grad_norm": 1.3723117613601592, + "learning_rate": 4.154781368493187e-06, + "loss": 0.7763, + "step": 45410 + }, + { + "epoch": 1.796357452194032, + "grad_norm": 1.545437380981378, + "learning_rate": 4.152513579051718e-06, + "loss": 0.7718, + "step": 45420 + }, + { + "epoch": 1.796752951412921, + "grad_norm": 1.583606779428353, + "learning_rate": 4.150245969093353e-06, + "loss": 0.7611, + "step": 45430 + }, + { + "epoch": 1.79714845063181, + "grad_norm": 1.476474120051364, + "learning_rate": 4.147978539098334e-06, + "loss": 0.752, + "step": 45440 + }, + { + "epoch": 1.797543949850699, + "grad_norm": 1.5210462138085645, + "learning_rate": 4.1457112895468645e-06, + "loss": 0.7761, + "step": 45450 + }, + { + "epoch": 1.7979394490695881, + "grad_norm": 1.633826341873089, + "learning_rate": 4.143444220919107e-06, + "loss": 0.7721, + "step": 45460 + }, + { + "epoch": 1.7983349482884772, + "grad_norm": 1.7503841990183295, + "learning_rate": 4.141177333695188e-06, + "loss": 0.7897, + "step": 45470 + }, + { + "epoch": 1.7987304475073662, + "grad_norm": 1.2619864241037637, + "learning_rate": 4.138910628355197e-06, + "loss": 0.8, + "step": 45480 + }, + { + "epoch": 1.7991259467262553, + "grad_norm": 1.471301430485854, + "learning_rate": 4.136644105379182e-06, + "loss": 0.8015, + "step": 45490 + }, + { + "epoch": 
1.7995214459451443, + "grad_norm": 1.4421259886120517, + "learning_rate": 4.134377765247155e-06, + "loss": 0.7605, + "step": 45500 + }, + { + "epoch": 1.7999169451640333, + "grad_norm": 1.3516067775719383, + "learning_rate": 4.132111608439087e-06, + "loss": 0.7841, + "step": 45510 + }, + { + "epoch": 1.8003124443829224, + "grad_norm": 1.2441439433322172, + "learning_rate": 4.129845635434911e-06, + "loss": 0.7508, + "step": 45520 + }, + { + "epoch": 1.8007079436018114, + "grad_norm": 1.4889357719088259, + "learning_rate": 4.127579846714522e-06, + "loss": 0.7765, + "step": 45530 + }, + { + "epoch": 1.8011034428207005, + "grad_norm": 1.557395453210787, + "learning_rate": 4.125314242757775e-06, + "loss": 0.757, + "step": 45540 + }, + { + "epoch": 1.8014989420395895, + "grad_norm": 1.3010898195447373, + "learning_rate": 4.123048824044486e-06, + "loss": 0.7742, + "step": 45550 + }, + { + "epoch": 1.8018944412584785, + "grad_norm": 1.1998803660136321, + "learning_rate": 4.120783591054433e-06, + "loss": 0.7582, + "step": 45560 + }, + { + "epoch": 1.8022899404773676, + "grad_norm": 1.5045054749468374, + "learning_rate": 4.118518544267353e-06, + "loss": 0.7387, + "step": 45570 + }, + { + "epoch": 1.8026854396962566, + "grad_norm": 1.387991288840058, + "learning_rate": 4.116253684162943e-06, + "loss": 0.7709, + "step": 45580 + }, + { + "epoch": 1.8030809389151456, + "grad_norm": 1.5643526758483042, + "learning_rate": 4.11398901122086e-06, + "loss": 0.7819, + "step": 45590 + }, + { + "epoch": 1.8034764381340347, + "grad_norm": 1.3172101138170522, + "learning_rate": 4.111724525920727e-06, + "loss": 0.765, + "step": 45600 + }, + { + "epoch": 1.8038719373529237, + "grad_norm": 1.4102617081286002, + "learning_rate": 4.1094602287421215e-06, + "loss": 0.7881, + "step": 45610 + }, + { + "epoch": 1.8042674365718128, + "grad_norm": 1.3142229000056214, + "learning_rate": 4.107196120164582e-06, + "loss": 0.7694, + "step": 45620 + }, + { + "epoch": 1.8046629357907018, + "grad_norm": 1.416473754856598, + "learning_rate": 4.104932200667609e-06, + "loss": 0.7653, + "step": 45630 + }, + { + "epoch": 1.8050584350095908, + "grad_norm": 1.2073096088392639, + "learning_rate": 4.10266847073066e-06, + "loss": 0.7834, + "step": 45640 + }, + { + "epoch": 1.8054539342284799, + "grad_norm": 1.8836674488829814, + "learning_rate": 4.1004049308331565e-06, + "loss": 0.7594, + "step": 45650 + }, + { + "epoch": 1.805849433447369, + "grad_norm": 1.3055936757301028, + "learning_rate": 4.098141581454477e-06, + "loss": 0.7665, + "step": 45660 + }, + { + "epoch": 1.806244932666258, + "grad_norm": 1.5364421268664765, + "learning_rate": 4.09587842307396e-06, + "loss": 0.7677, + "step": 45670 + }, + { + "epoch": 1.806640431885147, + "grad_norm": 1.4259537098748811, + "learning_rate": 4.0936154561709035e-06, + "loss": 0.78, + "step": 45680 + }, + { + "epoch": 1.807035931104036, + "grad_norm": 1.2407664826893434, + "learning_rate": 4.0913526812245655e-06, + "loss": 0.7984, + "step": 45690 + }, + { + "epoch": 1.807431430322925, + "grad_norm": 1.3636778782955388, + "learning_rate": 4.089090098714161e-06, + "loss": 0.8002, + "step": 45700 + }, + { + "epoch": 1.8078269295418141, + "grad_norm": 1.4363421582210136, + "learning_rate": 4.086827709118868e-06, + "loss": 0.7473, + "step": 45710 + }, + { + "epoch": 1.8082224287607032, + "grad_norm": 1.9586507531462394, + "learning_rate": 4.084565512917822e-06, + "loss": 0.765, + "step": 45720 + }, + { + "epoch": 1.8086179279795922, + "grad_norm": 1.5234385476542143, + "learning_rate": 
4.082303510590117e-06, + "loss": 0.7729, + "step": 45730 + }, + { + "epoch": 1.8090134271984812, + "grad_norm": 1.5199325513116924, + "learning_rate": 4.080041702614807e-06, + "loss": 0.7838, + "step": 45740 + }, + { + "epoch": 1.8094089264173703, + "grad_norm": 1.3517545894513459, + "learning_rate": 4.077780089470902e-06, + "loss": 0.7764, + "step": 45750 + }, + { + "epoch": 1.8098044256362593, + "grad_norm": 1.5972210696439342, + "learning_rate": 4.075518671637375e-06, + "loss": 0.7623, + "step": 45760 + }, + { + "epoch": 1.8101999248551484, + "grad_norm": 1.6592142475372433, + "learning_rate": 4.073257449593156e-06, + "loss": 0.7642, + "step": 45770 + }, + { + "epoch": 1.8105954240740374, + "grad_norm": 1.269483600304672, + "learning_rate": 4.07099642381713e-06, + "loss": 0.7943, + "step": 45780 + }, + { + "epoch": 1.8109909232929264, + "grad_norm": 1.389191623328749, + "learning_rate": 4.068735594788146e-06, + "loss": 0.7757, + "step": 45790 + }, + { + "epoch": 1.8113864225118155, + "grad_norm": 1.5224690858989347, + "learning_rate": 4.066474962985009e-06, + "loss": 0.7543, + "step": 45800 + }, + { + "epoch": 1.8117819217307045, + "grad_norm": 1.5179014185853363, + "learning_rate": 4.064214528886484e-06, + "loss": 0.7494, + "step": 45810 + }, + { + "epoch": 1.8121774209495936, + "grad_norm": 1.5700708861766726, + "learning_rate": 4.061954292971287e-06, + "loss": 0.7741, + "step": 45820 + }, + { + "epoch": 1.8125729201684826, + "grad_norm": 1.2761813531353026, + "learning_rate": 4.0596942557181004e-06, + "loss": 0.7651, + "step": 45830 + }, + { + "epoch": 1.8129684193873716, + "grad_norm": 1.3097193073534474, + "learning_rate": 4.05743441760556e-06, + "loss": 0.7838, + "step": 45840 + }, + { + "epoch": 1.8133639186062607, + "grad_norm": 1.5033990406958209, + "learning_rate": 4.055174779112262e-06, + "loss": 0.7689, + "step": 45850 + }, + { + "epoch": 1.8137594178251497, + "grad_norm": 1.496448182208677, + "learning_rate": 4.05291534071676e-06, + "loss": 0.7744, + "step": 45860 + }, + { + "epoch": 1.8141549170440388, + "grad_norm": 1.619399835825619, + "learning_rate": 4.050656102897562e-06, + "loss": 0.7584, + "step": 45870 + }, + { + "epoch": 1.8145504162629278, + "grad_norm": 1.4993273938849707, + "learning_rate": 4.048397066133138e-06, + "loss": 0.8094, + "step": 45880 + }, + { + "epoch": 1.8149459154818168, + "grad_norm": 1.521287738985428, + "learning_rate": 4.0461382309019114e-06, + "loss": 0.7569, + "step": 45890 + }, + { + "epoch": 1.8153414147007059, + "grad_norm": 1.5487038465547813, + "learning_rate": 4.043879597682266e-06, + "loss": 0.7746, + "step": 45900 + }, + { + "epoch": 1.815736913919595, + "grad_norm": 1.3562951761395383, + "learning_rate": 4.041621166952542e-06, + "loss": 0.7766, + "step": 45910 + }, + { + "epoch": 1.816132413138484, + "grad_norm": 1.412088105457243, + "learning_rate": 4.039362939191036e-06, + "loss": 0.7628, + "step": 45920 + }, + { + "epoch": 1.816527912357373, + "grad_norm": 1.2410648810785314, + "learning_rate": 4.037104914876001e-06, + "loss": 0.7808, + "step": 45930 + }, + { + "epoch": 1.816923411576262, + "grad_norm": 1.38406206806233, + "learning_rate": 4.0348470944856496e-06, + "loss": 0.7722, + "step": 45940 + }, + { + "epoch": 1.817318910795151, + "grad_norm": 1.474588730130419, + "learning_rate": 4.032589478498147e-06, + "loss": 0.7692, + "step": 45950 + }, + { + "epoch": 1.81771441001404, + "grad_norm": 1.4136768043549202, + "learning_rate": 4.0303320673916195e-06, + "loss": 0.7991, + "step": 45960 + }, + { + "epoch": 1.8181099092329291, 
+ "grad_norm": 1.4874282697046477, + "learning_rate": 4.028074861644149e-06, + "loss": 0.7372, + "step": 45970 + }, + { + "epoch": 1.8185054084518182, + "grad_norm": 1.3734093413076005, + "learning_rate": 4.025817861733769e-06, + "loss": 0.7671, + "step": 45980 + }, + { + "epoch": 1.8189009076707072, + "grad_norm": 1.2770383275104966, + "learning_rate": 4.023561068138478e-06, + "loss": 0.7741, + "step": 45990 + }, + { + "epoch": 1.8192964068895963, + "grad_norm": 1.5258026474440733, + "learning_rate": 4.0213044813362225e-06, + "loss": 0.7617, + "step": 46000 + }, + { + "epoch": 1.8196919061084853, + "grad_norm": 1.4470186437379458, + "learning_rate": 4.0190481018049116e-06, + "loss": 0.8073, + "step": 46010 + }, + { + "epoch": 1.8200874053273743, + "grad_norm": 1.4227686776566977, + "learning_rate": 4.016791930022407e-06, + "loss": 0.7746, + "step": 46020 + }, + { + "epoch": 1.8204829045462634, + "grad_norm": 1.536187426490688, + "learning_rate": 4.014535966466526e-06, + "loss": 0.7605, + "step": 46030 + }, + { + "epoch": 1.8208784037651524, + "grad_norm": 1.2772171551252087, + "learning_rate": 4.012280211615046e-06, + "loss": 0.774, + "step": 46040 + }, + { + "epoch": 1.8212739029840415, + "grad_norm": 1.3208669900956689, + "learning_rate": 4.010024665945693e-06, + "loss": 0.765, + "step": 46050 + }, + { + "epoch": 1.8216694022029305, + "grad_norm": 1.6056383013665025, + "learning_rate": 4.0077693299361594e-06, + "loss": 0.7331, + "step": 46060 + }, + { + "epoch": 1.8220649014218195, + "grad_norm": 1.773901275603768, + "learning_rate": 4.00551420406408e-06, + "loss": 0.7712, + "step": 46070 + }, + { + "epoch": 1.8224604006407086, + "grad_norm": 1.298633913311332, + "learning_rate": 4.003259288807055e-06, + "loss": 0.7781, + "step": 46080 + }, + { + "epoch": 1.8228558998595976, + "grad_norm": 1.3544488286149823, + "learning_rate": 4.001004584642635e-06, + "loss": 0.7984, + "step": 46090 + }, + { + "epoch": 1.8232513990784867, + "grad_norm": 1.3814133108976563, + "learning_rate": 3.998750092048329e-06, + "loss": 0.7691, + "step": 46100 + }, + { + "epoch": 1.823646898297376, + "grad_norm": 1.4582808095956161, + "learning_rate": 3.996495811501601e-06, + "loss": 0.7608, + "step": 46110 + }, + { + "epoch": 1.824042397516265, + "grad_norm": 1.5000079919750775, + "learning_rate": 3.994241743479867e-06, + "loss": 0.7661, + "step": 46120 + }, + { + "epoch": 1.824437896735154, + "grad_norm": 1.6103539859038203, + "learning_rate": 3.9919878884605015e-06, + "loss": 0.7783, + "step": 46130 + }, + { + "epoch": 1.824833395954043, + "grad_norm": 1.3782951038814135, + "learning_rate": 3.989734246920831e-06, + "loss": 0.7523, + "step": 46140 + }, + { + "epoch": 1.825228895172932, + "grad_norm": 1.6617487897801164, + "learning_rate": 3.987480819338141e-06, + "loss": 0.753, + "step": 46150 + }, + { + "epoch": 1.8256243943918211, + "grad_norm": 1.2456127558127321, + "learning_rate": 3.985227606189665e-06, + "loss": 0.7711, + "step": 46160 + }, + { + "epoch": 1.8260198936107102, + "grad_norm": 1.4248525267395948, + "learning_rate": 3.9829746079525975e-06, + "loss": 0.786, + "step": 46170 + }, + { + "epoch": 1.8264153928295992, + "grad_norm": 1.5509402017837948, + "learning_rate": 3.980721825104085e-06, + "loss": 0.7928, + "step": 46180 + }, + { + "epoch": 1.8268108920484882, + "grad_norm": 1.2819042373879894, + "learning_rate": 3.978469258121225e-06, + "loss": 0.7556, + "step": 46190 + }, + { + "epoch": 1.8272063912673773, + "grad_norm": 1.5321893336095616, + "learning_rate": 3.976216907481076e-06, + "loss": 
0.7541, + "step": 46200 + }, + { + "epoch": 1.8276018904862663, + "grad_norm": 1.4560866364015026, + "learning_rate": 3.973964773660649e-06, + "loss": 0.777, + "step": 46210 + }, + { + "epoch": 1.8279973897051554, + "grad_norm": 1.4638563114993728, + "learning_rate": 3.971712857136902e-06, + "loss": 0.7808, + "step": 46220 + }, + { + "epoch": 1.8283928889240444, + "grad_norm": 1.27753349151783, + "learning_rate": 3.969461158386755e-06, + "loss": 0.7855, + "step": 46230 + }, + { + "epoch": 1.8287883881429334, + "grad_norm": 1.2601120346826225, + "learning_rate": 3.967209677887079e-06, + "loss": 0.8116, + "step": 46240 + }, + { + "epoch": 1.8291838873618225, + "grad_norm": 1.3532146278018007, + "learning_rate": 3.9649584161147e-06, + "loss": 0.8207, + "step": 46250 + }, + { + "epoch": 1.8295793865807115, + "grad_norm": 1.7483432492532731, + "learning_rate": 3.962707373546396e-06, + "loss": 0.7348, + "step": 46260 + }, + { + "epoch": 1.8299748857996005, + "grad_norm": 1.2177485797370824, + "learning_rate": 3.960456550658899e-06, + "loss": 0.7622, + "step": 46270 + }, + { + "epoch": 1.8303703850184896, + "grad_norm": 1.542469264649917, + "learning_rate": 3.958205947928895e-06, + "loss": 0.7644, + "step": 46280 + }, + { + "epoch": 1.8307658842373786, + "grad_norm": 1.5596861154038129, + "learning_rate": 3.9559555658330226e-06, + "loss": 0.7581, + "step": 46290 + }, + { + "epoch": 1.8311613834562677, + "grad_norm": 1.4021852589195147, + "learning_rate": 3.953705404847877e-06, + "loss": 0.7863, + "step": 46300 + }, + { + "epoch": 1.8315568826751567, + "grad_norm": 1.237541100184159, + "learning_rate": 3.951455465449999e-06, + "loss": 0.7796, + "step": 46310 + }, + { + "epoch": 1.8319523818940457, + "grad_norm": 1.291996208592542, + "learning_rate": 3.9492057481158905e-06, + "loss": 0.7742, + "step": 46320 + }, + { + "epoch": 1.8323478811129348, + "grad_norm": 1.705638717812554, + "learning_rate": 3.946956253322001e-06, + "loss": 0.7696, + "step": 46330 + }, + { + "epoch": 1.8327433803318238, + "grad_norm": 1.3042719117786041, + "learning_rate": 3.9447069815447365e-06, + "loss": 0.785, + "step": 46340 + }, + { + "epoch": 1.8331388795507129, + "grad_norm": 1.6320920785234616, + "learning_rate": 3.942457933260454e-06, + "loss": 0.7681, + "step": 46350 + }, + { + "epoch": 1.833534378769602, + "grad_norm": 1.5755890690491232, + "learning_rate": 3.940209108945463e-06, + "loss": 0.7621, + "step": 46360 + }, + { + "epoch": 1.833929877988491, + "grad_norm": 1.3850454525456801, + "learning_rate": 3.937960509076026e-06, + "loss": 0.7994, + "step": 46370 + }, + { + "epoch": 1.83432537720738, + "grad_norm": 1.2858976057023412, + "learning_rate": 3.935712134128359e-06, + "loss": 0.7849, + "step": 46380 + }, + { + "epoch": 1.834720876426269, + "grad_norm": 1.2661530575212854, + "learning_rate": 3.933463984578629e-06, + "loss": 0.7605, + "step": 46390 + }, + { + "epoch": 1.835116375645158, + "grad_norm": 1.5757550643874016, + "learning_rate": 3.931216060902953e-06, + "loss": 0.7743, + "step": 46400 + }, + { + "epoch": 1.835511874864047, + "grad_norm": 1.3706600697486169, + "learning_rate": 3.928968363577406e-06, + "loss": 0.7776, + "step": 46410 + }, + { + "epoch": 1.8359073740829361, + "grad_norm": 1.2600157671615326, + "learning_rate": 3.9267208930780095e-06, + "loss": 0.7691, + "step": 46420 + }, + { + "epoch": 1.8363028733018252, + "grad_norm": 1.297255437228757, + "learning_rate": 3.924473649880742e-06, + "loss": 0.7731, + "step": 46430 + }, + { + "epoch": 1.8366983725207142, + "grad_norm": 
1.649512152772169, + "learning_rate": 3.922226634461529e-06, + "loss": 0.7482, + "step": 46440 + }, + { + "epoch": 1.8370938717396035, + "grad_norm": 1.547920602594299, + "learning_rate": 3.919979847296249e-06, + "loss": 0.7548, + "step": 46450 + }, + { + "epoch": 1.8374893709584925, + "grad_norm": 1.5427777863591599, + "learning_rate": 3.917733288860735e-06, + "loss": 0.7704, + "step": 46460 + }, + { + "epoch": 1.8378848701773816, + "grad_norm": 1.603914690257776, + "learning_rate": 3.9154869596307675e-06, + "loss": 0.7555, + "step": 46470 + }, + { + "epoch": 1.8382803693962706, + "grad_norm": 1.2338832587372544, + "learning_rate": 3.913240860082083e-06, + "loss": 0.745, + "step": 46480 + }, + { + "epoch": 1.8386758686151596, + "grad_norm": 1.3747284062494922, + "learning_rate": 3.910994990690366e-06, + "loss": 0.7533, + "step": 46490 + }, + { + "epoch": 1.8390713678340487, + "grad_norm": 1.5156644275398272, + "learning_rate": 3.908749351931251e-06, + "loss": 0.7866, + "step": 46500 + }, + { + "epoch": 1.8394668670529377, + "grad_norm": 1.3922074450804318, + "learning_rate": 3.9065039442803295e-06, + "loss": 0.781, + "step": 46510 + }, + { + "epoch": 1.8398623662718268, + "grad_norm": 1.4450779243915337, + "learning_rate": 3.9042587682131385e-06, + "loss": 0.7567, + "step": 46520 + }, + { + "epoch": 1.8402578654907158, + "grad_norm": 1.3973409135576254, + "learning_rate": 3.902013824205168e-06, + "loss": 0.7841, + "step": 46530 + }, + { + "epoch": 1.8406533647096048, + "grad_norm": 1.5687146319956817, + "learning_rate": 3.899769112731858e-06, + "loss": 0.7724, + "step": 46540 + }, + { + "epoch": 1.8410488639284939, + "grad_norm": 1.3325344548524891, + "learning_rate": 3.897524634268603e-06, + "loss": 0.7871, + "step": 46550 + }, + { + "epoch": 1.841444363147383, + "grad_norm": 1.4403765630798755, + "learning_rate": 3.89528038929074e-06, + "loss": 0.7841, + "step": 46560 + }, + { + "epoch": 1.841839862366272, + "grad_norm": 1.4242968179904358, + "learning_rate": 3.893036378273565e-06, + "loss": 0.7734, + "step": 46570 + }, + { + "epoch": 1.842235361585161, + "grad_norm": 1.490809391329131, + "learning_rate": 3.8907926016923205e-06, + "loss": 0.7643, + "step": 46580 + }, + { + "epoch": 1.84263086080405, + "grad_norm": 1.3295000751714796, + "learning_rate": 3.888549060022199e-06, + "loss": 0.7707, + "step": 46590 + }, + { + "epoch": 1.843026360022939, + "grad_norm": 1.1283752203481403, + "learning_rate": 3.8863057537383455e-06, + "loss": 0.7929, + "step": 46600 + }, + { + "epoch": 1.843421859241828, + "grad_norm": 1.2135571881303013, + "learning_rate": 3.8840626833158536e-06, + "loss": 0.7924, + "step": 46610 + }, + { + "epoch": 1.8438173584607171, + "grad_norm": 1.3588187301583048, + "learning_rate": 3.881819849229767e-06, + "loss": 0.788, + "step": 46620 + }, + { + "epoch": 1.8442128576796062, + "grad_norm": 1.5012704043770022, + "learning_rate": 3.879577251955079e-06, + "loss": 0.7538, + "step": 46630 + }, + { + "epoch": 1.8446083568984952, + "grad_norm": 1.2883755124542555, + "learning_rate": 3.8773348919667345e-06, + "loss": 0.7734, + "step": 46640 + }, + { + "epoch": 1.8450038561173843, + "grad_norm": 1.3574950782424573, + "learning_rate": 3.875092769739625e-06, + "loss": 0.7502, + "step": 46650 + }, + { + "epoch": 1.8453993553362733, + "grad_norm": 1.2512945178200172, + "learning_rate": 3.872850885748595e-06, + "loss": 0.7719, + "step": 46660 + }, + { + "epoch": 1.8457948545551623, + "grad_norm": 1.3725857682025175, + "learning_rate": 3.870609240468438e-06, + "loss": 0.7577, + "step": 
46670 + }, + { + "epoch": 1.8461903537740514, + "grad_norm": 1.476246384778804, + "learning_rate": 3.868367834373895e-06, + "loss": 0.7703, + "step": 46680 + }, + { + "epoch": 1.8465858529929404, + "grad_norm": 1.249788109236149, + "learning_rate": 3.866126667939657e-06, + "loss": 0.7842, + "step": 46690 + }, + { + "epoch": 1.8469813522118295, + "grad_norm": 1.3699958036186077, + "learning_rate": 3.863885741640364e-06, + "loss": 0.7629, + "step": 46700 + }, + { + "epoch": 1.8473768514307185, + "grad_norm": 1.2344044725237173, + "learning_rate": 3.8616450559506065e-06, + "loss": 0.792, + "step": 46710 + }, + { + "epoch": 1.8477723506496075, + "grad_norm": 1.600299718541322, + "learning_rate": 3.859404611344925e-06, + "loss": 0.7401, + "step": 46720 + }, + { + "epoch": 1.8481678498684966, + "grad_norm": 1.3469177550692348, + "learning_rate": 3.8571644082978055e-06, + "loss": 0.8, + "step": 46730 + }, + { + "epoch": 1.8485633490873856, + "grad_norm": 1.822731952582551, + "learning_rate": 3.8549244472836845e-06, + "loss": 0.7625, + "step": 46740 + }, + { + "epoch": 1.8489588483062747, + "grad_norm": 1.4941439596833723, + "learning_rate": 3.852684728776948e-06, + "loss": 0.7868, + "step": 46750 + }, + { + "epoch": 1.8493543475251637, + "grad_norm": 1.2735700977569417, + "learning_rate": 3.85044525325193e-06, + "loss": 0.7802, + "step": 46760 + }, + { + "epoch": 1.8497498467440527, + "grad_norm": 1.6673522163069612, + "learning_rate": 3.848206021182913e-06, + "loss": 0.755, + "step": 46770 + }, + { + "epoch": 1.8501453459629418, + "grad_norm": 1.5837285639524838, + "learning_rate": 3.845967033044128e-06, + "loss": 0.7403, + "step": 46780 + }, + { + "epoch": 1.8505408451818308, + "grad_norm": 1.3251871856610935, + "learning_rate": 3.843728289309756e-06, + "loss": 0.7474, + "step": 46790 + }, + { + "epoch": 1.8509363444007199, + "grad_norm": 1.876480145570574, + "learning_rate": 3.8414897904539216e-06, + "loss": 0.7367, + "step": 46800 + }, + { + "epoch": 1.851331843619609, + "grad_norm": 1.4835877326385254, + "learning_rate": 3.8392515369507015e-06, + "loss": 0.7512, + "step": 46810 + }, + { + "epoch": 1.851727342838498, + "grad_norm": 1.6262707068358329, + "learning_rate": 3.8370135292741195e-06, + "loss": 0.7529, + "step": 46820 + }, + { + "epoch": 1.852122842057387, + "grad_norm": 1.4842564967867224, + "learning_rate": 3.834775767898148e-06, + "loss": 0.7428, + "step": 46830 + }, + { + "epoch": 1.852518341276276, + "grad_norm": 1.2023274939663307, + "learning_rate": 3.8325382532967045e-06, + "loss": 0.7732, + "step": 46840 + }, + { + "epoch": 1.852913840495165, + "grad_norm": 1.6824630189874736, + "learning_rate": 3.830300985943659e-06, + "loss": 0.7824, + "step": 46850 + }, + { + "epoch": 1.853309339714054, + "grad_norm": 1.3907747321127248, + "learning_rate": 3.828063966312827e-06, + "loss": 0.7694, + "step": 46860 + }, + { + "epoch": 1.8537048389329431, + "grad_norm": 1.3473232480616073, + "learning_rate": 3.825827194877967e-06, + "loss": 0.766, + "step": 46870 + }, + { + "epoch": 1.8541003381518322, + "grad_norm": 1.2997042914062478, + "learning_rate": 3.823590672112791e-06, + "loss": 0.7632, + "step": 46880 + }, + { + "epoch": 1.8544958373707212, + "grad_norm": 1.5027026063239883, + "learning_rate": 3.821354398490956e-06, + "loss": 0.7734, + "step": 46890 + }, + { + "epoch": 1.8548913365896103, + "grad_norm": 1.3219458775293302, + "learning_rate": 3.819118374486067e-06, + "loss": 0.7431, + "step": 46900 + }, + { + "epoch": 1.8552868358084993, + "grad_norm": 1.693861508471388, + 
"learning_rate": 3.816882600571675e-06, + "loss": 0.7784, + "step": 46910 + }, + { + "epoch": 1.8556823350273883, + "grad_norm": 1.4560999861874828, + "learning_rate": 3.814647077221281e-06, + "loss": 0.762, + "step": 46920 + }, + { + "epoch": 1.8560778342462774, + "grad_norm": 1.2969110838173135, + "learning_rate": 3.8124118049083257e-06, + "loss": 0.7684, + "step": 46930 + }, + { + "epoch": 1.8564733334651664, + "grad_norm": 1.3325445869781007, + "learning_rate": 3.810176784106205e-06, + "loss": 0.7598, + "step": 46940 + }, + { + "epoch": 1.8568688326840554, + "grad_norm": 1.441763564000196, + "learning_rate": 3.807942015288257e-06, + "loss": 0.7453, + "step": 46950 + }, + { + "epoch": 1.8572643319029445, + "grad_norm": 1.3562392166698485, + "learning_rate": 3.8057074989277676e-06, + "loss": 0.7985, + "step": 46960 + }, + { + "epoch": 1.8576598311218335, + "grad_norm": 1.3740741191197638, + "learning_rate": 3.8034732354979686e-06, + "loss": 0.7922, + "step": 46970 + }, + { + "epoch": 1.8580553303407226, + "grad_norm": 1.4661039861626741, + "learning_rate": 3.801239225472039e-06, + "loss": 0.7389, + "step": 46980 + }, + { + "epoch": 1.8584508295596116, + "grad_norm": 1.551889864743315, + "learning_rate": 3.7990054693231047e-06, + "loss": 0.7849, + "step": 46990 + }, + { + "epoch": 1.8588463287785006, + "grad_norm": 1.5807503772397271, + "learning_rate": 3.7967719675242366e-06, + "loss": 0.7573, + "step": 47000 + }, + { + "epoch": 1.8592418279973897, + "grad_norm": 1.569689675277944, + "learning_rate": 3.7945387205484514e-06, + "loss": 0.7649, + "step": 47010 + }, + { + "epoch": 1.8596373272162787, + "grad_norm": 1.654889404618668, + "learning_rate": 3.7923057288687125e-06, + "loss": 0.7642, + "step": 47020 + }, + { + "epoch": 1.8600328264351678, + "grad_norm": 1.5507912584625825, + "learning_rate": 3.7900729929579305e-06, + "loss": 0.7437, + "step": 47030 + }, + { + "epoch": 1.8604283256540568, + "grad_norm": 1.3285720838303194, + "learning_rate": 3.7878405132889618e-06, + "loss": 0.7667, + "step": 47040 + }, + { + "epoch": 1.8608238248729458, + "grad_norm": 1.5652573592522234, + "learning_rate": 3.7856082903346034e-06, + "loss": 0.7577, + "step": 47050 + }, + { + "epoch": 1.8612193240918349, + "grad_norm": 1.33809875754651, + "learning_rate": 3.7833763245676037e-06, + "loss": 0.7662, + "step": 47060 + }, + { + "epoch": 1.861614823310724, + "grad_norm": 1.5871729801949934, + "learning_rate": 3.7811446164606552e-06, + "loss": 0.7732, + "step": 47070 + }, + { + "epoch": 1.862010322529613, + "grad_norm": 1.242156028538668, + "learning_rate": 3.7789131664863956e-06, + "loss": 0.7835, + "step": 47080 + }, + { + "epoch": 1.862405821748502, + "grad_norm": 1.41531552062675, + "learning_rate": 3.776681975117408e-06, + "loss": 0.7642, + "step": 47090 + }, + { + "epoch": 1.862801320967391, + "grad_norm": 1.6408357012460044, + "learning_rate": 3.7744510428262193e-06, + "loss": 0.756, + "step": 47100 + }, + { + "epoch": 1.86319682018628, + "grad_norm": 1.3605624997799788, + "learning_rate": 3.7722203700853026e-06, + "loss": 0.7624, + "step": 47110 + }, + { + "epoch": 1.8635923194051691, + "grad_norm": 1.3389985496842347, + "learning_rate": 3.769989957367078e-06, + "loss": 0.753, + "step": 47120 + }, + { + "epoch": 1.8639878186240582, + "grad_norm": 1.2212417677663336, + "learning_rate": 3.767759805143907e-06, + "loss": 0.7501, + "step": 47130 + }, + { + "epoch": 1.8643833178429472, + "grad_norm": 1.767624420110444, + "learning_rate": 3.7655299138880986e-06, + "loss": 0.7823, + "step": 47140 + }, + { + 
"epoch": 1.8647788170618362, + "grad_norm": 1.61737452316488, + "learning_rate": 3.7633002840719044e-06, + "loss": 0.7735, + "step": 47150 + }, + { + "epoch": 1.8651743162807253, + "grad_norm": 1.4173678641166108, + "learning_rate": 3.7610709161675264e-06, + "loss": 0.7577, + "step": 47160 + }, + { + "epoch": 1.8655698154996143, + "grad_norm": 1.7781541040998885, + "learning_rate": 3.758841810647099e-06, + "loss": 0.7678, + "step": 47170 + }, + { + "epoch": 1.8659653147185034, + "grad_norm": 1.3133033382571648, + "learning_rate": 3.7566129679827135e-06, + "loss": 0.7571, + "step": 47180 + }, + { + "epoch": 1.8663608139373924, + "grad_norm": 1.8589502526553208, + "learning_rate": 3.7543843886463993e-06, + "loss": 0.7625, + "step": 47190 + }, + { + "epoch": 1.8667563131562814, + "grad_norm": 1.5112430406836406, + "learning_rate": 3.752156073110131e-06, + "loss": 0.7553, + "step": 47200 + }, + { + "epoch": 1.8671518123751705, + "grad_norm": 1.5033706968451628, + "learning_rate": 3.7499280218458282e-06, + "loss": 0.7532, + "step": 47210 + }, + { + "epoch": 1.8675473115940595, + "grad_norm": 1.4677868973736172, + "learning_rate": 3.7477002353253545e-06, + "loss": 0.7656, + "step": 47220 + }, + { + "epoch": 1.8679428108129486, + "grad_norm": 1.5521075463754603, + "learning_rate": 3.7454727140205154e-06, + "loss": 0.7451, + "step": 47230 + }, + { + "epoch": 1.8683383100318376, + "grad_norm": 1.450933463574836, + "learning_rate": 3.743245458403063e-06, + "loss": 0.7338, + "step": 47240 + }, + { + "epoch": 1.8687338092507266, + "grad_norm": 1.2575614759195324, + "learning_rate": 3.741018468944692e-06, + "loss": 0.7627, + "step": 47250 + }, + { + "epoch": 1.8691293084696157, + "grad_norm": 1.602739832028912, + "learning_rate": 3.7387917461170396e-06, + "loss": 0.7489, + "step": 47260 + }, + { + "epoch": 1.8695248076885047, + "grad_norm": 1.4371462164398505, + "learning_rate": 3.7365652903916892e-06, + "loss": 0.7492, + "step": 47270 + }, + { + "epoch": 1.8699203069073937, + "grad_norm": 1.466234772809852, + "learning_rate": 3.7343391022401653e-06, + "loss": 0.774, + "step": 47280 + }, + { + "epoch": 1.8703158061262828, + "grad_norm": 1.3825117242074576, + "learning_rate": 3.732113182133935e-06, + "loss": 0.7547, + "step": 47290 + }, + { + "epoch": 1.8707113053451718, + "grad_norm": 1.378978237589993, + "learning_rate": 3.729887530544411e-06, + "loss": 0.7558, + "step": 47300 + }, + { + "epoch": 1.8711068045640609, + "grad_norm": 1.3327701163729486, + "learning_rate": 3.7276621479429475e-06, + "loss": 0.7589, + "step": 47310 + }, + { + "epoch": 1.87150230378295, + "grad_norm": 1.5149515703076581, + "learning_rate": 3.725437034800844e-06, + "loss": 0.7439, + "step": 47320 + }, + { + "epoch": 1.871897803001839, + "grad_norm": 1.5615731571460778, + "learning_rate": 3.7232121915893414e-06, + "loss": 0.7734, + "step": 47330 + }, + { + "epoch": 1.872293302220728, + "grad_norm": 1.4618045624031561, + "learning_rate": 3.720987618779621e-06, + "loss": 0.7632, + "step": 47340 + }, + { + "epoch": 1.872688801439617, + "grad_norm": 1.5872922801959648, + "learning_rate": 3.718763316842811e-06, + "loss": 0.7646, + "step": 47350 + }, + { + "epoch": 1.873084300658506, + "grad_norm": 1.2933564937170532, + "learning_rate": 3.71653928624998e-06, + "loss": 0.7731, + "step": 47360 + }, + { + "epoch": 1.873479799877395, + "grad_norm": 1.334385568275373, + "learning_rate": 3.71431552747214e-06, + "loss": 0.7692, + "step": 47370 + }, + { + "epoch": 1.8738752990962841, + "grad_norm": 1.2515286619272854, + "learning_rate": 
3.712092040980244e-06, + "loss": 0.7519, + "step": 47380 + }, + { + "epoch": 1.8742707983151732, + "grad_norm": 1.4741879738673807, + "learning_rate": 3.7098688272451893e-06, + "loss": 0.7568, + "step": 47390 + }, + { + "epoch": 1.8746662975340622, + "grad_norm": 1.4057151664095175, + "learning_rate": 3.707645886737814e-06, + "loss": 0.7701, + "step": 47400 + }, + { + "epoch": 1.8750617967529513, + "grad_norm": 1.3707203368565195, + "learning_rate": 3.705423219928902e-06, + "loss": 0.784, + "step": 47410 + }, + { + "epoch": 1.8754572959718403, + "grad_norm": 1.2574671352759665, + "learning_rate": 3.70320082728917e-06, + "loss": 0.7908, + "step": 47420 + }, + { + "epoch": 1.8758527951907293, + "grad_norm": 1.5146159927892748, + "learning_rate": 3.7009787092892863e-06, + "loss": 0.7476, + "step": 47430 + }, + { + "epoch": 1.8762482944096184, + "grad_norm": 1.6680939106558197, + "learning_rate": 3.698756866399857e-06, + "loss": 0.7459, + "step": 47440 + }, + { + "epoch": 1.8766437936285074, + "grad_norm": 1.434283660416888, + "learning_rate": 3.6965352990914295e-06, + "loss": 0.7574, + "step": 47450 + }, + { + "epoch": 1.8770392928473967, + "grad_norm": 1.3387187797510527, + "learning_rate": 3.694314007834495e-06, + "loss": 0.75, + "step": 47460 + }, + { + "epoch": 1.8774347920662857, + "grad_norm": 1.5593558973189203, + "learning_rate": 3.692092993099484e-06, + "loss": 0.7484, + "step": 47470 + }, + { + "epoch": 1.8778302912851748, + "grad_norm": 1.3467444091179797, + "learning_rate": 3.6898722553567706e-06, + "loss": 0.761, + "step": 47480 + }, + { + "epoch": 1.8782257905040638, + "grad_norm": 1.2569072830455288, + "learning_rate": 3.6876517950766675e-06, + "loss": 0.7695, + "step": 47490 + }, + { + "epoch": 1.8786212897229528, + "grad_norm": 1.1683779265078604, + "learning_rate": 3.685431612729431e-06, + "loss": 0.7866, + "step": 47500 + }, + { + "epoch": 1.8790167889418419, + "grad_norm": 1.534253366707865, + "learning_rate": 3.6832117087852587e-06, + "loss": 0.7629, + "step": 47510 + }, + { + "epoch": 1.879412288160731, + "grad_norm": 1.6231268002130992, + "learning_rate": 3.6809920837142853e-06, + "loss": 0.7495, + "step": 47520 + }, + { + "epoch": 1.87980778737962, + "grad_norm": 1.5771848540727516, + "learning_rate": 3.6787727379865934e-06, + "loss": 0.7445, + "step": 47530 + }, + { + "epoch": 1.880203286598509, + "grad_norm": 1.7399216172378273, + "learning_rate": 3.676553672072198e-06, + "loss": 0.7316, + "step": 47540 + }, + { + "epoch": 1.880598785817398, + "grad_norm": 1.676484051376917, + "learning_rate": 3.674334886441061e-06, + "loss": 0.7674, + "step": 47550 + }, + { + "epoch": 1.880994285036287, + "grad_norm": 1.311049970554493, + "learning_rate": 3.6721163815630855e-06, + "loss": 0.7757, + "step": 47560 + }, + { + "epoch": 1.8813897842551761, + "grad_norm": 1.4625221614169013, + "learning_rate": 3.6698981579081093e-06, + "loss": 0.7537, + "step": 47570 + }, + { + "epoch": 1.8817852834740652, + "grad_norm": 1.4499266913354638, + "learning_rate": 3.6676802159459155e-06, + "loss": 0.7529, + "step": 47580 + }, + { + "epoch": 1.8821807826929542, + "grad_norm": 1.522334245925788, + "learning_rate": 3.665462556146227e-06, + "loss": 0.7661, + "step": 47590 + }, + { + "epoch": 1.8825762819118432, + "grad_norm": 1.4144920461254835, + "learning_rate": 3.6632451789787056e-06, + "loss": 0.7611, + "step": 47600 + }, + { + "epoch": 1.8829717811307323, + "grad_norm": 1.4214050480361953, + "learning_rate": 3.6610280849129533e-06, + "loss": 0.7473, + "step": 47610 + }, + { + "epoch": 
1.8833672803496213, + "grad_norm": 1.338640980951445, + "learning_rate": 3.6588112744185135e-06, + "loss": 0.7688, + "step": 47620 + }, + { + "epoch": 1.8837627795685103, + "grad_norm": 1.709520237250599, + "learning_rate": 3.656594747964868e-06, + "loss": 0.7356, + "step": 47630 + }, + { + "epoch": 1.8841582787873994, + "grad_norm": 1.311865762541882, + "learning_rate": 3.6543785060214387e-06, + "loss": 0.775, + "step": 47640 + }, + { + "epoch": 1.8845537780062884, + "grad_norm": 1.5860889959222024, + "learning_rate": 3.652162549057592e-06, + "loss": 0.7499, + "step": 47650 + }, + { + "epoch": 1.8849492772251775, + "grad_norm": 1.4018758600151087, + "learning_rate": 3.649946877542623e-06, + "loss": 0.7857, + "step": 47660 + }, + { + "epoch": 1.8853447764440665, + "grad_norm": 1.596874494146977, + "learning_rate": 3.647731491945775e-06, + "loss": 0.7574, + "step": 47670 + }, + { + "epoch": 1.8857402756629555, + "grad_norm": 1.265950007452047, + "learning_rate": 3.6455163927362315e-06, + "loss": 0.7764, + "step": 47680 + }, + { + "epoch": 1.8861357748818446, + "grad_norm": 1.3941556923120133, + "learning_rate": 3.6433015803831098e-06, + "loss": 0.7685, + "step": 47690 + }, + { + "epoch": 1.8865312741007336, + "grad_norm": 1.5619352056022604, + "learning_rate": 3.64108705535547e-06, + "loss": 0.7476, + "step": 47700 + }, + { + "epoch": 1.8869267733196227, + "grad_norm": 1.3659212859114231, + "learning_rate": 3.638872818122311e-06, + "loss": 0.7805, + "step": 47710 + }, + { + "epoch": 1.8873222725385117, + "grad_norm": 1.3161352415324021, + "learning_rate": 3.6366588691525706e-06, + "loss": 0.7765, + "step": 47720 + }, + { + "epoch": 1.8877177717574007, + "grad_norm": 1.522333657829966, + "learning_rate": 3.6344452089151238e-06, + "loss": 0.7485, + "step": 47730 + }, + { + "epoch": 1.8881132709762898, + "grad_norm": 1.5097266628952783, + "learning_rate": 3.6322318378787885e-06, + "loss": 0.7492, + "step": 47740 + }, + { + "epoch": 1.8885087701951788, + "grad_norm": 1.5333860312914256, + "learning_rate": 3.630018756512316e-06, + "loss": 0.7455, + "step": 47750 + }, + { + "epoch": 1.8889042694140679, + "grad_norm": 1.3129360080000902, + "learning_rate": 3.6278059652843995e-06, + "loss": 0.7635, + "step": 47760 + }, + { + "epoch": 1.889299768632957, + "grad_norm": 1.68370343450865, + "learning_rate": 3.6255934646636724e-06, + "loss": 0.7396, + "step": 47770 + }, + { + "epoch": 1.889695267851846, + "grad_norm": 1.4258204400074574, + "learning_rate": 3.623381255118702e-06, + "loss": 0.771, + "step": 47780 + }, + { + "epoch": 1.8900907670707352, + "grad_norm": 1.5769144004212399, + "learning_rate": 3.621169337117997e-06, + "loss": 0.7437, + "step": 47790 + }, + { + "epoch": 1.8904862662896242, + "grad_norm": 1.3636321368100723, + "learning_rate": 3.6189577111300043e-06, + "loss": 0.8037, + "step": 47800 + }, + { + "epoch": 1.8908817655085133, + "grad_norm": 1.6077616835747133, + "learning_rate": 3.6167463776231084e-06, + "loss": 0.7538, + "step": 47810 + }, + { + "epoch": 1.8912772647274023, + "grad_norm": 1.5906856982618143, + "learning_rate": 3.614535337065631e-06, + "loss": 0.7948, + "step": 47820 + }, + { + "epoch": 1.8916727639462914, + "grad_norm": 1.47615168405027, + "learning_rate": 3.612324589925833e-06, + "loss": 0.7553, + "step": 47830 + }, + { + "epoch": 1.8920682631651804, + "grad_norm": 1.6017589507133478, + "learning_rate": 3.6101141366719127e-06, + "loss": 0.7433, + "step": 47840 + }, + { + "epoch": 1.8924637623840694, + "grad_norm": 1.361011087813422, + "learning_rate": 
3.607903977772007e-06, + "loss": 0.7581, + "step": 47850 + }, + { + "epoch": 1.8928592616029585, + "grad_norm": 1.3921189746976228, + "learning_rate": 3.605694113694189e-06, + "loss": 0.7779, + "step": 47860 + }, + { + "epoch": 1.8932547608218475, + "grad_norm": 1.769656467299996, + "learning_rate": 3.6034845449064702e-06, + "loss": 0.7508, + "step": 47870 + }, + { + "epoch": 1.8936502600407366, + "grad_norm": 1.4526432721423574, + "learning_rate": 3.6012752718767997e-06, + "loss": 0.7695, + "step": 47880 + }, + { + "epoch": 1.8940457592596256, + "grad_norm": 1.4970301621613045, + "learning_rate": 3.5990662950730627e-06, + "loss": 0.7691, + "step": 47890 + }, + { + "epoch": 1.8944412584785146, + "grad_norm": 1.5058105612611492, + "learning_rate": 3.596857614963086e-06, + "loss": 0.7726, + "step": 47900 + }, + { + "epoch": 1.8948367576974037, + "grad_norm": 1.424422232755828, + "learning_rate": 3.5946492320146254e-06, + "loss": 0.7605, + "step": 47910 + }, + { + "epoch": 1.8952322569162927, + "grad_norm": 1.5879870416430097, + "learning_rate": 3.5924411466953802e-06, + "loss": 0.7655, + "step": 47920 + }, + { + "epoch": 1.8956277561351818, + "grad_norm": 1.4769514544175584, + "learning_rate": 3.5902333594729865e-06, + "loss": 0.7578, + "step": 47930 + }, + { + "epoch": 1.8960232553540708, + "grad_norm": 1.6133341181371539, + "learning_rate": 3.588025870815014e-06, + "loss": 0.7689, + "step": 47940 + }, + { + "epoch": 1.8964187545729598, + "grad_norm": 1.3229647584098536, + "learning_rate": 3.585818681188972e-06, + "loss": 0.747, + "step": 47950 + }, + { + "epoch": 1.8968142537918489, + "grad_norm": 1.4620021502426253, + "learning_rate": 3.583611791062306e-06, + "loss": 0.7764, + "step": 47960 + }, + { + "epoch": 1.897209753010738, + "grad_norm": 1.4497171767610308, + "learning_rate": 3.581405200902396e-06, + "loss": 0.7777, + "step": 47970 + }, + { + "epoch": 1.897605252229627, + "grad_norm": 1.4626256602535805, + "learning_rate": 3.5791989111765623e-06, + "loss": 0.7365, + "step": 47980 + }, + { + "epoch": 1.898000751448516, + "grad_norm": 1.571066095295492, + "learning_rate": 3.576992922352057e-06, + "loss": 0.7651, + "step": 47990 + }, + { + "epoch": 1.898396250667405, + "grad_norm": 1.4671233826193422, + "learning_rate": 3.574787234896071e-06, + "loss": 0.7767, + "step": 48000 + }, + { + "epoch": 1.898791749886294, + "grad_norm": 1.246437399219056, + "learning_rate": 3.5725818492757313e-06, + "loss": 0.7522, + "step": 48010 + }, + { + "epoch": 1.899187249105183, + "grad_norm": 1.746162383396902, + "learning_rate": 3.5703767659581036e-06, + "loss": 0.7701, + "step": 48020 + }, + { + "epoch": 1.8995827483240721, + "grad_norm": 1.3201895423489198, + "learning_rate": 3.568171985410183e-06, + "loss": 0.7495, + "step": 48030 + }, + { + "epoch": 1.8999782475429612, + "grad_norm": 1.3726797329295566, + "learning_rate": 3.5659675080989048e-06, + "loss": 0.7704, + "step": 48040 + }, + { + "epoch": 1.9003737467618502, + "grad_norm": 1.5378643294240089, + "learning_rate": 3.5637633344911405e-06, + "loss": 0.7518, + "step": 48050 + }, + { + "epoch": 1.9007692459807393, + "grad_norm": 1.2225824926877078, + "learning_rate": 3.5615594650536957e-06, + "loss": 0.7743, + "step": 48060 + }, + { + "epoch": 1.9011647451996283, + "grad_norm": 1.6485901652446218, + "learning_rate": 3.5593559002533127e-06, + "loss": 0.7312, + "step": 48070 + }, + { + "epoch": 1.9015602444185173, + "grad_norm": 1.5411959745657817, + "learning_rate": 3.5571526405566685e-06, + "loss": 0.7784, + "step": 48080 + }, + { + "epoch": 
1.9019557436374064, + "grad_norm": 1.294695125082055, + "learning_rate": 3.5549496864303762e-06, + "loss": 0.769, + "step": 48090 + }, + { + "epoch": 1.9023512428562954, + "grad_norm": 1.9705813153659901, + "learning_rate": 3.5527470383409833e-06, + "loss": 0.7509, + "step": 48100 + }, + { + "epoch": 1.9027467420751845, + "grad_norm": 1.160740302996935, + "learning_rate": 3.550544696754973e-06, + "loss": 0.7647, + "step": 48110 + }, + { + "epoch": 1.9031422412940735, + "grad_norm": 1.4084496739700558, + "learning_rate": 3.548342662138764e-06, + "loss": 0.7665, + "step": 48120 + }, + { + "epoch": 1.9035377405129625, + "grad_norm": 1.1630777810269677, + "learning_rate": 3.546140934958708e-06, + "loss": 0.7598, + "step": 48130 + }, + { + "epoch": 1.9039332397318516, + "grad_norm": 1.2672876152702206, + "learning_rate": 3.5439395156810974e-06, + "loss": 0.7537, + "step": 48140 + }, + { + "epoch": 1.9043287389507406, + "grad_norm": 1.534236507509776, + "learning_rate": 3.5417384047721496e-06, + "loss": 0.7706, + "step": 48150 + }, + { + "epoch": 1.9047242381696297, + "grad_norm": 1.466027993497561, + "learning_rate": 3.5395376026980246e-06, + "loss": 0.7588, + "step": 48160 + }, + { + "epoch": 1.9051197373885187, + "grad_norm": 1.5466614037393556, + "learning_rate": 3.5373371099248137e-06, + "loss": 0.7463, + "step": 48170 + }, + { + "epoch": 1.9055152366074077, + "grad_norm": 1.439383345988291, + "learning_rate": 3.5351369269185456e-06, + "loss": 0.7677, + "step": 48180 + }, + { + "epoch": 1.9059107358262968, + "grad_norm": 1.2989300898032212, + "learning_rate": 3.5329370541451785e-06, + "loss": 0.7929, + "step": 48190 + }, + { + "epoch": 1.9063062350451858, + "grad_norm": 1.5356930205726336, + "learning_rate": 3.53073749207061e-06, + "loss": 0.7331, + "step": 48200 + }, + { + "epoch": 1.9067017342640749, + "grad_norm": 1.260874660528549, + "learning_rate": 3.52853824116067e-06, + "loss": 0.7709, + "step": 48210 + }, + { + "epoch": 1.907097233482964, + "grad_norm": 1.5015594923176887, + "learning_rate": 3.5263393018811203e-06, + "loss": 0.7584, + "step": 48220 + }, + { + "epoch": 1.907492732701853, + "grad_norm": 1.6731768466238122, + "learning_rate": 3.5241406746976593e-06, + "loss": 0.7483, + "step": 48230 + }, + { + "epoch": 1.907888231920742, + "grad_norm": 1.58439474247732, + "learning_rate": 3.5219423600759183e-06, + "loss": 0.7702, + "step": 48240 + }, + { + "epoch": 1.908283731139631, + "grad_norm": 1.432798190434656, + "learning_rate": 3.519744358481464e-06, + "loss": 0.7583, + "step": 48250 + }, + { + "epoch": 1.90867923035852, + "grad_norm": 1.4980797527771759, + "learning_rate": 3.517546670379795e-06, + "loss": 0.7765, + "step": 48260 + }, + { + "epoch": 1.909074729577409, + "grad_norm": 1.6516813346331432, + "learning_rate": 3.5153492962363435e-06, + "loss": 0.7418, + "step": 48270 + }, + { + "epoch": 1.9094702287962981, + "grad_norm": 1.2163086985860219, + "learning_rate": 3.513152236516475e-06, + "loss": 0.754, + "step": 48280 + }, + { + "epoch": 1.9098657280151872, + "grad_norm": 1.378064511799827, + "learning_rate": 3.5109554916854893e-06, + "loss": 0.7214, + "step": 48290 + }, + { + "epoch": 1.9102612272340762, + "grad_norm": 1.2866435107623866, + "learning_rate": 3.5087590622086205e-06, + "loss": 0.7776, + "step": 48300 + }, + { + "epoch": 1.9106567264529652, + "grad_norm": 1.4979418450519701, + "learning_rate": 3.5065629485510338e-06, + "loss": 0.7628, + "step": 48310 + }, + { + "epoch": 1.9110522256718543, + "grad_norm": 1.4333142807341872, + "learning_rate": 
3.504367151177829e-06, + "loss": 0.7511, + "step": 48320 + }, + { + "epoch": 1.9114477248907433, + "grad_norm": 1.4839016128316724, + "learning_rate": 3.5021716705540375e-06, + "loss": 0.7526, + "step": 48330 + }, + { + "epoch": 1.9118432241096324, + "grad_norm": 1.2815094503791602, + "learning_rate": 3.4999765071446258e-06, + "loss": 0.7624, + "step": 48340 + }, + { + "epoch": 1.9122387233285214, + "grad_norm": 1.1597589019802432, + "learning_rate": 3.497781661414491e-06, + "loss": 0.7963, + "step": 48350 + }, + { + "epoch": 1.9126342225474104, + "grad_norm": 1.2710576182177307, + "learning_rate": 3.4955871338284637e-06, + "loss": 0.7732, + "step": 48360 + }, + { + "epoch": 1.9130297217662995, + "grad_norm": 1.5194220002008914, + "learning_rate": 3.4933929248513075e-06, + "loss": 0.7568, + "step": 48370 + }, + { + "epoch": 1.9134252209851885, + "grad_norm": 1.648735011704732, + "learning_rate": 3.4911990349477187e-06, + "loss": 0.7574, + "step": 48380 + }, + { + "epoch": 1.9138207202040776, + "grad_norm": 1.8509268928634877, + "learning_rate": 3.4890054645823274e-06, + "loss": 0.7491, + "step": 48390 + }, + { + "epoch": 1.9142162194229666, + "grad_norm": 1.7741533543757104, + "learning_rate": 3.4868122142196897e-06, + "loss": 0.7512, + "step": 48400 + }, + { + "epoch": 1.9146117186418556, + "grad_norm": 1.5846453909260099, + "learning_rate": 3.484619284324301e-06, + "loss": 0.7453, + "step": 48410 + }, + { + "epoch": 1.9150072178607447, + "grad_norm": 1.385084597775232, + "learning_rate": 3.4824266753605864e-06, + "loss": 0.7714, + "step": 48420 + }, + { + "epoch": 1.9154027170796337, + "grad_norm": 1.5675267484745021, + "learning_rate": 3.4802343877929017e-06, + "loss": 0.7719, + "step": 48430 + }, + { + "epoch": 1.9157982162985228, + "grad_norm": 1.5766883116121044, + "learning_rate": 3.4780424220855375e-06, + "loss": 0.7489, + "step": 48440 + }, + { + "epoch": 1.9161937155174118, + "grad_norm": 1.4727666318080896, + "learning_rate": 3.4758507787027146e-06, + "loss": 0.7889, + "step": 48450 + }, + { + "epoch": 1.9165892147363008, + "grad_norm": 1.4609562670679748, + "learning_rate": 3.4736594581085837e-06, + "loss": 0.7569, + "step": 48460 + }, + { + "epoch": 1.9169847139551899, + "grad_norm": 1.6013206194665468, + "learning_rate": 3.47146846076723e-06, + "loss": 0.7751, + "step": 48470 + }, + { + "epoch": 1.917380213174079, + "grad_norm": 1.637435622210086, + "learning_rate": 3.4692777871426695e-06, + "loss": 0.7535, + "step": 48480 + }, + { + "epoch": 1.917775712392968, + "grad_norm": 1.340507590365922, + "learning_rate": 3.467087437698849e-06, + "loss": 0.7448, + "step": 48490 + }, + { + "epoch": 1.918171211611857, + "grad_norm": 1.3083632649820889, + "learning_rate": 3.4648974128996472e-06, + "loss": 0.7688, + "step": 48500 + }, + { + "epoch": 1.918566710830746, + "grad_norm": 1.3394916046289926, + "learning_rate": 3.4627077132088748e-06, + "loss": 0.781, + "step": 48510 + }, + { + "epoch": 1.918962210049635, + "grad_norm": 1.5131590329594262, + "learning_rate": 3.4605183390902703e-06, + "loss": 0.7182, + "step": 48520 + }, + { + "epoch": 1.9193577092685241, + "grad_norm": 1.4898860941093186, + "learning_rate": 3.458329291007507e-06, + "loss": 0.7577, + "step": 48530 + }, + { + "epoch": 1.9197532084874132, + "grad_norm": 1.7497731059162311, + "learning_rate": 3.4561405694241872e-06, + "loss": 0.7349, + "step": 48540 + }, + { + "epoch": 1.9201487077063022, + "grad_norm": 1.7178735818647393, + "learning_rate": 3.453952174803845e-06, + "loss": 0.7698, + "step": 48550 + }, + { + 
"epoch": 1.9205442069251912, + "grad_norm": 1.2341161006663444, + "learning_rate": 3.4517641076099455e-06, + "loss": 0.7392, + "step": 48560 + }, + { + "epoch": 1.9209397061440803, + "grad_norm": 1.2990106198790634, + "learning_rate": 3.4495763683058837e-06, + "loss": 0.7599, + "step": 48570 + }, + { + "epoch": 1.9213352053629693, + "grad_norm": 1.6598828214155192, + "learning_rate": 3.447388957354984e-06, + "loss": 0.7609, + "step": 48580 + }, + { + "epoch": 1.9217307045818584, + "grad_norm": 1.2076112304285591, + "learning_rate": 3.445201875220504e-06, + "loss": 0.7702, + "step": 48590 + }, + { + "epoch": 1.9221262038007474, + "grad_norm": 1.2795179070829605, + "learning_rate": 3.4430151223656293e-06, + "loss": 0.7486, + "step": 48600 + }, + { + "epoch": 1.9225217030196364, + "grad_norm": 1.4980234359919122, + "learning_rate": 3.4408286992534778e-06, + "loss": 0.7855, + "step": 48610 + }, + { + "epoch": 1.9229172022385255, + "grad_norm": 1.2031824654733618, + "learning_rate": 3.4386426063470952e-06, + "loss": 0.7468, + "step": 48620 + }, + { + "epoch": 1.9233127014574145, + "grad_norm": 1.3813304905431087, + "learning_rate": 3.4364568441094614e-06, + "loss": 0.8035, + "step": 48630 + }, + { + "epoch": 1.9237082006763035, + "grad_norm": 1.5370940755529536, + "learning_rate": 3.4342714130034794e-06, + "loss": 0.7831, + "step": 48640 + }, + { + "epoch": 1.9241036998951926, + "grad_norm": 1.6620931428241879, + "learning_rate": 3.4320863134919867e-06, + "loss": 0.7384, + "step": 48650 + }, + { + "epoch": 1.9244991991140816, + "grad_norm": 1.4524789950188688, + "learning_rate": 3.4299015460377517e-06, + "loss": 0.7419, + "step": 48660 + }, + { + "epoch": 1.9248946983329707, + "grad_norm": 1.4055892594981305, + "learning_rate": 3.4277171111034703e-06, + "loss": 0.7724, + "step": 48670 + }, + { + "epoch": 1.9252901975518597, + "grad_norm": 1.3104777161366816, + "learning_rate": 3.425533009151769e-06, + "loss": 0.7721, + "step": 48680 + }, + { + "epoch": 1.9256856967707487, + "grad_norm": 1.294980341625533, + "learning_rate": 3.423349240645201e-06, + "loss": 0.7515, + "step": 48690 + }, + { + "epoch": 1.9260811959896378, + "grad_norm": 1.3831266983043116, + "learning_rate": 3.421165806046253e-06, + "loss": 0.761, + "step": 48700 + }, + { + "epoch": 1.9264766952085268, + "grad_norm": 1.8914936315985789, + "learning_rate": 3.4189827058173373e-06, + "loss": 0.7625, + "step": 48710 + }, + { + "epoch": 1.9268721944274159, + "grad_norm": 1.4046195571591606, + "learning_rate": 3.416799940420799e-06, + "loss": 0.763, + "step": 48720 + }, + { + "epoch": 1.927267693646305, + "grad_norm": 1.5114377469229656, + "learning_rate": 3.4146175103189093e-06, + "loss": 0.7578, + "step": 48730 + }, + { + "epoch": 1.927663192865194, + "grad_norm": 1.505956086773398, + "learning_rate": 3.4124354159738706e-06, + "loss": 0.7728, + "step": 48740 + }, + { + "epoch": 1.928058692084083, + "grad_norm": 1.7177236509143539, + "learning_rate": 3.4102536578478128e-06, + "loss": 0.764, + "step": 48750 + }, + { + "epoch": 1.928454191302972, + "grad_norm": 1.4892797158043651, + "learning_rate": 3.408072236402794e-06, + "loss": 0.7516, + "step": 48760 + }, + { + "epoch": 1.928849690521861, + "grad_norm": 1.5692821668259482, + "learning_rate": 3.4058911521008015e-06, + "loss": 0.7585, + "step": 48770 + }, + { + "epoch": 1.92924518974075, + "grad_norm": 1.2803271335932815, + "learning_rate": 3.4037104054037527e-06, + "loss": 0.7461, + "step": 48780 + }, + { + "epoch": 1.9296406889596391, + "grad_norm": 1.5675715242674635, + 
"learning_rate": 3.4015299967734918e-06, + "loss": 0.7289, + "step": 48790 + }, + { + "epoch": 1.9300361881785284, + "grad_norm": 1.3890223069054999, + "learning_rate": 3.3993499266717923e-06, + "loss": 0.7697, + "step": 48800 + }, + { + "epoch": 1.9304316873974174, + "grad_norm": 1.5466298113770032, + "learning_rate": 3.3971701955603566e-06, + "loss": 0.737, + "step": 48810 + }, + { + "epoch": 1.9308271866163065, + "grad_norm": 1.3084112723217525, + "learning_rate": 3.3949908039008122e-06, + "loss": 0.7503, + "step": 48820 + }, + { + "epoch": 1.9312226858351955, + "grad_norm": 1.4734269768329404, + "learning_rate": 3.392811752154719e-06, + "loss": 0.7413, + "step": 48830 + }, + { + "epoch": 1.9316181850540846, + "grad_norm": 1.5581941009276234, + "learning_rate": 3.390633040783562e-06, + "loss": 0.7446, + "step": 48840 + }, + { + "epoch": 1.9320136842729736, + "grad_norm": 1.5897203197148722, + "learning_rate": 3.388454670248754e-06, + "loss": 0.7447, + "step": 48850 + }, + { + "epoch": 1.9324091834918626, + "grad_norm": 1.4761346466781393, + "learning_rate": 3.38627664101164e-06, + "loss": 0.7403, + "step": 48860 + }, + { + "epoch": 1.9328046827107517, + "grad_norm": 1.2618642542204772, + "learning_rate": 3.384098953533485e-06, + "loss": 0.7461, + "step": 48870 + }, + { + "epoch": 1.9332001819296407, + "grad_norm": 1.3521030552741606, + "learning_rate": 3.381921608275489e-06, + "loss": 0.7648, + "step": 48880 + }, + { + "epoch": 1.9335956811485298, + "grad_norm": 1.1312807510275773, + "learning_rate": 3.3797446056987737e-06, + "loss": 0.7666, + "step": 48890 + }, + { + "epoch": 1.9339911803674188, + "grad_norm": 1.4020658782001285, + "learning_rate": 3.377567946264393e-06, + "loss": 0.7352, + "step": 48900 + }, + { + "epoch": 1.9343866795863078, + "grad_norm": 1.5485316861848237, + "learning_rate": 3.3753916304333258e-06, + "loss": 0.7393, + "step": 48910 + }, + { + "epoch": 1.9347821788051969, + "grad_norm": 1.2181391824146819, + "learning_rate": 3.3732156586664777e-06, + "loss": 0.7644, + "step": 48920 + }, + { + "epoch": 1.935177678024086, + "grad_norm": 1.5293651300305036, + "learning_rate": 3.371040031424683e-06, + "loss": 0.7908, + "step": 48930 + }, + { + "epoch": 1.935573177242975, + "grad_norm": 1.5103491018102542, + "learning_rate": 3.3688647491687014e-06, + "loss": 0.7681, + "step": 48940 + }, + { + "epoch": 1.935968676461864, + "grad_norm": 1.543737479420635, + "learning_rate": 3.3666898123592214e-06, + "loss": 0.725, + "step": 48950 + }, + { + "epoch": 1.936364175680753, + "grad_norm": 1.578888859247185, + "learning_rate": 3.3645152214568567e-06, + "loss": 0.7679, + "step": 48960 + }, + { + "epoch": 1.936759674899642, + "grad_norm": 1.4035187295134819, + "learning_rate": 3.3623409769221482e-06, + "loss": 0.7762, + "step": 48970 + }, + { + "epoch": 1.937155174118531, + "grad_norm": 1.546934871807321, + "learning_rate": 3.360167079215565e-06, + "loss": 0.7611, + "step": 48980 + }, + { + "epoch": 1.9375506733374201, + "grad_norm": 1.3246807323843184, + "learning_rate": 3.3579935287975003e-06, + "loss": 0.7625, + "step": 48990 + }, + { + "epoch": 1.9379461725563092, + "grad_norm": 1.4447933562823514, + "learning_rate": 3.3558203261282767e-06, + "loss": 0.7315, + "step": 49000 + }, + { + "epoch": 1.9383416717751982, + "grad_norm": 1.4436902647042094, + "learning_rate": 3.353647471668138e-06, + "loss": 0.7608, + "step": 49010 + }, + { + "epoch": 1.9387371709940873, + "grad_norm": 1.4259839029396255, + "learning_rate": 3.351474965877258e-06, + "loss": 0.7819, + "step": 49020 + }, + 
{ + "epoch": 1.9391326702129763, + "grad_norm": 1.5828484745009785, + "learning_rate": 3.3493028092157386e-06, + "loss": 0.7506, + "step": 49030 + }, + { + "epoch": 1.9395281694318653, + "grad_norm": 1.4226258962452412, + "learning_rate": 3.3471310021436044e-06, + "loss": 0.7425, + "step": 49040 + }, + { + "epoch": 1.9399236686507544, + "grad_norm": 1.4578316701814398, + "learning_rate": 3.3449595451208062e-06, + "loss": 0.7613, + "step": 49050 + }, + { + "epoch": 1.9403191678696434, + "grad_norm": 1.4540644265331437, + "learning_rate": 3.3427884386072216e-06, + "loss": 0.7696, + "step": 49060 + }, + { + "epoch": 1.9407146670885325, + "grad_norm": 1.3271743142653192, + "learning_rate": 3.3406176830626547e-06, + "loss": 0.772, + "step": 49070 + }, + { + "epoch": 1.9411101663074215, + "grad_norm": 1.3187071815920612, + "learning_rate": 3.3384472789468323e-06, + "loss": 0.7879, + "step": 49080 + }, + { + "epoch": 1.9415056655263105, + "grad_norm": 1.4905494881325165, + "learning_rate": 3.3362772267194117e-06, + "loss": 0.7665, + "step": 49090 + }, + { + "epoch": 1.9419011647451996, + "grad_norm": 1.4644771396011695, + "learning_rate": 3.3341075268399716e-06, + "loss": 0.7401, + "step": 49100 + }, + { + "epoch": 1.9422966639640886, + "grad_norm": 1.2426524751269876, + "learning_rate": 3.331938179768016e-06, + "loss": 0.7301, + "step": 49110 + }, + { + "epoch": 1.9426921631829777, + "grad_norm": 1.5486693015515494, + "learning_rate": 3.3297691859629776e-06, + "loss": 0.7563, + "step": 49120 + }, + { + "epoch": 1.943087662401867, + "grad_norm": 1.7099274403192768, + "learning_rate": 3.32760054588421e-06, + "loss": 0.7519, + "step": 49130 + }, + { + "epoch": 1.943483161620756, + "grad_norm": 1.9001952166461693, + "learning_rate": 3.3254322599909944e-06, + "loss": 0.7422, + "step": 49140 + }, + { + "epoch": 1.943878660839645, + "grad_norm": 1.7038576805585015, + "learning_rate": 3.323264328742538e-06, + "loss": 0.7348, + "step": 49150 + }, + { + "epoch": 1.944274160058534, + "grad_norm": 1.5195556853745318, + "learning_rate": 3.3210967525979705e-06, + "loss": 0.7271, + "step": 49160 + }, + { + "epoch": 1.944669659277423, + "grad_norm": 1.5813132310844498, + "learning_rate": 3.3189295320163465e-06, + "loss": 0.7533, + "step": 49170 + }, + { + "epoch": 1.9450651584963121, + "grad_norm": 1.4061286335668366, + "learning_rate": 3.3167626674566477e-06, + "loss": 0.7534, + "step": 49180 + }, + { + "epoch": 1.9454606577152012, + "grad_norm": 1.4926974593798348, + "learning_rate": 3.3145961593777785e-06, + "loss": 0.7554, + "step": 49190 + }, + { + "epoch": 1.9458561569340902, + "grad_norm": 1.5428129311889018, + "learning_rate": 3.312430008238568e-06, + "loss": 0.7489, + "step": 49200 + }, + { + "epoch": 1.9462516561529792, + "grad_norm": 1.267836886416892, + "learning_rate": 3.3102642144977702e-06, + "loss": 0.7596, + "step": 49210 + }, + { + "epoch": 1.9466471553718683, + "grad_norm": 1.3416948983027432, + "learning_rate": 3.308098778614062e-06, + "loss": 0.7685, + "step": 49220 + }, + { + "epoch": 1.9470426545907573, + "grad_norm": 2.079395250847278, + "learning_rate": 3.305933701046048e-06, + "loss": 0.7446, + "step": 49230 + }, + { + "epoch": 1.9474381538096464, + "grad_norm": 1.4652609744736982, + "learning_rate": 3.303768982252254e-06, + "loss": 0.7453, + "step": 49240 + }, + { + "epoch": 1.9478336530285354, + "grad_norm": 1.520013031890141, + "learning_rate": 3.3016046226911275e-06, + "loss": 0.7513, + "step": 49250 + }, + { + "epoch": 1.9482291522474244, + "grad_norm": 1.5059464962824864, + 
"learning_rate": 3.2994406228210446e-06, + "loss": 0.7572, + "step": 49260 + }, + { + "epoch": 1.9486246514663135, + "grad_norm": 1.5431473929414292, + "learning_rate": 3.2972769831003037e-06, + "loss": 0.7624, + "step": 49270 + }, + { + "epoch": 1.9490201506852025, + "grad_norm": 1.6683943154884664, + "learning_rate": 3.295113703987126e-06, + "loss": 0.7625, + "step": 49280 + }, + { + "epoch": 1.9494156499040916, + "grad_norm": 1.7187373024759323, + "learning_rate": 3.2929507859396583e-06, + "loss": 0.777, + "step": 49290 + }, + { + "epoch": 1.9498111491229806, + "grad_norm": 1.4338741058739413, + "learning_rate": 3.2907882294159676e-06, + "loss": 0.7622, + "step": 49300 + }, + { + "epoch": 1.9502066483418696, + "grad_norm": 1.2694241209479993, + "learning_rate": 3.2886260348740486e-06, + "loss": 0.7518, + "step": 49310 + }, + { + "epoch": 1.9506021475607587, + "grad_norm": 1.2918390528054295, + "learning_rate": 3.2864642027718145e-06, + "loss": 0.7618, + "step": 49320 + }, + { + "epoch": 1.9509976467796477, + "grad_norm": 1.8233829713077558, + "learning_rate": 3.2843027335671073e-06, + "loss": 0.7195, + "step": 49330 + }, + { + "epoch": 1.9513931459985367, + "grad_norm": 1.4349392796064928, + "learning_rate": 3.2821416277176866e-06, + "loss": 0.7812, + "step": 49340 + }, + { + "epoch": 1.9517886452174258, + "grad_norm": 1.5777762532946922, + "learning_rate": 3.279980885681238e-06, + "loss": 0.7555, + "step": 49350 + }, + { + "epoch": 1.9521841444363148, + "grad_norm": 1.2528261526068245, + "learning_rate": 3.277820507915371e-06, + "loss": 0.7498, + "step": 49360 + }, + { + "epoch": 1.9525796436552039, + "grad_norm": 1.4311290839123112, + "learning_rate": 3.2756604948776162e-06, + "loss": 0.7464, + "step": 49370 + }, + { + "epoch": 1.952975142874093, + "grad_norm": 1.4245912022898137, + "learning_rate": 3.2735008470254253e-06, + "loss": 0.7319, + "step": 49380 + }, + { + "epoch": 1.953370642092982, + "grad_norm": 1.3913165342796276, + "learning_rate": 3.2713415648161784e-06, + "loss": 0.76, + "step": 49390 + }, + { + "epoch": 1.953766141311871, + "grad_norm": 1.3409731134475538, + "learning_rate": 3.2691826487071706e-06, + "loss": 0.7574, + "step": 49400 + }, + { + "epoch": 1.95416164053076, + "grad_norm": 1.538553591011493, + "learning_rate": 3.2670240991556246e-06, + "loss": 0.7515, + "step": 49410 + }, + { + "epoch": 1.954557139749649, + "grad_norm": 1.2293005895073346, + "learning_rate": 3.264865916618686e-06, + "loss": 0.7366, + "step": 49420 + }, + { + "epoch": 1.954952638968538, + "grad_norm": 1.6330229907868956, + "learning_rate": 3.262708101553419e-06, + "loss": 0.7554, + "step": 49430 + }, + { + "epoch": 1.9553481381874271, + "grad_norm": 1.227599314546672, + "learning_rate": 3.260550654416812e-06, + "loss": 0.7638, + "step": 49440 + }, + { + "epoch": 1.9557436374063162, + "grad_norm": 1.782317767172154, + "learning_rate": 3.2583935756657765e-06, + "loss": 0.7275, + "step": 49450 + }, + { + "epoch": 1.9561391366252052, + "grad_norm": 1.517257695031236, + "learning_rate": 3.256236865757144e-06, + "loss": 0.7758, + "step": 49460 + }, + { + "epoch": 1.9565346358440943, + "grad_norm": 1.848947159628992, + "learning_rate": 3.2540805251476686e-06, + "loss": 0.7657, + "step": 49470 + }, + { + "epoch": 1.9569301350629833, + "grad_norm": 1.357699368333366, + "learning_rate": 3.251924554294027e-06, + "loss": 0.7397, + "step": 49480 + }, + { + "epoch": 1.9573256342818723, + "grad_norm": 1.488430604393747, + "learning_rate": 3.249768953652818e-06, + "loss": 0.7564, + "step": 49490 + }, + { 
+ "epoch": 1.9577211335007614, + "grad_norm": 1.5031648287920552, + "learning_rate": 3.247613723680558e-06, + "loss": 0.7272, + "step": 49500 + }, + { + "epoch": 1.9581166327196504, + "grad_norm": 1.5734375701773522, + "learning_rate": 3.2454588648336883e-06, + "loss": 0.7555, + "step": 49510 + }, + { + "epoch": 1.9585121319385395, + "grad_norm": 1.5026824892908905, + "learning_rate": 3.2433043775685726e-06, + "loss": 0.7507, + "step": 49520 + }, + { + "epoch": 1.9589076311574285, + "grad_norm": 1.391410395956086, + "learning_rate": 3.2411502623414925e-06, + "loss": 0.7667, + "step": 49530 + }, + { + "epoch": 1.9593031303763175, + "grad_norm": 1.375393984038744, + "learning_rate": 3.238996519608655e-06, + "loss": 0.7321, + "step": 49540 + }, + { + "epoch": 1.9596986295952066, + "grad_norm": 1.4985941461219778, + "learning_rate": 3.2368431498261843e-06, + "loss": 0.7452, + "step": 49550 + }, + { + "epoch": 1.9600941288140956, + "grad_norm": 1.3261002167446854, + "learning_rate": 3.2346901534501284e-06, + "loss": 0.7438, + "step": 49560 + }, + { + "epoch": 1.9604896280329847, + "grad_norm": 1.3234637320520948, + "learning_rate": 3.232537530936455e-06, + "loss": 0.7297, + "step": 49570 + }, + { + "epoch": 1.9608851272518737, + "grad_norm": 1.6337549339603916, + "learning_rate": 3.2303852827410507e-06, + "loss": 0.7363, + "step": 49580 + }, + { + "epoch": 1.9612806264707627, + "grad_norm": 1.6417254549010538, + "learning_rate": 3.2282334093197264e-06, + "loss": 0.7578, + "step": 49590 + }, + { + "epoch": 1.9616761256896518, + "grad_norm": 1.4706272719879383, + "learning_rate": 3.2260819111282116e-06, + "loss": 0.7684, + "step": 49600 + }, + { + "epoch": 1.9620716249085408, + "grad_norm": 1.4205193802129494, + "learning_rate": 3.2239307886221584e-06, + "loss": 0.7593, + "step": 49610 + }, + { + "epoch": 1.9624671241274299, + "grad_norm": 1.2850868967229856, + "learning_rate": 3.2217800422571355e-06, + "loss": 0.7203, + "step": 49620 + }, + { + "epoch": 1.962862623346319, + "grad_norm": 1.629159667372725, + "learning_rate": 3.2196296724886344e-06, + "loss": 0.7482, + "step": 49630 + }, + { + "epoch": 1.963258122565208, + "grad_norm": 1.4239030714198087, + "learning_rate": 3.217479679772067e-06, + "loss": 0.7187, + "step": 49640 + }, + { + "epoch": 1.963653621784097, + "grad_norm": 1.344343091529516, + "learning_rate": 3.215330064562765e-06, + "loss": 0.7679, + "step": 49650 + }, + { + "epoch": 1.964049121002986, + "grad_norm": 1.5223572839653838, + "learning_rate": 3.2131808273159797e-06, + "loss": 0.74, + "step": 49660 + }, + { + "epoch": 1.964444620221875, + "grad_norm": 1.3005502780582872, + "learning_rate": 3.2110319684868828e-06, + "loss": 0.757, + "step": 49670 + }, + { + "epoch": 1.964840119440764, + "grad_norm": 1.5729654691099022, + "learning_rate": 3.2088834885305663e-06, + "loss": 0.7544, + "step": 49680 + }, + { + "epoch": 1.9652356186596531, + "grad_norm": 1.532141530435542, + "learning_rate": 3.2067353879020417e-06, + "loss": 0.7391, + "step": 49690 + }, + { + "epoch": 1.9656311178785422, + "grad_norm": 1.4893212963382882, + "learning_rate": 3.2045876670562392e-06, + "loss": 0.7412, + "step": 49700 + }, + { + "epoch": 1.9660266170974312, + "grad_norm": 1.3284123493225042, + "learning_rate": 3.2024403264480093e-06, + "loss": 0.756, + "step": 49710 + }, + { + "epoch": 1.9664221163163202, + "grad_norm": 1.6925962773624412, + "learning_rate": 3.200293366532122e-06, + "loss": 0.7148, + "step": 49720 + }, + { + "epoch": 1.9668176155352093, + "grad_norm": 1.2955586304860902, + 
"learning_rate": 3.198146787763269e-06, + "loss": 0.7627, + "step": 49730 + }, + { + "epoch": 1.9672131147540983, + "grad_norm": 1.2757727569359905, + "learning_rate": 3.1960005905960543e-06, + "loss": 0.7373, + "step": 49740 + }, + { + "epoch": 1.9676086139729874, + "grad_norm": 1.3369547190427764, + "learning_rate": 3.193854775485008e-06, + "loss": 0.7919, + "step": 49750 + }, + { + "epoch": 1.9680041131918764, + "grad_norm": 1.2374774626039289, + "learning_rate": 3.191709342884578e-06, + "loss": 0.739, + "step": 49760 + }, + { + "epoch": 1.9683996124107654, + "grad_norm": 1.3403133770543079, + "learning_rate": 3.189564293249128e-06, + "loss": 0.7395, + "step": 49770 + }, + { + "epoch": 1.9687951116296545, + "grad_norm": 1.4439156004199367, + "learning_rate": 3.187419627032945e-06, + "loss": 0.7428, + "step": 49780 + }, + { + "epoch": 1.9691906108485435, + "grad_norm": 1.2464371198076574, + "learning_rate": 3.1852753446902308e-06, + "loss": 0.7469, + "step": 49790 + }, + { + "epoch": 1.9695861100674326, + "grad_norm": 1.4960067420592544, + "learning_rate": 3.1831314466751094e-06, + "loss": 0.7456, + "step": 49800 + }, + { + "epoch": 1.9699816092863216, + "grad_norm": 1.5620929600686453, + "learning_rate": 3.18098793344162e-06, + "loss": 0.742, + "step": 49810 + }, + { + "epoch": 1.9703771085052106, + "grad_norm": 1.4174597496704604, + "learning_rate": 3.1788448054437226e-06, + "loss": 0.7818, + "step": 49820 + }, + { + "epoch": 1.9707726077240997, + "grad_norm": 1.6933624540242442, + "learning_rate": 3.1767020631352944e-06, + "loss": 0.7624, + "step": 49830 + }, + { + "epoch": 1.9711681069429887, + "grad_norm": 1.5719732507415092, + "learning_rate": 3.174559706970133e-06, + "loss": 0.7381, + "step": 49840 + }, + { + "epoch": 1.9715636061618778, + "grad_norm": 1.5205689202300559, + "learning_rate": 3.1724177374019516e-06, + "loss": 0.7483, + "step": 49850 + }, + { + "epoch": 1.9719591053807668, + "grad_norm": 1.3824346838352402, + "learning_rate": 3.1702761548843846e-06, + "loss": 0.7429, + "step": 49860 + }, + { + "epoch": 1.9723546045996558, + "grad_norm": 1.7177389822960694, + "learning_rate": 3.1681349598709786e-06, + "loss": 0.7534, + "step": 49870 + }, + { + "epoch": 1.9727501038185449, + "grad_norm": 1.5848933922888981, + "learning_rate": 3.165994152815205e-06, + "loss": 0.7475, + "step": 49880 + }, + { + "epoch": 1.973145603037434, + "grad_norm": 1.6432615433092956, + "learning_rate": 3.163853734170449e-06, + "loss": 0.7187, + "step": 49890 + }, + { + "epoch": 1.973541102256323, + "grad_norm": 1.3280025483371978, + "learning_rate": 3.161713704390015e-06, + "loss": 0.7468, + "step": 49900 + }, + { + "epoch": 1.973936601475212, + "grad_norm": 1.4042848197815534, + "learning_rate": 3.1595740639271244e-06, + "loss": 0.7524, + "step": 49910 + }, + { + "epoch": 1.974332100694101, + "grad_norm": 1.5327978854117172, + "learning_rate": 3.1574348132349166e-06, + "loss": 0.744, + "step": 49920 + }, + { + "epoch": 1.97472759991299, + "grad_norm": 1.369787456526148, + "learning_rate": 3.1552959527664486e-06, + "loss": 0.7637, + "step": 49930 + }, + { + "epoch": 1.9751230991318791, + "grad_norm": 1.4162618286042996, + "learning_rate": 3.153157482974694e-06, + "loss": 0.7845, + "step": 49940 + }, + { + "epoch": 1.9755185983507682, + "grad_norm": 1.5572962395347227, + "learning_rate": 3.151019404312543e-06, + "loss": 0.7593, + "step": 49950 + }, + { + "epoch": 1.9759140975696572, + "grad_norm": 1.6630340158767258, + "learning_rate": 3.148881717232806e-06, + "loss": 0.7228, + "step": 49960 + }, + 
{ + "epoch": 1.9763095967885462, + "grad_norm": 1.436640404635433, + "learning_rate": 3.146744422188207e-06, + "loss": 0.7507, + "step": 49970 + }, + { + "epoch": 1.9767050960074353, + "grad_norm": 1.5900334690830433, + "learning_rate": 3.144607519631391e-06, + "loss": 0.7123, + "step": 49980 + }, + { + "epoch": 1.9771005952263243, + "grad_norm": 1.8065022828426256, + "learning_rate": 3.1424710100149138e-06, + "loss": 0.7266, + "step": 49990 + }, + { + "epoch": 1.9774960944452133, + "grad_norm": 1.5708774212769452, + "learning_rate": 3.140334893791253e-06, + "loss": 0.751, + "step": 50000 + }, + { + "epoch": 1.9778915936641024, + "grad_norm": 1.2041803179200292, + "learning_rate": 3.1381991714128014e-06, + "loss": 0.7645, + "step": 50010 + }, + { + "epoch": 1.9782870928829914, + "grad_norm": 1.5161607960909291, + "learning_rate": 3.136063843331869e-06, + "loss": 0.7669, + "step": 50020 + }, + { + "epoch": 1.9786825921018805, + "grad_norm": 1.1949110223981865, + "learning_rate": 3.133928910000681e-06, + "loss": 0.7298, + "step": 50030 + }, + { + "epoch": 1.9790780913207695, + "grad_norm": 1.57743696187895, + "learning_rate": 3.131794371871381e-06, + "loss": 0.7436, + "step": 50040 + }, + { + "epoch": 1.9794735905396585, + "grad_norm": 1.7472260740809376, + "learning_rate": 3.1296602293960255e-06, + "loss": 0.7442, + "step": 50050 + }, + { + "epoch": 1.9798690897585476, + "grad_norm": 1.331231052866077, + "learning_rate": 3.1275264830265906e-06, + "loss": 0.7123, + "step": 50060 + }, + { + "epoch": 1.9802645889774366, + "grad_norm": 1.422859931294295, + "learning_rate": 3.1253931332149674e-06, + "loss": 0.7658, + "step": 50070 + }, + { + "epoch": 1.9806600881963257, + "grad_norm": 1.7125474026339809, + "learning_rate": 3.1232601804129614e-06, + "loss": 0.7459, + "step": 50080 + }, + { + "epoch": 1.9810555874152147, + "grad_norm": 1.411372049610973, + "learning_rate": 3.121127625072298e-06, + "loss": 0.7196, + "step": 50090 + }, + { + "epoch": 1.9814510866341037, + "grad_norm": 1.5085204450222867, + "learning_rate": 3.1189954676446157e-06, + "loss": 0.7368, + "step": 50100 + }, + { + "epoch": 1.9818465858529928, + "grad_norm": 1.684665455960958, + "learning_rate": 3.1168637085814646e-06, + "loss": 0.7488, + "step": 50110 + }, + { + "epoch": 1.9822420850718818, + "grad_norm": 1.3989186619439802, + "learning_rate": 3.114732348334319e-06, + "loss": 0.7752, + "step": 50120 + }, + { + "epoch": 1.9826375842907709, + "grad_norm": 1.6538681001106232, + "learning_rate": 3.112601387354563e-06, + "loss": 0.7577, + "step": 50130 + }, + { + "epoch": 1.9830330835096601, + "grad_norm": 1.3966945065203595, + "learning_rate": 3.110470826093498e-06, + "loss": 0.7991, + "step": 50140 + }, + { + "epoch": 1.9834285827285492, + "grad_norm": 1.2149962113615989, + "learning_rate": 3.1083406650023395e-06, + "loss": 0.7443, + "step": 50150 + }, + { + "epoch": 1.9838240819474382, + "grad_norm": 1.4709324582136063, + "learning_rate": 3.106210904532221e-06, + "loss": 0.7693, + "step": 50160 + }, + { + "epoch": 1.9842195811663272, + "grad_norm": 1.2485442819705834, + "learning_rate": 3.1040815451341877e-06, + "loss": 0.759, + "step": 50170 + }, + { + "epoch": 1.9846150803852163, + "grad_norm": 1.3859062218403437, + "learning_rate": 3.1019525872592016e-06, + "loss": 0.7472, + "step": 50180 + }, + { + "epoch": 1.9850105796041053, + "grad_norm": 1.8170528006030893, + "learning_rate": 3.0998240313581395e-06, + "loss": 0.7155, + "step": 50190 + }, + { + "epoch": 1.9854060788229944, + "grad_norm": 1.4772368201282566, + 
"learning_rate": 3.0976958778817945e-06, + "loss": 0.7479, + "step": 50200 + }, + { + "epoch": 1.9858015780418834, + "grad_norm": 1.2891779094272988, + "learning_rate": 3.095568127280871e-06, + "loss": 0.7401, + "step": 50210 + }, + { + "epoch": 1.9861970772607724, + "grad_norm": 1.228706551163606, + "learning_rate": 3.0934407800059936e-06, + "loss": 0.7746, + "step": 50220 + }, + { + "epoch": 1.9865925764796615, + "grad_norm": 1.4719983593967, + "learning_rate": 3.0913138365076935e-06, + "loss": 0.751, + "step": 50230 + }, + { + "epoch": 1.9869880756985505, + "grad_norm": 1.431622349470501, + "learning_rate": 3.089187297236422e-06, + "loss": 0.7414, + "step": 50240 + }, + { + "epoch": 1.9873835749174396, + "grad_norm": 1.2807268492912474, + "learning_rate": 3.0870611626425456e-06, + "loss": 0.7373, + "step": 50250 + }, + { + "epoch": 1.9877790741363286, + "grad_norm": 1.4242911829633138, + "learning_rate": 3.0849354331763417e-06, + "loss": 0.739, + "step": 50260 + }, + { + "epoch": 1.9881745733552176, + "grad_norm": 1.5430532314663867, + "learning_rate": 3.082810109288005e-06, + "loss": 0.7773, + "step": 50270 + }, + { + "epoch": 1.9885700725741067, + "grad_norm": 1.8474100325704523, + "learning_rate": 3.0806851914276404e-06, + "loss": 0.7476, + "step": 50280 + }, + { + "epoch": 1.9889655717929957, + "grad_norm": 1.4555674312606346, + "learning_rate": 3.0785606800452694e-06, + "loss": 0.7346, + "step": 50290 + }, + { + "epoch": 1.9893610710118848, + "grad_norm": 1.1870897621333016, + "learning_rate": 3.076436575590829e-06, + "loss": 0.7282, + "step": 50300 + }, + { + "epoch": 1.9897565702307738, + "grad_norm": 1.6382153059937383, + "learning_rate": 3.0743128785141662e-06, + "loss": 0.7614, + "step": 50310 + }, + { + "epoch": 1.9901520694496628, + "grad_norm": 1.3498307697155651, + "learning_rate": 3.0721895892650453e-06, + "loss": 0.7571, + "step": 50320 + }, + { + "epoch": 1.9905475686685519, + "grad_norm": 1.3613800681174704, + "learning_rate": 3.070066708293141e-06, + "loss": 0.7313, + "step": 50330 + }, + { + "epoch": 1.990943067887441, + "grad_norm": 1.3769070892965563, + "learning_rate": 3.0679442360480444e-06, + "loss": 0.7471, + "step": 50340 + }, + { + "epoch": 1.99133856710633, + "grad_norm": 1.1861380029663058, + "learning_rate": 3.06582217297926e-06, + "loss": 0.765, + "step": 50350 + }, + { + "epoch": 1.991734066325219, + "grad_norm": 1.3224926966755899, + "learning_rate": 3.0637005195362014e-06, + "loss": 0.7535, + "step": 50360 + }, + { + "epoch": 1.992129565544108, + "grad_norm": 1.4988078410324497, + "learning_rate": 3.0615792761681986e-06, + "loss": 0.7186, + "step": 50370 + }, + { + "epoch": 1.992525064762997, + "grad_norm": 1.3629812450419356, + "learning_rate": 3.059458443324497e-06, + "loss": 0.751, + "step": 50380 + }, + { + "epoch": 1.992920563981886, + "grad_norm": 1.3919346966392678, + "learning_rate": 3.0573380214542503e-06, + "loss": 0.7392, + "step": 50390 + }, + { + "epoch": 1.9933160632007751, + "grad_norm": 1.7689707022600436, + "learning_rate": 3.0552180110065287e-06, + "loss": 0.7452, + "step": 50400 + }, + { + "epoch": 1.9937115624196642, + "grad_norm": 1.508851741090035, + "learning_rate": 3.053098412430314e-06, + "loss": 0.7188, + "step": 50410 + }, + { + "epoch": 1.9941070616385532, + "grad_norm": 1.5179117928795338, + "learning_rate": 3.050979226174501e-06, + "loss": 0.7463, + "step": 50420 + }, + { + "epoch": 1.9945025608574423, + "grad_norm": 1.511734302566831, + "learning_rate": 3.0488604526878973e-06, + "loss": 0.7679, + "step": 50430 + }, + { + 
"epoch": 1.9948980600763313, + "grad_norm": 1.3499716359148253, + "learning_rate": 3.0467420924192222e-06, + "loss": 0.7718, + "step": 50440 + }, + { + "epoch": 1.9952935592952203, + "grad_norm": 1.4635847049541149, + "learning_rate": 3.044624145817109e-06, + "loss": 0.7809, + "step": 50450 + }, + { + "epoch": 1.9956890585141094, + "grad_norm": 1.6636553439630535, + "learning_rate": 3.0425066133301013e-06, + "loss": 0.7531, + "step": 50460 + }, + { + "epoch": 1.9960845577329986, + "grad_norm": 1.6397023814258374, + "learning_rate": 3.040389495406657e-06, + "loss": 0.7359, + "step": 50470 + }, + { + "epoch": 1.9964800569518877, + "grad_norm": 1.5760470468518895, + "learning_rate": 3.038272792495145e-06, + "loss": 0.7533, + "step": 50480 + }, + { + "epoch": 1.9968755561707767, + "grad_norm": 1.413545117194915, + "learning_rate": 3.036156505043847e-06, + "loss": 0.7352, + "step": 50490 + }, + { + "epoch": 1.9972710553896658, + "grad_norm": 1.482806844125226, + "learning_rate": 3.034040633500955e-06, + "loss": 0.7759, + "step": 50500 + }, + { + "epoch": 1.9976665546085548, + "grad_norm": 1.2633338655765045, + "learning_rate": 3.031925178314578e-06, + "loss": 0.7508, + "step": 50510 + }, + { + "epoch": 1.9980620538274438, + "grad_norm": 1.6399789811528698, + "learning_rate": 3.0298101399327296e-06, + "loss": 0.7427, + "step": 50520 + }, + { + "epoch": 1.9984575530463329, + "grad_norm": 2.029912877311006, + "learning_rate": 3.0276955188033395e-06, + "loss": 0.7421, + "step": 50530 + }, + { + "epoch": 1.998853052265222, + "grad_norm": 1.2634810149622835, + "learning_rate": 3.0255813153742488e-06, + "loss": 0.7576, + "step": 50540 + }, + { + "epoch": 1.999248551484111, + "grad_norm": 1.385864567449702, + "learning_rate": 3.0234675300932093e-06, + "loss": 0.7567, + "step": 50550 + }, + { + "epoch": 1.999644050703, + "grad_norm": 1.5902270149194355, + "learning_rate": 3.0213541634078847e-06, + "loss": 0.6993, + "step": 50560 + }, + { + "epoch": 2.000039549921889, + "grad_norm": 1.2503498514691747, + "learning_rate": 3.019241215765849e-06, + "loss": 0.7206, + "step": 50570 + }, + { + "epoch": 2.000435049140778, + "grad_norm": 1.5242846812726885, + "learning_rate": 3.01712868761459e-06, + "loss": 0.6639, + "step": 50580 + }, + { + "epoch": 2.000830548359667, + "grad_norm": 1.2428219186475273, + "learning_rate": 3.0150165794015055e-06, + "loss": 0.7109, + "step": 50590 + }, + { + "epoch": 2.001226047578556, + "grad_norm": 1.3345865451016519, + "learning_rate": 3.0129048915739013e-06, + "loss": 0.7023, + "step": 50600 + }, + { + "epoch": 2.001621546797445, + "grad_norm": 1.283190225337282, + "learning_rate": 3.010793624578997e-06, + "loss": 0.688, + "step": 50610 + }, + { + "epoch": 2.0020170460163342, + "grad_norm": 1.3809873563864985, + "learning_rate": 3.0086827788639233e-06, + "loss": 0.6404, + "step": 50620 + }, + { + "epoch": 2.0024125452352233, + "grad_norm": 1.5080959929557487, + "learning_rate": 3.006572354875722e-06, + "loss": 0.663, + "step": 50630 + }, + { + "epoch": 2.0028080444541123, + "grad_norm": 1.407609094848851, + "learning_rate": 3.0044623530613437e-06, + "loss": 0.6746, + "step": 50640 + }, + { + "epoch": 2.0032035436730014, + "grad_norm": 1.849854331500034, + "learning_rate": 3.0023527738676518e-06, + "loss": 0.6774, + "step": 50650 + }, + { + "epoch": 2.0035990428918904, + "grad_norm": 1.6437960896583887, + "learning_rate": 3.0002436177414175e-06, + "loss": 0.6708, + "step": 50660 + }, + { + "epoch": 2.0039945421107794, + "grad_norm": 1.3265830976424084, + "learning_rate": 
2.9981348851293254e-06, + "loss": 0.7002, + "step": 50670 + }, + { + "epoch": 2.0043900413296685, + "grad_norm": 1.6576160939199058, + "learning_rate": 2.996026576477967e-06, + "loss": 0.6894, + "step": 50680 + }, + { + "epoch": 2.0047855405485575, + "grad_norm": 1.3296136604093665, + "learning_rate": 2.99391869223385e-06, + "loss": 0.6512, + "step": 50690 + }, + { + "epoch": 2.0051810397674465, + "grad_norm": 1.3516789424581421, + "learning_rate": 2.9918112328433835e-06, + "loss": 0.7059, + "step": 50700 + }, + { + "epoch": 2.0055765389863356, + "grad_norm": 1.59714220712334, + "learning_rate": 2.9897041987528943e-06, + "loss": 0.6825, + "step": 50710 + }, + { + "epoch": 2.0059720382052246, + "grad_norm": 1.3536228274613509, + "learning_rate": 2.987597590408614e-06, + "loss": 0.7068, + "step": 50720 + }, + { + "epoch": 2.0063675374241137, + "grad_norm": 1.4035241484722534, + "learning_rate": 2.9854914082566876e-06, + "loss": 0.678, + "step": 50730 + }, + { + "epoch": 2.0067630366430027, + "grad_norm": 1.3902323666999807, + "learning_rate": 2.9833856527431686e-06, + "loss": 0.6882, + "step": 50740 + }, + { + "epoch": 2.0071585358618917, + "grad_norm": 1.379686579583006, + "learning_rate": 2.9812803243140188e-06, + "loss": 0.7059, + "step": 50750 + }, + { + "epoch": 2.007554035080781, + "grad_norm": 1.3427615297738003, + "learning_rate": 2.9791754234151106e-06, + "loss": 0.6908, + "step": 50760 + }, + { + "epoch": 2.00794953429967, + "grad_norm": 1.3745683001797573, + "learning_rate": 2.9770709504922266e-06, + "loss": 0.6483, + "step": 50770 + }, + { + "epoch": 2.008345033518559, + "grad_norm": 1.5354490076683796, + "learning_rate": 2.9749669059910586e-06, + "loss": 0.6949, + "step": 50780 + }, + { + "epoch": 2.008740532737448, + "grad_norm": 1.3935711216163609, + "learning_rate": 2.9728632903572065e-06, + "loss": 0.6699, + "step": 50790 + }, + { + "epoch": 2.009136031956337, + "grad_norm": 1.6777422963852027, + "learning_rate": 2.97076010403618e-06, + "loss": 0.7059, + "step": 50800 + }, + { + "epoch": 2.009531531175226, + "grad_norm": 1.2663557314927076, + "learning_rate": 2.9686573474733983e-06, + "loss": 0.6945, + "step": 50810 + }, + { + "epoch": 2.009927030394115, + "grad_norm": 1.4118856584692836, + "learning_rate": 2.9665550211141884e-06, + "loss": 0.668, + "step": 50820 + }, + { + "epoch": 2.010322529613004, + "grad_norm": 1.3243353188551914, + "learning_rate": 2.964453125403789e-06, + "loss": 0.6685, + "step": 50830 + }, + { + "epoch": 2.010718028831893, + "grad_norm": 1.2969200377065218, + "learning_rate": 2.962351660787345e-06, + "loss": 0.6806, + "step": 50840 + }, + { + "epoch": 2.011113528050782, + "grad_norm": 1.6311053422237278, + "learning_rate": 2.9602506277099085e-06, + "loss": 0.6719, + "step": 50850 + }, + { + "epoch": 2.011509027269671, + "grad_norm": 1.3307741847343124, + "learning_rate": 2.958150026616443e-06, + "loss": 0.6599, + "step": 50860 + }, + { + "epoch": 2.01190452648856, + "grad_norm": 1.4835150032198157, + "learning_rate": 2.9560498579518216e-06, + "loss": 0.6673, + "step": 50870 + }, + { + "epoch": 2.0123000257074493, + "grad_norm": 1.385214996983982, + "learning_rate": 2.9539501221608225e-06, + "loss": 0.6456, + "step": 50880 + }, + { + "epoch": 2.0126955249263383, + "grad_norm": 1.2772499801401835, + "learning_rate": 2.951850819688134e-06, + "loss": 0.6645, + "step": 50890 + }, + { + "epoch": 2.0130910241452273, + "grad_norm": 1.2386461563456654, + "learning_rate": 2.9497519509783524e-06, + "loss": 0.7074, + "step": 50900 + }, + { + "epoch": 
2.0134865233641164, + "grad_norm": 1.5315591541942783, + "learning_rate": 2.9476535164759827e-06, + "loss": 0.6873, + "step": 50910 + }, + { + "epoch": 2.0138820225830054, + "grad_norm": 1.3425740280079377, + "learning_rate": 2.945555516625438e-06, + "loss": 0.679, + "step": 50920 + }, + { + "epoch": 2.0142775218018945, + "grad_norm": 1.3319633447164219, + "learning_rate": 2.943457951871037e-06, + "loss": 0.6709, + "step": 50930 + }, + { + "epoch": 2.0146730210207835, + "grad_norm": 1.3179112283657646, + "learning_rate": 2.941360822657008e-06, + "loss": 0.6874, + "step": 50940 + }, + { + "epoch": 2.0150685202396725, + "grad_norm": 1.3017835976189571, + "learning_rate": 2.939264129427489e-06, + "loss": 0.6543, + "step": 50950 + }, + { + "epoch": 2.0154640194585616, + "grad_norm": 1.3499179718961225, + "learning_rate": 2.9371678726265223e-06, + "loss": 0.6689, + "step": 50960 + }, + { + "epoch": 2.0158595186774506, + "grad_norm": 1.6671297883313765, + "learning_rate": 2.9350720526980592e-06, + "loss": 0.679, + "step": 50970 + }, + { + "epoch": 2.0162550178963397, + "grad_norm": 1.4018498982984247, + "learning_rate": 2.9329766700859586e-06, + "loss": 0.6512, + "step": 50980 + }, + { + "epoch": 2.0166505171152287, + "grad_norm": 1.4676454889004524, + "learning_rate": 2.930881725233986e-06, + "loss": 0.6605, + "step": 50990 + }, + { + "epoch": 2.0170460163341177, + "grad_norm": 1.3274383695240082, + "learning_rate": 2.928787218585816e-06, + "loss": 0.6733, + "step": 51000 + }, + { + "epoch": 2.0174415155530068, + "grad_norm": 1.5160342524855654, + "learning_rate": 2.926693150585028e-06, + "loss": 0.6805, + "step": 51010 + }, + { + "epoch": 2.017837014771896, + "grad_norm": 1.448429933318291, + "learning_rate": 2.9245995216751113e-06, + "loss": 0.6798, + "step": 51020 + }, + { + "epoch": 2.018232513990785, + "grad_norm": 1.4160325991828986, + "learning_rate": 2.922506332299459e-06, + "loss": 0.6567, + "step": 51030 + }, + { + "epoch": 2.018628013209674, + "grad_norm": 1.6586108652650249, + "learning_rate": 2.9204135829013735e-06, + "loss": 0.6531, + "step": 51040 + }, + { + "epoch": 2.019023512428563, + "grad_norm": 1.260584164412188, + "learning_rate": 2.9183212739240647e-06, + "loss": 0.7015, + "step": 51050 + }, + { + "epoch": 2.019419011647452, + "grad_norm": 1.331985372217651, + "learning_rate": 2.9162294058106444e-06, + "loss": 0.6808, + "step": 51060 + }, + { + "epoch": 2.019814510866341, + "grad_norm": 1.497903133786886, + "learning_rate": 2.914137979004138e-06, + "loss": 0.6536, + "step": 51070 + }, + { + "epoch": 2.02021001008523, + "grad_norm": 1.6507433804618468, + "learning_rate": 2.9120469939474728e-06, + "loss": 0.6804, + "step": 51080 + }, + { + "epoch": 2.020605509304119, + "grad_norm": 1.2715119936228625, + "learning_rate": 2.90995645108348e-06, + "loss": 0.6826, + "step": 51090 + }, + { + "epoch": 2.021001008523008, + "grad_norm": 1.4486293293535164, + "learning_rate": 2.9078663508549064e-06, + "loss": 0.6924, + "step": 51100 + }, + { + "epoch": 2.021396507741897, + "grad_norm": 1.2879757972428043, + "learning_rate": 2.9057766937043975e-06, + "loss": 0.6922, + "step": 51110 + }, + { + "epoch": 2.021792006960786, + "grad_norm": 1.5685520761087486, + "learning_rate": 2.9036874800745035e-06, + "loss": 0.6794, + "step": 51120 + }, + { + "epoch": 2.0221875061796752, + "grad_norm": 1.3158964222984417, + "learning_rate": 2.9015987104076893e-06, + "loss": 0.6895, + "step": 51130 + }, + { + "epoch": 2.0225830053985643, + "grad_norm": 1.3960220197219224, + "learning_rate": 
2.899510385146316e-06, + "loss": 0.6938, + "step": 51140 + }, + { + "epoch": 2.0229785046174533, + "grad_norm": 1.2181485021077414, + "learning_rate": 2.897422504732659e-06, + "loss": 0.69, + "step": 51150 + }, + { + "epoch": 2.0233740038363424, + "grad_norm": 1.2331212391951465, + "learning_rate": 2.895335069608891e-06, + "loss": 0.6804, + "step": 51160 + }, + { + "epoch": 2.0237695030552314, + "grad_norm": 1.3131492109852732, + "learning_rate": 2.8932480802171005e-06, + "loss": 0.686, + "step": 51170 + }, + { + "epoch": 2.0241650022741204, + "grad_norm": 1.439822595920632, + "learning_rate": 2.891161536999271e-06, + "loss": 0.6678, + "step": 51180 + }, + { + "epoch": 2.0245605014930095, + "grad_norm": 1.2464753737473369, + "learning_rate": 2.8890754403973015e-06, + "loss": 0.7049, + "step": 51190 + }, + { + "epoch": 2.0249560007118985, + "grad_norm": 1.2402150879044864, + "learning_rate": 2.8869897908529885e-06, + "loss": 0.6532, + "step": 51200 + }, + { + "epoch": 2.0253514999307876, + "grad_norm": 1.5991207384443442, + "learning_rate": 2.8849045888080375e-06, + "loss": 0.6795, + "step": 51210 + }, + { + "epoch": 2.0257469991496766, + "grad_norm": 1.5347594990005007, + "learning_rate": 2.8828198347040567e-06, + "loss": 0.6552, + "step": 51220 + }, + { + "epoch": 2.0261424983685656, + "grad_norm": 1.608453122797535, + "learning_rate": 2.8807355289825645e-06, + "loss": 0.7042, + "step": 51230 + }, + { + "epoch": 2.0265379975874547, + "grad_norm": 1.8781780266172596, + "learning_rate": 2.878651672084978e-06, + "loss": 0.6463, + "step": 51240 + }, + { + "epoch": 2.0269334968063437, + "grad_norm": 1.6421720318587067, + "learning_rate": 2.876568264452625e-06, + "loss": 0.6593, + "step": 51250 + }, + { + "epoch": 2.0273289960252328, + "grad_norm": 1.5979964200325398, + "learning_rate": 2.874485306526733e-06, + "loss": 0.6739, + "step": 51260 + }, + { + "epoch": 2.027724495244122, + "grad_norm": 1.4108229885269192, + "learning_rate": 2.8724027987484417e-06, + "loss": 0.6759, + "step": 51270 + }, + { + "epoch": 2.028119994463011, + "grad_norm": 1.6001752683500097, + "learning_rate": 2.870320741558785e-06, + "loss": 0.6651, + "step": 51280 + }, + { + "epoch": 2.0285154936819, + "grad_norm": 1.4589700924840185, + "learning_rate": 2.8682391353987087e-06, + "loss": 0.7056, + "step": 51290 + }, + { + "epoch": 2.028910992900789, + "grad_norm": 1.79256884361263, + "learning_rate": 2.8661579807090634e-06, + "loss": 0.6485, + "step": 51300 + }, + { + "epoch": 2.029306492119678, + "grad_norm": 1.5533611885916156, + "learning_rate": 2.8640772779305985e-06, + "loss": 0.6788, + "step": 51310 + }, + { + "epoch": 2.029701991338567, + "grad_norm": 1.2682116196971873, + "learning_rate": 2.8619970275039755e-06, + "loss": 0.6764, + "step": 51320 + }, + { + "epoch": 2.030097490557456, + "grad_norm": 1.409051980441851, + "learning_rate": 2.8599172298697526e-06, + "loss": 0.67, + "step": 51330 + }, + { + "epoch": 2.030492989776345, + "grad_norm": 1.599522980345227, + "learning_rate": 2.857837885468396e-06, + "loss": 0.6834, + "step": 51340 + }, + { + "epoch": 2.030888488995234, + "grad_norm": 1.4713719749726173, + "learning_rate": 2.855758994740274e-06, + "loss": 0.7017, + "step": 51350 + }, + { + "epoch": 2.031283988214123, + "grad_norm": 1.6546618583822588, + "learning_rate": 2.853680558125663e-06, + "loss": 0.6281, + "step": 51360 + }, + { + "epoch": 2.031679487433012, + "grad_norm": 1.6168192095449252, + "learning_rate": 2.851602576064737e-06, + "loss": 0.6866, + "step": 51370 + }, + { + "epoch": 
2.0320749866519012, + "grad_norm": 1.41902590109875, + "learning_rate": 2.8495250489975806e-06, + "loss": 0.7171, + "step": 51380 + }, + { + "epoch": 2.0324704858707903, + "grad_norm": 1.511544137845957, + "learning_rate": 2.8474479773641737e-06, + "loss": 0.684, + "step": 51390 + }, + { + "epoch": 2.0328659850896793, + "grad_norm": 1.4672154781081783, + "learning_rate": 2.8453713616044106e-06, + "loss": 0.7136, + "step": 51400 + }, + { + "epoch": 2.0332614843085683, + "grad_norm": 1.3807271315009328, + "learning_rate": 2.843295202158077e-06, + "loss": 0.6548, + "step": 51410 + }, + { + "epoch": 2.0336569835274574, + "grad_norm": 1.7527289993783621, + "learning_rate": 2.841219499464872e-06, + "loss": 0.6813, + "step": 51420 + }, + { + "epoch": 2.0340524827463464, + "grad_norm": 1.6253010217478865, + "learning_rate": 2.839144253964391e-06, + "loss": 0.6417, + "step": 51430 + }, + { + "epoch": 2.0344479819652355, + "grad_norm": 1.318797320732602, + "learning_rate": 2.8370694660961386e-06, + "loss": 0.6769, + "step": 51440 + }, + { + "epoch": 2.0348434811841245, + "grad_norm": 1.4737748701668791, + "learning_rate": 2.8349951362995177e-06, + "loss": 0.6977, + "step": 51450 + }, + { + "epoch": 2.0352389804030135, + "grad_norm": 1.428404610531691, + "learning_rate": 2.832921265013835e-06, + "loss": 0.6444, + "step": 51460 + }, + { + "epoch": 2.0356344796219026, + "grad_norm": 1.5207348819296758, + "learning_rate": 2.8308478526783e-06, + "loss": 0.6645, + "step": 51470 + }, + { + "epoch": 2.0360299788407916, + "grad_norm": 1.4151771378902087, + "learning_rate": 2.8287748997320296e-06, + "loss": 0.679, + "step": 51480 + }, + { + "epoch": 2.0364254780596807, + "grad_norm": 1.3974089996903323, + "learning_rate": 2.826702406614036e-06, + "loss": 0.6933, + "step": 51490 + }, + { + "epoch": 2.0368209772785697, + "grad_norm": 1.4122223333777773, + "learning_rate": 2.82463037376324e-06, + "loss": 0.6686, + "step": 51500 + }, + { + "epoch": 2.0372164764974587, + "grad_norm": 1.43840570170672, + "learning_rate": 2.8225588016184635e-06, + "loss": 0.6822, + "step": 51510 + }, + { + "epoch": 2.037611975716348, + "grad_norm": 1.1813850276128999, + "learning_rate": 2.8204876906184255e-06, + "loss": 0.694, + "step": 51520 + }, + { + "epoch": 2.038007474935237, + "grad_norm": 1.8225240542674708, + "learning_rate": 2.8184170412017576e-06, + "loss": 0.6667, + "step": 51530 + }, + { + "epoch": 2.038402974154126, + "grad_norm": 1.4965125674565252, + "learning_rate": 2.8163468538069823e-06, + "loss": 0.6732, + "step": 51540 + }, + { + "epoch": 2.038798473373015, + "grad_norm": 1.448677894727122, + "learning_rate": 2.8142771288725345e-06, + "loss": 0.6772, + "step": 51550 + }, + { + "epoch": 2.039193972591904, + "grad_norm": 1.3194978096594925, + "learning_rate": 2.8122078668367435e-06, + "loss": 0.6793, + "step": 51560 + }, + { + "epoch": 2.039589471810793, + "grad_norm": 1.5670614469670083, + "learning_rate": 2.810139068137848e-06, + "loss": 0.6789, + "step": 51570 + }, + { + "epoch": 2.039984971029682, + "grad_norm": 1.4063689584703747, + "learning_rate": 2.808070733213977e-06, + "loss": 0.677, + "step": 51580 + }, + { + "epoch": 2.040380470248571, + "grad_norm": 1.5446635384179739, + "learning_rate": 2.806002862503174e-06, + "loss": 0.679, + "step": 51590 + }, + { + "epoch": 2.04077596946746, + "grad_norm": 1.315420834173595, + "learning_rate": 2.8039354564433746e-06, + "loss": 0.665, + "step": 51600 + }, + { + "epoch": 2.041171468686349, + "grad_norm": 1.4561359291183944, + "learning_rate": 2.8018685154724246e-06, 
+ "loss": 0.6514, + "step": 51610 + }, + { + "epoch": 2.041566967905238, + "grad_norm": 1.4501766365352706, + "learning_rate": 2.799802040028062e-06, + "loss": 0.6785, + "step": 51620 + }, + { + "epoch": 2.041962467124127, + "grad_norm": 1.643693782075612, + "learning_rate": 2.797736030547935e-06, + "loss": 0.6798, + "step": 51630 + }, + { + "epoch": 2.0423579663430163, + "grad_norm": 1.4187275409018834, + "learning_rate": 2.795670487469585e-06, + "loss": 0.6926, + "step": 51640 + }, + { + "epoch": 2.0427534655619057, + "grad_norm": 1.4312540627714883, + "learning_rate": 2.793605411230463e-06, + "loss": 0.6528, + "step": 51650 + }, + { + "epoch": 2.0431489647807948, + "grad_norm": 1.7056937881056342, + "learning_rate": 2.7915408022679126e-06, + "loss": 0.6832, + "step": 51660 + }, + { + "epoch": 2.043544463999684, + "grad_norm": 1.3243362374528815, + "learning_rate": 2.789476661019186e-06, + "loss": 0.6756, + "step": 51670 + }, + { + "epoch": 2.043939963218573, + "grad_norm": 1.3395807097369474, + "learning_rate": 2.78741298792143e-06, + "loss": 0.6727, + "step": 51680 + }, + { + "epoch": 2.044335462437462, + "grad_norm": 1.2042112999230044, + "learning_rate": 2.7853497834117005e-06, + "loss": 0.6761, + "step": 51690 + }, + { + "epoch": 2.044730961656351, + "grad_norm": 1.4510951398769156, + "learning_rate": 2.7832870479269414e-06, + "loss": 0.6873, + "step": 51700 + }, + { + "epoch": 2.04512646087524, + "grad_norm": 1.4410161339203527, + "learning_rate": 2.7812247819040105e-06, + "loss": 0.665, + "step": 51710 + }, + { + "epoch": 2.045521960094129, + "grad_norm": 1.4850022883181755, + "learning_rate": 2.779162985779655e-06, + "loss": 0.6752, + "step": 51720 + }, + { + "epoch": 2.045917459313018, + "grad_norm": 1.3824240175203364, + "learning_rate": 2.7771016599905354e-06, + "loss": 0.6675, + "step": 51730 + }, + { + "epoch": 2.046312958531907, + "grad_norm": 1.57840654228568, + "learning_rate": 2.7750408049731976e-06, + "loss": 0.6751, + "step": 51740 + }, + { + "epoch": 2.046708457750796, + "grad_norm": 1.5595061154067895, + "learning_rate": 2.7729804211641008e-06, + "loss": 0.661, + "step": 51750 + }, + { + "epoch": 2.047103956969685, + "grad_norm": 1.4522336237441211, + "learning_rate": 2.7709205089995983e-06, + "loss": 0.6903, + "step": 51760 + }, + { + "epoch": 2.047499456188574, + "grad_norm": 1.3382892906889068, + "learning_rate": 2.76886106891594e-06, + "loss": 0.6531, + "step": 51770 + }, + { + "epoch": 2.0478949554074632, + "grad_norm": 1.5907294576561035, + "learning_rate": 2.7668021013492853e-06, + "loss": 0.6634, + "step": 51780 + }, + { + "epoch": 2.0482904546263523, + "grad_norm": 1.2589676471675642, + "learning_rate": 2.7647436067356837e-06, + "loss": 0.688, + "step": 51790 + }, + { + "epoch": 2.0486859538452413, + "grad_norm": 1.4836115450633147, + "learning_rate": 2.7626855855110933e-06, + "loss": 0.6804, + "step": 51800 + }, + { + "epoch": 2.0490814530641304, + "grad_norm": 1.2705691695055399, + "learning_rate": 2.7606280381113647e-06, + "loss": 0.6528, + "step": 51810 + }, + { + "epoch": 2.0494769522830194, + "grad_norm": 1.5990336497105846, + "learning_rate": 2.758570964972256e-06, + "loss": 0.6858, + "step": 51820 + }, + { + "epoch": 2.0498724515019084, + "grad_norm": 1.6320340247285556, + "learning_rate": 2.7565143665294113e-06, + "loss": 0.6502, + "step": 51830 + }, + { + "epoch": 2.0502679507207975, + "grad_norm": 1.3638568833601041, + "learning_rate": 2.754458243218391e-06, + "loss": 0.6913, + "step": 51840 + }, + { + "epoch": 2.0506634499396865, + "grad_norm": 
1.6690454724771784, + "learning_rate": 2.7524025954746416e-06, + "loss": 0.6843, + "step": 51850 + }, + { + "epoch": 2.0510589491585756, + "grad_norm": 1.5581749548571813, + "learning_rate": 2.7503474237335178e-06, + "loss": 0.668, + "step": 51860 + }, + { + "epoch": 2.0514544483774646, + "grad_norm": 1.5845702276915152, + "learning_rate": 2.7482927284302664e-06, + "loss": 0.6642, + "step": 51870 + }, + { + "epoch": 2.0518499475963536, + "grad_norm": 1.18781631151122, + "learning_rate": 2.74623851000004e-06, + "loss": 0.67, + "step": 51880 + }, + { + "epoch": 2.0522454468152427, + "grad_norm": 1.38478460380908, + "learning_rate": 2.7441847688778837e-06, + "loss": 0.6691, + "step": 51890 + }, + { + "epoch": 2.0526409460341317, + "grad_norm": 1.4919508849025567, + "learning_rate": 2.7421315054987485e-06, + "loss": 0.6953, + "step": 51900 + }, + { + "epoch": 2.0530364452530208, + "grad_norm": 1.409645879476153, + "learning_rate": 2.740078720297476e-06, + "loss": 0.6498, + "step": 51910 + }, + { + "epoch": 2.05343194447191, + "grad_norm": 1.3887768232572557, + "learning_rate": 2.7380264137088152e-06, + "loss": 0.6593, + "step": 51920 + }, + { + "epoch": 2.053827443690799, + "grad_norm": 1.5356611157694373, + "learning_rate": 2.735974586167407e-06, + "loss": 0.6548, + "step": 51930 + }, + { + "epoch": 2.054222942909688, + "grad_norm": 1.4512133736684516, + "learning_rate": 2.7339232381077947e-06, + "loss": 0.6903, + "step": 51940 + }, + { + "epoch": 2.054618442128577, + "grad_norm": 1.325614361693809, + "learning_rate": 2.7318723699644144e-06, + "loss": 0.6828, + "step": 51950 + }, + { + "epoch": 2.055013941347466, + "grad_norm": 1.4889499623905664, + "learning_rate": 2.729821982171611e-06, + "loss": 0.6895, + "step": 51960 + }, + { + "epoch": 2.055409440566355, + "grad_norm": 1.2440146129922824, + "learning_rate": 2.727772075163617e-06, + "loss": 0.6655, + "step": 51970 + }, + { + "epoch": 2.055804939785244, + "grad_norm": 1.3290034827721968, + "learning_rate": 2.72572264937457e-06, + "loss": 0.6594, + "step": 51980 + }, + { + "epoch": 2.056200439004133, + "grad_norm": 1.5596145538617112, + "learning_rate": 2.7236737052385042e-06, + "loss": 0.6908, + "step": 51990 + }, + { + "epoch": 2.056595938223022, + "grad_norm": 1.570329150959247, + "learning_rate": 2.7216252431893463e-06, + "loss": 0.654, + "step": 52000 + }, + { + "epoch": 2.056991437441911, + "grad_norm": 1.4310978721575889, + "learning_rate": 2.71957726366093e-06, + "loss": 0.6782, + "step": 52010 + }, + { + "epoch": 2.0573869366608, + "grad_norm": 1.4169390400429067, + "learning_rate": 2.717529767086979e-06, + "loss": 0.678, + "step": 52020 + }, + { + "epoch": 2.0577824358796892, + "grad_norm": 1.5811118727831657, + "learning_rate": 2.715482753901122e-06, + "loss": 0.6665, + "step": 52030 + }, + { + "epoch": 2.0581779350985783, + "grad_norm": 1.794965533863638, + "learning_rate": 2.713436224536876e-06, + "loss": 0.6806, + "step": 52040 + }, + { + "epoch": 2.0585734343174673, + "grad_norm": 1.238557125605881, + "learning_rate": 2.7113901794276666e-06, + "loss": 0.6479, + "step": 52050 + }, + { + "epoch": 2.0589689335363563, + "grad_norm": 1.374570653604843, + "learning_rate": 2.709344619006808e-06, + "loss": 0.6614, + "step": 52060 + }, + { + "epoch": 2.0593644327552454, + "grad_norm": 1.5715357755839472, + "learning_rate": 2.7072995437075152e-06, + "loss": 0.6566, + "step": 52070 + }, + { + "epoch": 2.0597599319741344, + "grad_norm": 1.452515575478075, + "learning_rate": 2.705254953962898e-06, + "loss": 0.6581, + "step": 52080 + }, + 
{ + "epoch": 2.0601554311930235, + "grad_norm": 1.4301612305510367, + "learning_rate": 2.7032108502059696e-06, + "loss": 0.6876, + "step": 52090 + }, + { + "epoch": 2.0605509304119125, + "grad_norm": 1.620012252015546, + "learning_rate": 2.7011672328696316e-06, + "loss": 0.6767, + "step": 52100 + }, + { + "epoch": 2.0609464296308015, + "grad_norm": 1.4335361240090754, + "learning_rate": 2.6991241023866922e-06, + "loss": 0.6487, + "step": 52110 + }, + { + "epoch": 2.0613419288496906, + "grad_norm": 1.4164320536519117, + "learning_rate": 2.6970814591898465e-06, + "loss": 0.6725, + "step": 52120 + }, + { + "epoch": 2.0617374280685796, + "grad_norm": 1.7188702025251388, + "learning_rate": 2.695039303711696e-06, + "loss": 0.6446, + "step": 52130 + }, + { + "epoch": 2.0621329272874687, + "grad_norm": 1.6467002183447301, + "learning_rate": 2.6929976363847298e-06, + "loss": 0.6744, + "step": 52140 + }, + { + "epoch": 2.0625284265063577, + "grad_norm": 1.260646261849889, + "learning_rate": 2.690956457641343e-06, + "loss": 0.6742, + "step": 52150 + }, + { + "epoch": 2.0629239257252467, + "grad_norm": 1.5837327861819737, + "learning_rate": 2.688915767913819e-06, + "loss": 0.6565, + "step": 52160 + }, + { + "epoch": 2.063319424944136, + "grad_norm": 1.5758511586232795, + "learning_rate": 2.6868755676343388e-06, + "loss": 0.6737, + "step": 52170 + }, + { + "epoch": 2.063714924163025, + "grad_norm": 1.3234507251222656, + "learning_rate": 2.684835857234987e-06, + "loss": 0.6814, + "step": 52180 + }, + { + "epoch": 2.064110423381914, + "grad_norm": 1.51400259042956, + "learning_rate": 2.6827966371477365e-06, + "loss": 0.6712, + "step": 52190 + }, + { + "epoch": 2.064505922600803, + "grad_norm": 1.6393694577024078, + "learning_rate": 2.680757907804458e-06, + "loss": 0.6886, + "step": 52200 + }, + { + "epoch": 2.064901421819692, + "grad_norm": 1.8605484471750249, + "learning_rate": 2.6787196696369226e-06, + "loss": 0.6625, + "step": 52210 + }, + { + "epoch": 2.065296921038581, + "grad_norm": 1.3868704108873273, + "learning_rate": 2.6766819230767927e-06, + "loss": 0.6959, + "step": 52220 + }, + { + "epoch": 2.06569242025747, + "grad_norm": 1.3622943554688556, + "learning_rate": 2.674644668555626e-06, + "loss": 0.6679, + "step": 52230 + }, + { + "epoch": 2.066087919476359, + "grad_norm": 1.5666464248519238, + "learning_rate": 2.6726079065048817e-06, + "loss": 0.6923, + "step": 52240 + }, + { + "epoch": 2.066483418695248, + "grad_norm": 1.6330178441254855, + "learning_rate": 2.670571637355908e-06, + "loss": 0.6647, + "step": 52250 + }, + { + "epoch": 2.066878917914137, + "grad_norm": 1.2037696642592384, + "learning_rate": 2.6685358615399546e-06, + "loss": 0.6865, + "step": 52260 + }, + { + "epoch": 2.067274417133026, + "grad_norm": 1.3697055894795223, + "learning_rate": 2.6665005794881615e-06, + "loss": 0.6932, + "step": 52270 + }, + { + "epoch": 2.067669916351915, + "grad_norm": 1.4453789879478172, + "learning_rate": 2.664465791631569e-06, + "loss": 0.6841, + "step": 52280 + }, + { + "epoch": 2.0680654155708043, + "grad_norm": 1.5849662028931002, + "learning_rate": 2.662431498401108e-06, + "loss": 0.6824, + "step": 52290 + }, + { + "epoch": 2.0684609147896933, + "grad_norm": 1.6251055355777595, + "learning_rate": 2.660397700227609e-06, + "loss": 0.6755, + "step": 52300 + }, + { + "epoch": 2.0688564140085823, + "grad_norm": 1.6157866842704838, + "learning_rate": 2.658364397541795e-06, + "loss": 0.6747, + "step": 52310 + }, + { + "epoch": 2.0692519132274714, + "grad_norm": 1.3054970930113385, + "learning_rate": 
2.6563315907742837e-06, + "loss": 0.6948, + "step": 52320 + }, + { + "epoch": 2.0696474124463604, + "grad_norm": 1.433933684809099, + "learning_rate": 2.654299280355588e-06, + "loss": 0.6765, + "step": 52330 + }, + { + "epoch": 2.0700429116652495, + "grad_norm": 1.6551965881475086, + "learning_rate": 2.6522674667161197e-06, + "loss": 0.6665, + "step": 52340 + }, + { + "epoch": 2.0704384108841385, + "grad_norm": 1.2629502516352678, + "learning_rate": 2.6502361502861774e-06, + "loss": 0.6871, + "step": 52350 + }, + { + "epoch": 2.0708339101030275, + "grad_norm": 1.35933503253559, + "learning_rate": 2.6482053314959643e-06, + "loss": 0.6796, + "step": 52360 + }, + { + "epoch": 2.0712294093219166, + "grad_norm": 1.5336653955637916, + "learning_rate": 2.646175010775569e-06, + "loss": 0.646, + "step": 52370 + }, + { + "epoch": 2.0716249085408056, + "grad_norm": 1.3641744591861404, + "learning_rate": 2.6441451885549817e-06, + "loss": 0.6822, + "step": 52380 + }, + { + "epoch": 2.0720204077596946, + "grad_norm": 1.2623193094389407, + "learning_rate": 2.642115865264081e-06, + "loss": 0.6736, + "step": 52390 + }, + { + "epoch": 2.0724159069785837, + "grad_norm": 1.5433798706176225, + "learning_rate": 2.640087041332646e-06, + "loss": 0.6872, + "step": 52400 + }, + { + "epoch": 2.0728114061974727, + "grad_norm": 1.4669634110621415, + "learning_rate": 2.638058717190345e-06, + "loss": 0.6763, + "step": 52410 + }, + { + "epoch": 2.0732069054163618, + "grad_norm": 1.3891825449392798, + "learning_rate": 2.6360308932667412e-06, + "loss": 0.6897, + "step": 52420 + }, + { + "epoch": 2.073602404635251, + "grad_norm": 1.4227344362373493, + "learning_rate": 2.6340035699912956e-06, + "loss": 0.6769, + "step": 52430 + }, + { + "epoch": 2.07399790385414, + "grad_norm": 1.5198227351741198, + "learning_rate": 2.631976747793359e-06, + "loss": 0.6787, + "step": 52440 + }, + { + "epoch": 2.074393403073029, + "grad_norm": 1.810672175998034, + "learning_rate": 2.629950427102178e-06, + "loss": 0.6567, + "step": 52450 + }, + { + "epoch": 2.074788902291918, + "grad_norm": 1.7971121818083846, + "learning_rate": 2.6279246083468907e-06, + "loss": 0.724, + "step": 52460 + }, + { + "epoch": 2.075184401510807, + "grad_norm": 1.600834365798754, + "learning_rate": 2.625899291956534e-06, + "loss": 0.6803, + "step": 52470 + }, + { + "epoch": 2.075579900729696, + "grad_norm": 1.2861592094948155, + "learning_rate": 2.623874478360032e-06, + "loss": 0.6671, + "step": 52480 + }, + { + "epoch": 2.075975399948585, + "grad_norm": 1.437374644881475, + "learning_rate": 2.62185016798621e-06, + "loss": 0.6529, + "step": 52490 + }, + { + "epoch": 2.076370899167474, + "grad_norm": 1.5306426768862746, + "learning_rate": 2.6198263612637763e-06, + "loss": 0.6386, + "step": 52500 + }, + { + "epoch": 2.076766398386363, + "grad_norm": 1.3526599147974985, + "learning_rate": 2.6178030586213444e-06, + "loss": 0.6565, + "step": 52510 + }, + { + "epoch": 2.077161897605252, + "grad_norm": 1.5255294019284535, + "learning_rate": 2.6157802604874107e-06, + "loss": 0.7065, + "step": 52520 + }, + { + "epoch": 2.077557396824141, + "grad_norm": 1.5097679189189859, + "learning_rate": 2.613757967290372e-06, + "loss": 0.7013, + "step": 52530 + }, + { + "epoch": 2.0779528960430302, + "grad_norm": 1.3806041516365242, + "learning_rate": 2.611736179458513e-06, + "loss": 0.6665, + "step": 52540 + }, + { + "epoch": 2.0783483952619193, + "grad_norm": 1.520370423641659, + "learning_rate": 2.609714897420018e-06, + "loss": 0.6847, + "step": 52550 + }, + { + "epoch": 
2.0787438944808083, + "grad_norm": 1.386264884028776, + "learning_rate": 2.607694121602954e-06, + "loss": 0.6685, + "step": 52560 + }, + { + "epoch": 2.0791393936996974, + "grad_norm": 1.5455819338474668, + "learning_rate": 2.605673852435291e-06, + "loss": 0.6548, + "step": 52570 + }, + { + "epoch": 2.0795348929185864, + "grad_norm": 1.4819437150042558, + "learning_rate": 2.603654090344885e-06, + "loss": 0.6805, + "step": 52580 + }, + { + "epoch": 2.0799303921374754, + "grad_norm": 1.512918354138061, + "learning_rate": 2.6016348357594888e-06, + "loss": 0.6768, + "step": 52590 + }, + { + "epoch": 2.0803258913563645, + "grad_norm": 1.6184862258917365, + "learning_rate": 2.5996160891067434e-06, + "loss": 0.7019, + "step": 52600 + }, + { + "epoch": 2.0807213905752535, + "grad_norm": 1.4476359865777522, + "learning_rate": 2.5975978508141888e-06, + "loss": 0.6578, + "step": 52610 + }, + { + "epoch": 2.0811168897941426, + "grad_norm": 1.2904862258690026, + "learning_rate": 2.595580121309249e-06, + "loss": 0.6831, + "step": 52620 + }, + { + "epoch": 2.0815123890130316, + "grad_norm": 1.4542574663689345, + "learning_rate": 2.593562901019249e-06, + "loss": 0.6453, + "step": 52630 + }, + { + "epoch": 2.0819078882319206, + "grad_norm": 1.541047213286812, + "learning_rate": 2.591546190371398e-06, + "loss": 0.6861, + "step": 52640 + }, + { + "epoch": 2.0823033874508097, + "grad_norm": 1.6199363763865149, + "learning_rate": 2.5895299897928006e-06, + "loss": 0.6846, + "step": 52650 + }, + { + "epoch": 2.0826988866696987, + "grad_norm": 1.5446952311698432, + "learning_rate": 2.587514299710456e-06, + "loss": 0.667, + "step": 52660 + }, + { + "epoch": 2.0830943858885878, + "grad_norm": 1.576213081727735, + "learning_rate": 2.585499120551252e-06, + "loss": 0.6638, + "step": 52670 + }, + { + "epoch": 2.083489885107477, + "grad_norm": 1.2719748237983468, + "learning_rate": 2.583484452741967e-06, + "loss": 0.6748, + "step": 52680 + }, + { + "epoch": 2.083885384326366, + "grad_norm": 1.1710654079071265, + "learning_rate": 2.5814702967092753e-06, + "loss": 0.6539, + "step": 52690 + }, + { + "epoch": 2.084280883545255, + "grad_norm": 1.398658133848948, + "learning_rate": 2.5794566528797415e-06, + "loss": 0.646, + "step": 52700 + }, + { + "epoch": 2.084676382764144, + "grad_norm": 1.2338963160268046, + "learning_rate": 2.5774435216798167e-06, + "loss": 0.6865, + "step": 52710 + }, + { + "epoch": 2.085071881983033, + "grad_norm": 1.697359288983376, + "learning_rate": 2.575430903535853e-06, + "loss": 0.6939, + "step": 52720 + }, + { + "epoch": 2.085467381201922, + "grad_norm": 1.2576503902545464, + "learning_rate": 2.573418798874083e-06, + "loss": 0.7158, + "step": 52730 + }, + { + "epoch": 2.085862880420811, + "grad_norm": 1.3741506318956629, + "learning_rate": 2.5714072081206407e-06, + "loss": 0.6665, + "step": 52740 + }, + { + "epoch": 2.0862583796397, + "grad_norm": 1.3470493251290068, + "learning_rate": 2.5693961317015427e-06, + "loss": 0.6736, + "step": 52750 + }, + { + "epoch": 2.086653878858589, + "grad_norm": 1.4764124229156748, + "learning_rate": 2.5673855700427046e-06, + "loss": 0.6806, + "step": 52760 + }, + { + "epoch": 2.087049378077478, + "grad_norm": 1.588954147864825, + "learning_rate": 2.565375523569925e-06, + "loss": 0.6765, + "step": 52770 + }, + { + "epoch": 2.087444877296367, + "grad_norm": 1.5939057146452666, + "learning_rate": 2.5633659927089007e-06, + "loss": 0.6687, + "step": 52780 + }, + { + "epoch": 2.0878403765152562, + "grad_norm": 1.1449002652091103, + "learning_rate": 
2.561356977885213e-06, + "loss": 0.6782, + "step": 52790 + }, + { + "epoch": 2.0882358757341453, + "grad_norm": 1.2665726635762782, + "learning_rate": 2.5593484795243413e-06, + "loss": 0.6358, + "step": 52800 + }, + { + "epoch": 2.0886313749530343, + "grad_norm": 1.2797589686314872, + "learning_rate": 2.557340498051644e-06, + "loss": 0.7054, + "step": 52810 + }, + { + "epoch": 2.0890268741719233, + "grad_norm": 1.368405470057439, + "learning_rate": 2.5553330338923833e-06, + "loss": 0.6835, + "step": 52820 + }, + { + "epoch": 2.0894223733908124, + "grad_norm": 1.5883166974820286, + "learning_rate": 2.5533260874717013e-06, + "loss": 0.7001, + "step": 52830 + }, + { + "epoch": 2.0898178726097014, + "grad_norm": 1.4782327155067416, + "learning_rate": 2.5513196592146393e-06, + "loss": 0.6664, + "step": 52840 + }, + { + "epoch": 2.0902133718285905, + "grad_norm": 1.347431111755391, + "learning_rate": 2.5493137495461208e-06, + "loss": 0.7057, + "step": 52850 + }, + { + "epoch": 2.0906088710474795, + "grad_norm": 1.5451260950793235, + "learning_rate": 2.5473083588909654e-06, + "loss": 0.6547, + "step": 52860 + }, + { + "epoch": 2.0910043702663685, + "grad_norm": 1.2349444793984876, + "learning_rate": 2.5453034876738804e-06, + "loss": 0.6763, + "step": 52870 + }, + { + "epoch": 2.0913998694852576, + "grad_norm": 1.2750941373065188, + "learning_rate": 2.54329913631946e-06, + "loss": 0.6634, + "step": 52880 + }, + { + "epoch": 2.0917953687041466, + "grad_norm": 1.3420017160413147, + "learning_rate": 2.541295305252196e-06, + "loss": 0.6338, + "step": 52890 + }, + { + "epoch": 2.0921908679230357, + "grad_norm": 1.3481168399417625, + "learning_rate": 2.539291994896461e-06, + "loss": 0.6848, + "step": 52900 + }, + { + "epoch": 2.0925863671419247, + "grad_norm": 1.3653752338546252, + "learning_rate": 2.5372892056765264e-06, + "loss": 0.6793, + "step": 52910 + }, + { + "epoch": 2.0929818663608137, + "grad_norm": 1.5091123395393253, + "learning_rate": 2.5352869380165467e-06, + "loss": 0.6321, + "step": 52920 + }, + { + "epoch": 2.0933773655797028, + "grad_norm": 1.4791289171450297, + "learning_rate": 2.533285192340568e-06, + "loss": 0.6981, + "step": 52930 + }, + { + "epoch": 2.093772864798592, + "grad_norm": 1.3951629220824813, + "learning_rate": 2.531283969072522e-06, + "loss": 0.6315, + "step": 52940 + }, + { + "epoch": 2.094168364017481, + "grad_norm": 1.556194253506756, + "learning_rate": 2.5292832686362403e-06, + "loss": 0.6762, + "step": 52950 + }, + { + "epoch": 2.09456386323637, + "grad_norm": 1.3563370697161923, + "learning_rate": 2.5272830914554302e-06, + "loss": 0.6576, + "step": 52960 + }, + { + "epoch": 2.094959362455259, + "grad_norm": 1.4635084950990478, + "learning_rate": 2.5252834379537004e-06, + "loss": 0.6764, + "step": 52970 + }, + { + "epoch": 2.0953548616741484, + "grad_norm": 1.2901409397049215, + "learning_rate": 2.52328430855454e-06, + "loss": 0.668, + "step": 52980 + }, + { + "epoch": 2.095750360893037, + "grad_norm": 1.3269814231219497, + "learning_rate": 2.521285703681333e-06, + "loss": 0.6581, + "step": 52990 + }, + { + "epoch": 2.0961458601119265, + "grad_norm": 1.5281349010595011, + "learning_rate": 2.5192876237573464e-06, + "loss": 0.667, + "step": 53000 + }, + { + "epoch": 2.0965413593308155, + "grad_norm": 1.3806584739507546, + "learning_rate": 2.5172900692057433e-06, + "loss": 0.6538, + "step": 53010 + }, + { + "epoch": 2.0969368585497046, + "grad_norm": 1.382791330061861, + "learning_rate": 2.5152930404495666e-06, + "loss": 0.6716, + "step": 53020 + }, + { + "epoch": 
2.0973323577685936, + "grad_norm": 1.6452506356859764, + "learning_rate": 2.513296537911758e-06, + "loss": 0.6574, + "step": 53030 + }, + { + "epoch": 2.0977278569874827, + "grad_norm": 1.3803466437735963, + "learning_rate": 2.51130056201514e-06, + "loss": 0.6697, + "step": 53040 + }, + { + "epoch": 2.0981233562063717, + "grad_norm": 1.3989944734573991, + "learning_rate": 2.509305113182425e-06, + "loss": 0.6586, + "step": 53050 + }, + { + "epoch": 2.0985188554252607, + "grad_norm": 1.56645721200081, + "learning_rate": 2.5073101918362154e-06, + "loss": 0.6738, + "step": 53060 + }, + { + "epoch": 2.0989143546441498, + "grad_norm": 1.6086521453677383, + "learning_rate": 2.505315798399003e-06, + "loss": 0.6112, + "step": 53070 + }, + { + "epoch": 2.099309853863039, + "grad_norm": 1.3652055358391482, + "learning_rate": 2.5033219332931637e-06, + "loss": 0.6753, + "step": 53080 + }, + { + "epoch": 2.099705353081928, + "grad_norm": 1.263788163685856, + "learning_rate": 2.5013285969409673e-06, + "loss": 0.6757, + "step": 53090 + }, + { + "epoch": 2.100100852300817, + "grad_norm": 1.5017467112050908, + "learning_rate": 2.499335789764566e-06, + "loss": 0.7035, + "step": 53100 + }, + { + "epoch": 2.100496351519706, + "grad_norm": 1.70164536757861, + "learning_rate": 2.497343512186001e-06, + "loss": 0.6854, + "step": 53110 + }, + { + "epoch": 2.100891850738595, + "grad_norm": 1.5623921866035804, + "learning_rate": 2.4953517646272052e-06, + "loss": 0.7, + "step": 53120 + }, + { + "epoch": 2.101287349957484, + "grad_norm": 1.2149791259308627, + "learning_rate": 2.493360547509994e-06, + "loss": 0.6983, + "step": 53130 + }, + { + "epoch": 2.101682849176373, + "grad_norm": 1.4920892407652067, + "learning_rate": 2.4913698612560774e-06, + "loss": 0.695, + "step": 53140 + }, + { + "epoch": 2.102078348395262, + "grad_norm": 1.6124787133435043, + "learning_rate": 2.4893797062870435e-06, + "loss": 0.6613, + "step": 53150 + }, + { + "epoch": 2.102473847614151, + "grad_norm": 1.4268511241186925, + "learning_rate": 2.4873900830243787e-06, + "loss": 0.6651, + "step": 53160 + }, + { + "epoch": 2.10286934683304, + "grad_norm": 1.3137992140730472, + "learning_rate": 2.4854009918894446e-06, + "loss": 0.6765, + "step": 53170 + }, + { + "epoch": 2.103264846051929, + "grad_norm": 1.8234777466559235, + "learning_rate": 2.4834124333035016e-06, + "loss": 0.6384, + "step": 53180 + }, + { + "epoch": 2.1036603452708182, + "grad_norm": 1.350062295028163, + "learning_rate": 2.481424407687688e-06, + "loss": 0.671, + "step": 53190 + }, + { + "epoch": 2.1040558444897073, + "grad_norm": 1.5857495747917962, + "learning_rate": 2.4794369154630387e-06, + "loss": 0.6576, + "step": 53200 + }, + { + "epoch": 2.1044513437085963, + "grad_norm": 1.540036969933618, + "learning_rate": 2.4774499570504663e-06, + "loss": 0.6601, + "step": 53210 + }, + { + "epoch": 2.1048468429274854, + "grad_norm": 1.3301296719195193, + "learning_rate": 2.475463532870777e-06, + "loss": 0.6656, + "step": 53220 + }, + { + "epoch": 2.1052423421463744, + "grad_norm": 1.7453460723866556, + "learning_rate": 2.47347764334466e-06, + "loss": 0.6523, + "step": 53230 + }, + { + "epoch": 2.1056378413652634, + "grad_norm": 1.4527756973509505, + "learning_rate": 2.4714922888926947e-06, + "loss": 0.6693, + "step": 53240 + }, + { + "epoch": 2.1060333405841525, + "grad_norm": 1.7908665413682205, + "learning_rate": 2.4695074699353418e-06, + "loss": 0.6645, + "step": 53250 + }, + { + "epoch": 2.1064288398030415, + "grad_norm": 1.26526736653631, + "learning_rate": 
2.4675231868929554e-06, + "loss": 0.6653, + "step": 53260 + }, + { + "epoch": 2.1068243390219306, + "grad_norm": 1.9698257155815406, + "learning_rate": 2.4655394401857694e-06, + "loss": 0.636, + "step": 53270 + }, + { + "epoch": 2.1072198382408196, + "grad_norm": 1.275203795446927, + "learning_rate": 2.463556230233911e-06, + "loss": 0.6896, + "step": 53280 + }, + { + "epoch": 2.1076153374597086, + "grad_norm": 1.4919697929138263, + "learning_rate": 2.4615735574573884e-06, + "loss": 0.672, + "step": 53290 + }, + { + "epoch": 2.1080108366785977, + "grad_norm": 1.4886663256821036, + "learning_rate": 2.4595914222760973e-06, + "loss": 0.6697, + "step": 53300 + }, + { + "epoch": 2.1084063358974867, + "grad_norm": 1.3169866549925069, + "learning_rate": 2.457609825109819e-06, + "loss": 0.6838, + "step": 53310 + }, + { + "epoch": 2.1088018351163758, + "grad_norm": 1.358044933100365, + "learning_rate": 2.4556287663782246e-06, + "loss": 0.6726, + "step": 53320 + }, + { + "epoch": 2.109197334335265, + "grad_norm": 1.5942567648283803, + "learning_rate": 2.453648246500864e-06, + "loss": 0.6957, + "step": 53330 + }, + { + "epoch": 2.109592833554154, + "grad_norm": 1.6749818744613212, + "learning_rate": 2.4516682658971834e-06, + "loss": 0.6396, + "step": 53340 + }, + { + "epoch": 2.109988332773043, + "grad_norm": 1.2764814017765473, + "learning_rate": 2.449688824986505e-06, + "loss": 0.6364, + "step": 53350 + }, + { + "epoch": 2.110383831991932, + "grad_norm": 1.3994920100399224, + "learning_rate": 2.447709924188039e-06, + "loss": 0.6344, + "step": 53360 + }, + { + "epoch": 2.110779331210821, + "grad_norm": 1.5331450521644923, + "learning_rate": 2.4457315639208874e-06, + "loss": 0.6571, + "step": 53370 + }, + { + "epoch": 2.11117483042971, + "grad_norm": 1.6940648811409018, + "learning_rate": 2.443753744604028e-06, + "loss": 0.6609, + "step": 53380 + }, + { + "epoch": 2.111570329648599, + "grad_norm": 1.4692843636583646, + "learning_rate": 2.4417764666563333e-06, + "loss": 0.671, + "step": 53390 + }, + { + "epoch": 2.111965828867488, + "grad_norm": 1.4539245070302387, + "learning_rate": 2.4397997304965527e-06, + "loss": 0.6607, + "step": 53400 + }, + { + "epoch": 2.112361328086377, + "grad_norm": 1.49416317049423, + "learning_rate": 2.437823536543331e-06, + "loss": 0.6918, + "step": 53410 + }, + { + "epoch": 2.112756827305266, + "grad_norm": 1.7328016158080546, + "learning_rate": 2.4358478852151858e-06, + "loss": 0.6419, + "step": 53420 + }, + { + "epoch": 2.113152326524155, + "grad_norm": 1.7896688136449164, + "learning_rate": 2.4338727769305297e-06, + "loss": 0.6431, + "step": 53430 + }, + { + "epoch": 2.1135478257430442, + "grad_norm": 1.3489596809357487, + "learning_rate": 2.431898212107655e-06, + "loss": 0.6492, + "step": 53440 + }, + { + "epoch": 2.1139433249619333, + "grad_norm": 1.6706352027823188, + "learning_rate": 2.4299241911647437e-06, + "loss": 0.6458, + "step": 53450 + }, + { + "epoch": 2.1143388241808223, + "grad_norm": 1.3189804564082626, + "learning_rate": 2.4279507145198555e-06, + "loss": 0.6589, + "step": 53460 + }, + { + "epoch": 2.1147343233997113, + "grad_norm": 1.3579734464037865, + "learning_rate": 2.425977782590943e-06, + "loss": 0.6771, + "step": 53470 + }, + { + "epoch": 2.1151298226186004, + "grad_norm": 1.4704679190284409, + "learning_rate": 2.4240053957958355e-06, + "loss": 0.6746, + "step": 53480 + }, + { + "epoch": 2.1155253218374894, + "grad_norm": 1.4408933641084356, + "learning_rate": 2.422033554552255e-06, + "loss": 0.7022, + "step": 53490 + }, + { + "epoch": 
2.1159208210563785, + "grad_norm": 1.4391374918190838, + "learning_rate": 2.4200622592777988e-06, + "loss": 0.6666, + "step": 53500 + }, + { + "epoch": 2.1163163202752675, + "grad_norm": 1.2962933769133393, + "learning_rate": 2.418091510389959e-06, + "loss": 0.6553, + "step": 53510 + }, + { + "epoch": 2.1167118194941565, + "grad_norm": 1.5751859118775398, + "learning_rate": 2.416121308306103e-06, + "loss": 0.6571, + "step": 53520 + }, + { + "epoch": 2.1171073187130456, + "grad_norm": 1.625446807409782, + "learning_rate": 2.414151653443486e-06, + "loss": 0.6768, + "step": 53530 + }, + { + "epoch": 2.1175028179319346, + "grad_norm": 1.2434371830761635, + "learning_rate": 2.412182546219247e-06, + "loss": 0.6731, + "step": 53540 + }, + { + "epoch": 2.1178983171508237, + "grad_norm": 1.512027827644632, + "learning_rate": 2.4102139870504106e-06, + "loss": 0.664, + "step": 53550 + }, + { + "epoch": 2.1182938163697127, + "grad_norm": 1.2590069779396396, + "learning_rate": 2.4082459763538806e-06, + "loss": 0.6708, + "step": 53560 + }, + { + "epoch": 2.1186893155886017, + "grad_norm": 1.5501126021660727, + "learning_rate": 2.4062785145464535e-06, + "loss": 0.6787, + "step": 53570 + }, + { + "epoch": 2.119084814807491, + "grad_norm": 1.6408829605958855, + "learning_rate": 2.4043116020448004e-06, + "loss": 0.6948, + "step": 53580 + }, + { + "epoch": 2.11948031402638, + "grad_norm": 1.5343757391839363, + "learning_rate": 2.4023452392654785e-06, + "loss": 0.6245, + "step": 53590 + }, + { + "epoch": 2.119875813245269, + "grad_norm": 1.5872563076749362, + "learning_rate": 2.4003794266249325e-06, + "loss": 0.689, + "step": 53600 + }, + { + "epoch": 2.120271312464158, + "grad_norm": 1.357578009161746, + "learning_rate": 2.3984141645394854e-06, + "loss": 0.6659, + "step": 53610 + }, + { + "epoch": 2.120666811683047, + "grad_norm": 1.2616945437501441, + "learning_rate": 2.3964494534253496e-06, + "loss": 0.6569, + "step": 53620 + }, + { + "epoch": 2.121062310901936, + "grad_norm": 1.1731663550018403, + "learning_rate": 2.394485293698613e-06, + "loss": 0.7181, + "step": 53630 + }, + { + "epoch": 2.121457810120825, + "grad_norm": 1.632773518719414, + "learning_rate": 2.3925216857752543e-06, + "loss": 0.6559, + "step": 53640 + }, + { + "epoch": 2.121853309339714, + "grad_norm": 1.52230042364544, + "learning_rate": 2.3905586300711314e-06, + "loss": 0.6636, + "step": 53650 + }, + { + "epoch": 2.122248808558603, + "grad_norm": 1.2901725974737541, + "learning_rate": 2.388596127001985e-06, + "loss": 0.7103, + "step": 53660 + }, + { + "epoch": 2.122644307777492, + "grad_norm": 1.4511785611787091, + "learning_rate": 2.386634176983438e-06, + "loss": 0.6896, + "step": 53670 + }, + { + "epoch": 2.123039806996381, + "grad_norm": 1.607219965731853, + "learning_rate": 2.3846727804310014e-06, + "loss": 0.6443, + "step": 53680 + }, + { + "epoch": 2.12343530621527, + "grad_norm": 1.5111949409734204, + "learning_rate": 2.382711937760062e-06, + "loss": 0.6716, + "step": 53690 + }, + { + "epoch": 2.1238308054341593, + "grad_norm": 1.4319820304323034, + "learning_rate": 2.3807516493858955e-06, + "loss": 0.6543, + "step": 53700 + }, + { + "epoch": 2.1242263046530483, + "grad_norm": 1.7562690778597556, + "learning_rate": 2.378791915723655e-06, + "loss": 0.6766, + "step": 53710 + }, + { + "epoch": 2.1246218038719373, + "grad_norm": 1.3989269646335447, + "learning_rate": 2.3768327371883813e-06, + "loss": 0.6829, + "step": 53720 + }, + { + "epoch": 2.1250173030908264, + "grad_norm": 1.5376655864437776, + "learning_rate": 
2.3748741141949915e-06, + "loss": 0.6457, + "step": 53730 + }, + { + "epoch": 2.1254128023097154, + "grad_norm": 1.4184390567852088, + "learning_rate": 2.3729160471582923e-06, + "loss": 0.6642, + "step": 53740 + }, + { + "epoch": 2.1258083015286044, + "grad_norm": 1.4809553417427328, + "learning_rate": 2.3709585364929666e-06, + "loss": 0.6596, + "step": 53750 + }, + { + "epoch": 2.1262038007474935, + "grad_norm": 1.3336490811468675, + "learning_rate": 2.3690015826135794e-06, + "loss": 0.6686, + "step": 53760 + }, + { + "epoch": 2.1265992999663825, + "grad_norm": 1.4565373880158172, + "learning_rate": 2.367045185934584e-06, + "loss": 0.6744, + "step": 53770 + }, + { + "epoch": 2.1269947991852716, + "grad_norm": 1.3286726261138002, + "learning_rate": 2.36508934687031e-06, + "loss": 0.6837, + "step": 53780 + }, + { + "epoch": 2.1273902984041606, + "grad_norm": 1.4325503149989218, + "learning_rate": 2.3631340658349688e-06, + "loss": 0.6732, + "step": 53790 + }, + { + "epoch": 2.1277857976230496, + "grad_norm": 1.4666789408885827, + "learning_rate": 2.3611793432426593e-06, + "loss": 0.6744, + "step": 53800 + }, + { + "epoch": 2.1281812968419387, + "grad_norm": 1.571490968594034, + "learning_rate": 2.3592251795073564e-06, + "loss": 0.6869, + "step": 53810 + }, + { + "epoch": 2.1285767960608277, + "grad_norm": 1.292522513798636, + "learning_rate": 2.3572715750429155e-06, + "loss": 0.6968, + "step": 53820 + }, + { + "epoch": 2.1289722952797168, + "grad_norm": 1.3488799659021744, + "learning_rate": 2.3553185302630815e-06, + "loss": 0.7034, + "step": 53830 + }, + { + "epoch": 2.129367794498606, + "grad_norm": 1.4451203908014099, + "learning_rate": 2.3533660455814718e-06, + "loss": 0.6738, + "step": 53840 + }, + { + "epoch": 2.129763293717495, + "grad_norm": 1.393657861360009, + "learning_rate": 2.3514141214115927e-06, + "loss": 0.6559, + "step": 53850 + }, + { + "epoch": 2.130158792936384, + "grad_norm": 1.5912814754311821, + "learning_rate": 2.349462758166825e-06, + "loss": 0.6759, + "step": 53860 + }, + { + "epoch": 2.130554292155273, + "grad_norm": 1.5485787023789013, + "learning_rate": 2.347511956260437e-06, + "loss": 0.6736, + "step": 53870 + }, + { + "epoch": 2.130949791374162, + "grad_norm": 1.6080344965127011, + "learning_rate": 2.3455617161055726e-06, + "loss": 0.666, + "step": 53880 + }, + { + "epoch": 2.131345290593051, + "grad_norm": 1.531221782702087, + "learning_rate": 2.3436120381152614e-06, + "loss": 0.6696, + "step": 53890 + }, + { + "epoch": 2.13174078981194, + "grad_norm": 1.4445080563124875, + "learning_rate": 2.3416629227024117e-06, + "loss": 0.6479, + "step": 53900 + }, + { + "epoch": 2.132136289030829, + "grad_norm": 1.874177887668524, + "learning_rate": 2.3397143702798115e-06, + "loss": 0.6498, + "step": 53910 + }, + { + "epoch": 2.132531788249718, + "grad_norm": 1.3493915165648362, + "learning_rate": 2.337766381260129e-06, + "loss": 0.6527, + "step": 53920 + }, + { + "epoch": 2.132927287468607, + "grad_norm": 1.2167044156958442, + "learning_rate": 2.3358189560559193e-06, + "loss": 0.6788, + "step": 53930 + }, + { + "epoch": 2.133322786687496, + "grad_norm": 1.4357323115792433, + "learning_rate": 2.333872095079609e-06, + "loss": 0.691, + "step": 53940 + }, + { + "epoch": 2.1337182859063852, + "grad_norm": 1.306926339402921, + "learning_rate": 2.331925798743515e-06, + "loss": 0.6718, + "step": 53950 + }, + { + "epoch": 2.1341137851252743, + "grad_norm": 1.3008135420765468, + "learning_rate": 2.3299800674598245e-06, + "loss": 0.6743, + "step": 53960 + }, + { + "epoch": 
2.1345092843441633, + "grad_norm": 1.425605224912783, + "learning_rate": 2.3280349016406147e-06, + "loss": 0.6526, + "step": 53970 + }, + { + "epoch": 2.1349047835630524, + "grad_norm": 1.4379848423230792, + "learning_rate": 2.3260903016978335e-06, + "loss": 0.6599, + "step": 53980 + }, + { + "epoch": 2.1353002827819414, + "grad_norm": 1.521055765933288, + "learning_rate": 2.324146268043319e-06, + "loss": 0.6873, + "step": 53990 + }, + { + "epoch": 2.1356957820008304, + "grad_norm": 1.319733720553865, + "learning_rate": 2.3222028010887816e-06, + "loss": 0.6593, + "step": 54000 + }, + { + "epoch": 2.1360912812197195, + "grad_norm": 1.2999696838911359, + "learning_rate": 2.320259901245812e-06, + "loss": 0.6943, + "step": 54010 + }, + { + "epoch": 2.1364867804386085, + "grad_norm": 1.6051226871964392, + "learning_rate": 2.318317568925888e-06, + "loss": 0.6562, + "step": 54020 + }, + { + "epoch": 2.1368822796574976, + "grad_norm": 1.8619276716937607, + "learning_rate": 2.31637580454036e-06, + "loss": 0.6891, + "step": 54030 + }, + { + "epoch": 2.1372777788763866, + "grad_norm": 1.59277340173497, + "learning_rate": 2.3144346085004597e-06, + "loss": 0.6563, + "step": 54040 + }, + { + "epoch": 2.1376732780952756, + "grad_norm": 1.273798492395337, + "learning_rate": 2.3124939812172982e-06, + "loss": 0.6849, + "step": 54050 + }, + { + "epoch": 2.1380687773141647, + "grad_norm": 1.321678538302541, + "learning_rate": 2.31055392310187e-06, + "loss": 0.6829, + "step": 54060 + }, + { + "epoch": 2.1384642765330537, + "grad_norm": 1.3963753388571825, + "learning_rate": 2.308614434565043e-06, + "loss": 0.6624, + "step": 54070 + }, + { + "epoch": 2.1388597757519427, + "grad_norm": 1.484462425097106, + "learning_rate": 2.3066755160175717e-06, + "loss": 0.6173, + "step": 54080 + }, + { + "epoch": 2.139255274970832, + "grad_norm": 1.6407093637554526, + "learning_rate": 2.3047371678700815e-06, + "loss": 0.6709, + "step": 54090 + }, + { + "epoch": 2.139650774189721, + "grad_norm": 1.509349572039471, + "learning_rate": 2.302799390533085e-06, + "loss": 0.6649, + "step": 54100 + }, + { + "epoch": 2.14004627340861, + "grad_norm": 1.4108010951524979, + "learning_rate": 2.300862184416967e-06, + "loss": 0.6792, + "step": 54110 + }, + { + "epoch": 2.140441772627499, + "grad_norm": 1.2472083765380304, + "learning_rate": 2.2989255499319984e-06, + "loss": 0.6652, + "step": 54120 + }, + { + "epoch": 2.140837271846388, + "grad_norm": 1.4015709150184785, + "learning_rate": 2.2969894874883207e-06, + "loss": 0.6619, + "step": 54130 + }, + { + "epoch": 2.141232771065277, + "grad_norm": 1.5410951199328564, + "learning_rate": 2.295053997495965e-06, + "loss": 0.6292, + "step": 54140 + }, + { + "epoch": 2.141628270284166, + "grad_norm": 1.6680565516903991, + "learning_rate": 2.293119080364827e-06, + "loss": 0.644, + "step": 54150 + }, + { + "epoch": 2.142023769503055, + "grad_norm": 1.3535567041046115, + "learning_rate": 2.291184736504695e-06, + "loss": 0.6705, + "step": 54160 + }, + { + "epoch": 2.142419268721944, + "grad_norm": 1.4945217778941242, + "learning_rate": 2.289250966325226e-06, + "loss": 0.6774, + "step": 54170 + }, + { + "epoch": 2.142814767940833, + "grad_norm": 1.434709533798301, + "learning_rate": 2.2873177702359635e-06, + "loss": 0.6858, + "step": 54180 + }, + { + "epoch": 2.143210267159722, + "grad_norm": 1.7442305799564761, + "learning_rate": 2.285385148646321e-06, + "loss": 0.638, + "step": 54190 + }, + { + "epoch": 2.143605766378611, + "grad_norm": 1.552536491614337, + "learning_rate": 2.283453101965598e-06, + 
"loss": 0.659, + "step": 54200 + }, + { + "epoch": 2.1440012655975003, + "grad_norm": 1.3283935986138802, + "learning_rate": 2.2815216306029663e-06, + "loss": 0.6883, + "step": 54210 + }, + { + "epoch": 2.1443967648163893, + "grad_norm": 1.509602973253364, + "learning_rate": 2.2795907349674817e-06, + "loss": 0.6659, + "step": 54220 + }, + { + "epoch": 2.1447922640352783, + "grad_norm": 1.5198556365869367, + "learning_rate": 2.2776604154680724e-06, + "loss": 0.6475, + "step": 54230 + }, + { + "epoch": 2.1451877632541674, + "grad_norm": 1.6092550299135604, + "learning_rate": 2.275730672513546e-06, + "loss": 0.6548, + "step": 54240 + }, + { + "epoch": 2.1455832624730564, + "grad_norm": 1.2477498386874621, + "learning_rate": 2.2738015065125925e-06, + "loss": 0.6733, + "step": 54250 + }, + { + "epoch": 2.1459787616919455, + "grad_norm": 1.492006757213174, + "learning_rate": 2.2718729178737718e-06, + "loss": 0.6244, + "step": 54260 + }, + { + "epoch": 2.1463742609108345, + "grad_norm": 1.5758683462952756, + "learning_rate": 2.2699449070055307e-06, + "loss": 0.6681, + "step": 54270 + }, + { + "epoch": 2.1467697601297235, + "grad_norm": 1.138003508520722, + "learning_rate": 2.2680174743161858e-06, + "loss": 0.6516, + "step": 54280 + }, + { + "epoch": 2.147165259348613, + "grad_norm": 1.5319169064967042, + "learning_rate": 2.2660906202139356e-06, + "loss": 0.6623, + "step": 54290 + }, + { + "epoch": 2.1475607585675016, + "grad_norm": 1.3087335506599393, + "learning_rate": 2.264164345106852e-06, + "loss": 0.6762, + "step": 54300 + }, + { + "epoch": 2.147956257786391, + "grad_norm": 1.3820860043636023, + "learning_rate": 2.2622386494028913e-06, + "loss": 0.6797, + "step": 54310 + }, + { + "epoch": 2.1483517570052797, + "grad_norm": 1.552956169200907, + "learning_rate": 2.2603135335098787e-06, + "loss": 0.6786, + "step": 54320 + }, + { + "epoch": 2.148747256224169, + "grad_norm": 1.331683367124008, + "learning_rate": 2.2583889978355252e-06, + "loss": 0.6497, + "step": 54330 + }, + { + "epoch": 2.1491427554430578, + "grad_norm": 1.320550715173064, + "learning_rate": 2.25646504278741e-06, + "loss": 0.6637, + "step": 54340 + }, + { + "epoch": 2.1495382546619473, + "grad_norm": 1.4287266092193445, + "learning_rate": 2.2545416687729977e-06, + "loss": 0.6645, + "step": 54350 + }, + { + "epoch": 2.149933753880836, + "grad_norm": 1.3896180673888243, + "learning_rate": 2.252618876199622e-06, + "loss": 0.6509, + "step": 54360 + }, + { + "epoch": 2.1503292530997253, + "grad_norm": 1.6553030079074602, + "learning_rate": 2.2506966654745023e-06, + "loss": 0.666, + "step": 54370 + }, + { + "epoch": 2.1507247523186144, + "grad_norm": 1.3238847842884285, + "learning_rate": 2.248775037004725e-06, + "loss": 0.6993, + "step": 54380 + }, + { + "epoch": 2.1511202515375034, + "grad_norm": 1.4404909444757465, + "learning_rate": 2.2468539911972643e-06, + "loss": 0.6371, + "step": 54390 + }, + { + "epoch": 2.1515157507563925, + "grad_norm": 1.609388912757096, + "learning_rate": 2.2449335284589567e-06, + "loss": 0.6639, + "step": 54400 + }, + { + "epoch": 2.1519112499752815, + "grad_norm": 1.8396210372809116, + "learning_rate": 2.2430136491965297e-06, + "loss": 0.6707, + "step": 54410 + }, + { + "epoch": 2.1523067491941705, + "grad_norm": 1.591304405457969, + "learning_rate": 2.241094353816577e-06, + "loss": 0.6494, + "step": 54420 + }, + { + "epoch": 2.1527022484130596, + "grad_norm": 1.658368004839637, + "learning_rate": 2.2391756427255757e-06, + "loss": 0.6717, + "step": 54430 + }, + { + "epoch": 2.1530977476319486, + 
"grad_norm": 1.54570432897911, + "learning_rate": 2.237257516329872e-06, + "loss": 0.6738, + "step": 54440 + }, + { + "epoch": 2.1534932468508377, + "grad_norm": 1.3149618666280893, + "learning_rate": 2.235339975035697e-06, + "loss": 0.685, + "step": 54450 + }, + { + "epoch": 2.1538887460697267, + "grad_norm": 1.466269762252572, + "learning_rate": 2.2334230192491503e-06, + "loss": 0.6774, + "step": 54460 + }, + { + "epoch": 2.1542842452886157, + "grad_norm": 1.3511419063448677, + "learning_rate": 2.231506649376209e-06, + "loss": 0.6753, + "step": 54470 + }, + { + "epoch": 2.1546797445075048, + "grad_norm": 1.456886402546452, + "learning_rate": 2.2295908658227307e-06, + "loss": 0.6681, + "step": 54480 + }, + { + "epoch": 2.155075243726394, + "grad_norm": 1.434479465961671, + "learning_rate": 2.2276756689944418e-06, + "loss": 0.6642, + "step": 54490 + }, + { + "epoch": 2.155470742945283, + "grad_norm": 1.3994210665916298, + "learning_rate": 2.2257610592969518e-06, + "loss": 0.6798, + "step": 54500 + }, + { + "epoch": 2.155866242164172, + "grad_norm": 1.5036375280952452, + "learning_rate": 2.2238470371357413e-06, + "loss": 0.6636, + "step": 54510 + }, + { + "epoch": 2.156261741383061, + "grad_norm": 1.107226191294125, + "learning_rate": 2.2219336029161665e-06, + "loss": 0.6724, + "step": 54520 + }, + { + "epoch": 2.15665724060195, + "grad_norm": 1.5462140734909817, + "learning_rate": 2.2200207570434584e-06, + "loss": 0.6495, + "step": 54530 + }, + { + "epoch": 2.157052739820839, + "grad_norm": 1.6795116088316382, + "learning_rate": 2.218108499922729e-06, + "loss": 0.673, + "step": 54540 + }, + { + "epoch": 2.157448239039728, + "grad_norm": 1.4596072097923238, + "learning_rate": 2.216196831958957e-06, + "loss": 0.6758, + "step": 54550 + }, + { + "epoch": 2.157843738258617, + "grad_norm": 1.5878795690377225, + "learning_rate": 2.2142857535570055e-06, + "loss": 0.6619, + "step": 54560 + }, + { + "epoch": 2.158239237477506, + "grad_norm": 1.3146101433583148, + "learning_rate": 2.212375265121604e-06, + "loss": 0.6653, + "step": 54570 + }, + { + "epoch": 2.158634736696395, + "grad_norm": 1.4576376459562261, + "learning_rate": 2.210465367057365e-06, + "loss": 0.6385, + "step": 54580 + }, + { + "epoch": 2.159030235915284, + "grad_norm": 1.6223548733116042, + "learning_rate": 2.2085560597687687e-06, + "loss": 0.658, + "step": 54590 + }, + { + "epoch": 2.1594257351341732, + "grad_norm": 1.5426116085171147, + "learning_rate": 2.2066473436601776e-06, + "loss": 0.6571, + "step": 54600 + }, + { + "epoch": 2.1598212343530623, + "grad_norm": 1.4974485469410952, + "learning_rate": 2.2047392191358208e-06, + "loss": 0.644, + "step": 54610 + }, + { + "epoch": 2.1602167335719513, + "grad_norm": 1.195370393755823, + "learning_rate": 2.20283168659981e-06, + "loss": 0.6801, + "step": 54620 + }, + { + "epoch": 2.1606122327908404, + "grad_norm": 1.3148991024053085, + "learning_rate": 2.2009247464561266e-06, + "loss": 0.6597, + "step": 54630 + }, + { + "epoch": 2.1610077320097294, + "grad_norm": 1.3794626140536659, + "learning_rate": 2.1990183991086277e-06, + "loss": 0.6622, + "step": 54640 + }, + { + "epoch": 2.1614032312286184, + "grad_norm": 1.7012542168239617, + "learning_rate": 2.197112644961043e-06, + "loss": 0.6482, + "step": 54650 + }, + { + "epoch": 2.1617987304475075, + "grad_norm": 1.4982081528093467, + "learning_rate": 2.1952074844169823e-06, + "loss": 0.6591, + "step": 54660 + }, + { + "epoch": 2.1621942296663965, + "grad_norm": 1.5420257954861227, + "learning_rate": 2.1933029178799225e-06, + "loss": 
0.6496, + "step": 54670 + }, + { + "epoch": 2.1625897288852856, + "grad_norm": 1.1460707043086187, + "learning_rate": 2.1913989457532213e-06, + "loss": 0.6702, + "step": 54680 + }, + { + "epoch": 2.1629852281041746, + "grad_norm": 1.6270352799197734, + "learning_rate": 2.1894955684401064e-06, + "loss": 0.6909, + "step": 54690 + }, + { + "epoch": 2.1633807273230636, + "grad_norm": 1.3069684359085167, + "learning_rate": 2.1875927863436776e-06, + "loss": 0.6557, + "step": 54700 + }, + { + "epoch": 2.1637762265419527, + "grad_norm": 1.2356279574277762, + "learning_rate": 2.1856905998669166e-06, + "loss": 0.6756, + "step": 54710 + }, + { + "epoch": 2.1641717257608417, + "grad_norm": 1.4164068573765753, + "learning_rate": 2.1837890094126685e-06, + "loss": 0.6541, + "step": 54720 + }, + { + "epoch": 2.1645672249797308, + "grad_norm": 1.491296864607764, + "learning_rate": 2.1818880153836624e-06, + "loss": 0.6276, + "step": 54730 + }, + { + "epoch": 2.16496272419862, + "grad_norm": 1.3115475109060677, + "learning_rate": 2.179987618182493e-06, + "loss": 0.6836, + "step": 54740 + }, + { + "epoch": 2.165358223417509, + "grad_norm": 1.5795478878929916, + "learning_rate": 2.1780878182116346e-06, + "loss": 0.6889, + "step": 54750 + }, + { + "epoch": 2.165753722636398, + "grad_norm": 1.627230469850056, + "learning_rate": 2.17618861587343e-06, + "loss": 0.6778, + "step": 54760 + }, + { + "epoch": 2.166149221855287, + "grad_norm": 1.844227277785479, + "learning_rate": 2.1742900115700993e-06, + "loss": 0.6392, + "step": 54770 + }, + { + "epoch": 2.166544721074176, + "grad_norm": 1.2597116520771612, + "learning_rate": 2.1723920057037318e-06, + "loss": 0.684, + "step": 54780 + }, + { + "epoch": 2.166940220293065, + "grad_norm": 1.6131081013569701, + "learning_rate": 2.170494598676296e-06, + "loss": 0.6809, + "step": 54790 + }, + { + "epoch": 2.167335719511954, + "grad_norm": 1.4791812610230533, + "learning_rate": 2.1685977908896265e-06, + "loss": 0.6749, + "step": 54800 + }, + { + "epoch": 2.167731218730843, + "grad_norm": 1.2586990219671308, + "learning_rate": 2.166701582745439e-06, + "loss": 0.6557, + "step": 54810 + }, + { + "epoch": 2.168126717949732, + "grad_norm": 1.2427136825322551, + "learning_rate": 2.1648059746453136e-06, + "loss": 0.6685, + "step": 54820 + }, + { + "epoch": 2.168522217168621, + "grad_norm": 1.402752931165652, + "learning_rate": 2.1629109669907116e-06, + "loss": 0.6413, + "step": 54830 + }, + { + "epoch": 2.16891771638751, + "grad_norm": 1.3592600023884258, + "learning_rate": 2.161016560182959e-06, + "loss": 0.65, + "step": 54840 + }, + { + "epoch": 2.1693132156063992, + "grad_norm": 1.6332465760251773, + "learning_rate": 2.1591227546232633e-06, + "loss": 0.662, + "step": 54850 + }, + { + "epoch": 2.1697087148252883, + "grad_norm": 1.3031249624795824, + "learning_rate": 2.157229550712696e-06, + "loss": 0.6961, + "step": 54860 + }, + { + "epoch": 2.1701042140441773, + "grad_norm": 1.560155811013162, + "learning_rate": 2.1553369488522084e-06, + "loss": 0.6585, + "step": 54870 + }, + { + "epoch": 2.1704997132630663, + "grad_norm": 1.3158720730677125, + "learning_rate": 2.1534449494426203e-06, + "loss": 0.6866, + "step": 54880 + }, + { + "epoch": 2.1708952124819554, + "grad_norm": 1.3360160633558826, + "learning_rate": 2.1515535528846238e-06, + "loss": 0.6659, + "step": 54890 + }, + { + "epoch": 2.1712907117008444, + "grad_norm": 1.803842540897346, + "learning_rate": 2.1496627595787827e-06, + "loss": 0.6188, + "step": 54900 + }, + { + "epoch": 2.1716862109197335, + "grad_norm": 
1.48380766661754, + "learning_rate": 2.1477725699255384e-06, + "loss": 0.65, + "step": 54910 + }, + { + "epoch": 2.1720817101386225, + "grad_norm": 1.6226487076725338, + "learning_rate": 2.1458829843251973e-06, + "loss": 0.6762, + "step": 54920 + }, + { + "epoch": 2.1724772093575115, + "grad_norm": 1.3850841981657511, + "learning_rate": 2.1439940031779443e-06, + "loss": 0.6611, + "step": 54930 + }, + { + "epoch": 2.1728727085764006, + "grad_norm": 1.5037113775171946, + "learning_rate": 2.1421056268838324e-06, + "loss": 0.6585, + "step": 54940 + }, + { + "epoch": 2.1732682077952896, + "grad_norm": 1.2312445835536803, + "learning_rate": 2.1402178558427846e-06, + "loss": 0.6567, + "step": 54950 + }, + { + "epoch": 2.1736637070141787, + "grad_norm": 1.6411216422318087, + "learning_rate": 2.1383306904546027e-06, + "loss": 0.6845, + "step": 54960 + }, + { + "epoch": 2.1740592062330677, + "grad_norm": 1.6024632241331704, + "learning_rate": 2.1364441311189515e-06, + "loss": 0.6691, + "step": 54970 + }, + { + "epoch": 2.1744547054519567, + "grad_norm": 1.3754437725374022, + "learning_rate": 2.1345581782353765e-06, + "loss": 0.6763, + "step": 54980 + }, + { + "epoch": 2.1748502046708458, + "grad_norm": 1.5567165298381425, + "learning_rate": 2.1326728322032863e-06, + "loss": 0.6578, + "step": 54990 + }, + { + "epoch": 2.175245703889735, + "grad_norm": 1.2703706760644615, + "learning_rate": 2.13078809342197e-06, + "loss": 0.691, + "step": 55000 + }, + { + "epoch": 2.175641203108624, + "grad_norm": 1.6043818402896892, + "learning_rate": 2.128903962290576e-06, + "loss": 0.6799, + "step": 55010 + }, + { + "epoch": 2.176036702327513, + "grad_norm": 1.4042953948996555, + "learning_rate": 2.1270204392081366e-06, + "loss": 0.6664, + "step": 55020 + }, + { + "epoch": 2.176432201546402, + "grad_norm": 1.3462383248655843, + "learning_rate": 2.1251375245735463e-06, + "loss": 0.6546, + "step": 55030 + }, + { + "epoch": 2.176827700765291, + "grad_norm": 1.8395580276812962, + "learning_rate": 2.123255218785577e-06, + "loss": 0.6419, + "step": 55040 + }, + { + "epoch": 2.17722319998418, + "grad_norm": 1.4291423292389434, + "learning_rate": 2.121373522242866e-06, + "loss": 0.6637, + "step": 55050 + }, + { + "epoch": 2.177618699203069, + "grad_norm": 1.2932672609945908, + "learning_rate": 2.119492435343927e-06, + "loss": 0.6965, + "step": 55060 + }, + { + "epoch": 2.178014198421958, + "grad_norm": 1.4295757892204086, + "learning_rate": 2.1176119584871395e-06, + "loss": 0.6599, + "step": 55070 + }, + { + "epoch": 2.178409697640847, + "grad_norm": 1.3950743531240286, + "learning_rate": 2.1157320920707593e-06, + "loss": 0.6386, + "step": 55080 + }, + { + "epoch": 2.178805196859736, + "grad_norm": 1.4170559422726166, + "learning_rate": 2.113852836492906e-06, + "loss": 0.6506, + "step": 55090 + }, + { + "epoch": 2.179200696078625, + "grad_norm": 1.2460317146698219, + "learning_rate": 2.111974192151578e-06, + "loss": 0.6644, + "step": 55100 + }, + { + "epoch": 2.1795961952975142, + "grad_norm": 1.5005435649255958, + "learning_rate": 2.1100961594446377e-06, + "loss": 0.6739, + "step": 55110 + }, + { + "epoch": 2.1799916945164033, + "grad_norm": 1.337576581434563, + "learning_rate": 2.1082187387698212e-06, + "loss": 0.6743, + "step": 55120 + }, + { + "epoch": 2.1803871937352923, + "grad_norm": 1.542551486678024, + "learning_rate": 2.106341930524731e-06, + "loss": 0.6708, + "step": 55130 + }, + { + "epoch": 2.1807826929541814, + "grad_norm": 1.3268083381965459, + "learning_rate": 2.1044657351068467e-06, + "loss": 0.6705, + 
"step": 55140 + }, + { + "epoch": 2.1811781921730704, + "grad_norm": 1.4727253360596646, + "learning_rate": 2.1025901529135113e-06, + "loss": 0.6815, + "step": 55150 + }, + { + "epoch": 2.1815736913919594, + "grad_norm": 1.6765978470726397, + "learning_rate": 2.1007151843419443e-06, + "loss": 0.6251, + "step": 55160 + }, + { + "epoch": 2.1819691906108485, + "grad_norm": 1.525952958619666, + "learning_rate": 2.09884082978923e-06, + "loss": 0.6466, + "step": 55170 + }, + { + "epoch": 2.1823646898297375, + "grad_norm": 1.4821454817183586, + "learning_rate": 2.0969670896523233e-06, + "loss": 0.6569, + "step": 55180 + }, + { + "epoch": 2.1827601890486266, + "grad_norm": 1.473021833103436, + "learning_rate": 2.095093964328053e-06, + "loss": 0.6665, + "step": 55190 + }, + { + "epoch": 2.1831556882675156, + "grad_norm": 1.5388775406837523, + "learning_rate": 2.0932214542131125e-06, + "loss": 0.6641, + "step": 55200 + }, + { + "epoch": 2.1835511874864046, + "grad_norm": 1.4147031653262039, + "learning_rate": 2.09134955970407e-06, + "loss": 0.6556, + "step": 55210 + }, + { + "epoch": 2.1839466867052937, + "grad_norm": 1.4589106048394442, + "learning_rate": 2.0894782811973574e-06, + "loss": 0.6588, + "step": 55220 + }, + { + "epoch": 2.1843421859241827, + "grad_norm": 1.4575392943643484, + "learning_rate": 2.087607619089283e-06, + "loss": 0.6524, + "step": 55230 + }, + { + "epoch": 2.1847376851430718, + "grad_norm": 1.4381784157098458, + "learning_rate": 2.0857375737760194e-06, + "loss": 0.6589, + "step": 55240 + }, + { + "epoch": 2.185133184361961, + "grad_norm": 1.815968991448052, + "learning_rate": 2.0838681456536096e-06, + "loss": 0.6614, + "step": 55250 + }, + { + "epoch": 2.18552868358085, + "grad_norm": 1.4321266325091235, + "learning_rate": 2.081999335117965e-06, + "loss": 0.6887, + "step": 55260 + }, + { + "epoch": 2.185924182799739, + "grad_norm": 1.5833569557253544, + "learning_rate": 2.0801311425648718e-06, + "loss": 0.6591, + "step": 55270 + }, + { + "epoch": 2.186319682018628, + "grad_norm": 1.349337713206238, + "learning_rate": 2.078263568389977e-06, + "loss": 0.6982, + "step": 55280 + }, + { + "epoch": 2.186715181237517, + "grad_norm": 1.28846024488585, + "learning_rate": 2.076396612988804e-06, + "loss": 0.6721, + "step": 55290 + }, + { + "epoch": 2.187110680456406, + "grad_norm": 1.7136283349944543, + "learning_rate": 2.074530276756739e-06, + "loss": 0.6627, + "step": 55300 + }, + { + "epoch": 2.187506179675295, + "grad_norm": 1.3837230247038177, + "learning_rate": 2.0726645600890438e-06, + "loss": 0.6371, + "step": 55310 + }, + { + "epoch": 2.187901678894184, + "grad_norm": 1.2763360790287788, + "learning_rate": 2.070799463380841e-06, + "loss": 0.6734, + "step": 55320 + }, + { + "epoch": 2.188297178113073, + "grad_norm": 1.1839243288028374, + "learning_rate": 2.0689349870271302e-06, + "loss": 0.6763, + "step": 55330 + }, + { + "epoch": 2.188692677331962, + "grad_norm": 1.3844961064365453, + "learning_rate": 2.0670711314227737e-06, + "loss": 0.6737, + "step": 55340 + }, + { + "epoch": 2.189088176550851, + "grad_norm": 1.3543975863261621, + "learning_rate": 2.065207896962502e-06, + "loss": 0.6876, + "step": 55350 + }, + { + "epoch": 2.1894836757697402, + "grad_norm": 1.6200709306981103, + "learning_rate": 2.0633452840409203e-06, + "loss": 0.6677, + "step": 55360 + }, + { + "epoch": 2.1898791749886293, + "grad_norm": 1.3088681856730384, + "learning_rate": 2.0614832930524956e-06, + "loss": 0.6628, + "step": 55370 + }, + { + "epoch": 2.1902746742075183, + "grad_norm": 1.585168510119543, 
+ "learning_rate": 2.0596219243915648e-06, + "loss": 0.6618, + "step": 55380 + }, + { + "epoch": 2.1906701734264074, + "grad_norm": 1.415108320183506, + "learning_rate": 2.0577611784523376e-06, + "loss": 0.677, + "step": 55390 + }, + { + "epoch": 2.1910656726452964, + "grad_norm": 1.378881458503356, + "learning_rate": 2.0559010556288853e-06, + "loss": 0.6895, + "step": 55400 + }, + { + "epoch": 2.1914611718641854, + "grad_norm": 1.7175462402128503, + "learning_rate": 2.054041556315149e-06, + "loss": 0.6558, + "step": 55410 + }, + { + "epoch": 2.1918566710830745, + "grad_norm": 1.434742667447602, + "learning_rate": 2.0521826809049415e-06, + "loss": 0.6672, + "step": 55420 + }, + { + "epoch": 2.1922521703019635, + "grad_norm": 1.3923857888628648, + "learning_rate": 2.050324429791938e-06, + "loss": 0.6781, + "step": 55430 + }, + { + "epoch": 2.1926476695208525, + "grad_norm": 1.4864855189505777, + "learning_rate": 2.0484668033696887e-06, + "loss": 0.6445, + "step": 55440 + }, + { + "epoch": 2.1930431687397416, + "grad_norm": 1.514001876148184, + "learning_rate": 2.0466098020316017e-06, + "loss": 0.7041, + "step": 55450 + }, + { + "epoch": 2.1934386679586306, + "grad_norm": 1.4393693179062899, + "learning_rate": 2.0447534261709623e-06, + "loss": 0.6483, + "step": 55460 + }, + { + "epoch": 2.1938341671775197, + "grad_norm": 1.6471815359178719, + "learning_rate": 2.0428976761809156e-06, + "loss": 0.6333, + "step": 55470 + }, + { + "epoch": 2.1942296663964087, + "grad_norm": 1.50965200079764, + "learning_rate": 2.0410425524544813e-06, + "loss": 0.6456, + "step": 55480 + }, + { + "epoch": 2.1946251656152977, + "grad_norm": 1.4271593861899239, + "learning_rate": 2.0391880553845405e-06, + "loss": 0.6774, + "step": 55490 + }, + { + "epoch": 2.195020664834187, + "grad_norm": 1.5024065913579605, + "learning_rate": 2.037334185363845e-06, + "loss": 0.6267, + "step": 55500 + }, + { + "epoch": 2.195416164053076, + "grad_norm": 1.251720669120062, + "learning_rate": 2.03548094278501e-06, + "loss": 0.6988, + "step": 55510 + }, + { + "epoch": 2.195811663271965, + "grad_norm": 1.4577803130311289, + "learning_rate": 2.033628328040525e-06, + "loss": 0.6707, + "step": 55520 + }, + { + "epoch": 2.196207162490854, + "grad_norm": 1.733576936940708, + "learning_rate": 2.031776341522737e-06, + "loss": 0.6667, + "step": 55530 + }, + { + "epoch": 2.196602661709743, + "grad_norm": 1.5985478513808171, + "learning_rate": 2.0299249836238698e-06, + "loss": 0.6611, + "step": 55540 + }, + { + "epoch": 2.196998160928632, + "grad_norm": 1.373277732981005, + "learning_rate": 2.028074254736006e-06, + "loss": 0.6536, + "step": 55550 + }, + { + "epoch": 2.197393660147521, + "grad_norm": 1.4428927938146197, + "learning_rate": 2.026224155251101e-06, + "loss": 0.6344, + "step": 55560 + }, + { + "epoch": 2.19778915936641, + "grad_norm": 1.7125403072980094, + "learning_rate": 2.0243746855609705e-06, + "loss": 0.6641, + "step": 55570 + }, + { + "epoch": 2.198184658585299, + "grad_norm": 1.4616725632791163, + "learning_rate": 2.0225258460573044e-06, + "loss": 0.6744, + "step": 55580 + }, + { + "epoch": 2.198580157804188, + "grad_norm": 1.56280968591115, + "learning_rate": 2.020677637131653e-06, + "loss": 0.6586, + "step": 55590 + }, + { + "epoch": 2.198975657023077, + "grad_norm": 1.3897154816337431, + "learning_rate": 2.0188300591754353e-06, + "loss": 0.7077, + "step": 55600 + }, + { + "epoch": 2.199371156241966, + "grad_norm": 1.671967927327787, + "learning_rate": 2.0169831125799377e-06, + "loss": 0.6727, + "step": 55610 + }, + { + 
"epoch": 2.1997666554608553, + "grad_norm": 1.6562121360254696, + "learning_rate": 2.0151367977363117e-06, + "loss": 0.6716, + "step": 55620 + }, + { + "epoch": 2.2001621546797443, + "grad_norm": 1.2709020694628932, + "learning_rate": 2.0132911150355744e-06, + "loss": 0.6983, + "step": 55630 + }, + { + "epoch": 2.200557653898634, + "grad_norm": 1.4108853072245306, + "learning_rate": 2.0114460648686083e-06, + "loss": 0.6387, + "step": 55640 + }, + { + "epoch": 2.2009531531175224, + "grad_norm": 1.3144000303238608, + "learning_rate": 2.0096016476261678e-06, + "loss": 0.6377, + "step": 55650 + }, + { + "epoch": 2.201348652336412, + "grad_norm": 1.4126155160785583, + "learning_rate": 2.007757863698864e-06, + "loss": 0.6299, + "step": 55660 + }, + { + "epoch": 2.2017441515553005, + "grad_norm": 1.5177191937709549, + "learning_rate": 2.0059147134771824e-06, + "loss": 0.6471, + "step": 55670 + }, + { + "epoch": 2.20213965077419, + "grad_norm": 1.8833569262949779, + "learning_rate": 2.0040721973514677e-06, + "loss": 0.66, + "step": 55680 + }, + { + "epoch": 2.2025351499930785, + "grad_norm": 1.7941984966864863, + "learning_rate": 2.0022303157119367e-06, + "loss": 0.6219, + "step": 55690 + }, + { + "epoch": 2.202930649211968, + "grad_norm": 1.466832427685248, + "learning_rate": 2.0003890689486643e-06, + "loss": 0.6386, + "step": 55700 + }, + { + "epoch": 2.203326148430857, + "grad_norm": 1.4680888869112423, + "learning_rate": 1.9985484574515993e-06, + "loss": 0.6358, + "step": 55710 + }, + { + "epoch": 2.203721647649746, + "grad_norm": 1.826877541323233, + "learning_rate": 1.996708481610548e-06, + "loss": 0.6808, + "step": 55720 + }, + { + "epoch": 2.204117146868635, + "grad_norm": 1.2128174528183193, + "learning_rate": 1.9948691418151904e-06, + "loss": 0.6708, + "step": 55730 + }, + { + "epoch": 2.204512646087524, + "grad_norm": 1.7102703882369514, + "learning_rate": 1.9930304384550607e-06, + "loss": 0.6459, + "step": 55740 + }, + { + "epoch": 2.204908145306413, + "grad_norm": 1.3154355290699002, + "learning_rate": 1.9911923719195704e-06, + "loss": 0.6455, + "step": 55750 + }, + { + "epoch": 2.2053036445253023, + "grad_norm": 1.7461869371676682, + "learning_rate": 1.989354942597986e-06, + "loss": 0.6769, + "step": 55760 + }, + { + "epoch": 2.2056991437441913, + "grad_norm": 1.6766754589499235, + "learning_rate": 1.9875181508794476e-06, + "loss": 0.647, + "step": 55770 + }, + { + "epoch": 2.2060946429630803, + "grad_norm": 1.3733294435849956, + "learning_rate": 1.9856819971529527e-06, + "loss": 0.6793, + "step": 55780 + }, + { + "epoch": 2.2064901421819694, + "grad_norm": 1.350790783662417, + "learning_rate": 1.983846481807371e-06, + "loss": 0.679, + "step": 55790 + }, + { + "epoch": 2.2068856414008584, + "grad_norm": 1.511280442973517, + "learning_rate": 1.982011605231429e-06, + "loss": 0.6698, + "step": 55800 + }, + { + "epoch": 2.2072811406197475, + "grad_norm": 1.6302616143014053, + "learning_rate": 1.9801773678137266e-06, + "loss": 0.6756, + "step": 55810 + }, + { + "epoch": 2.2076766398386365, + "grad_norm": 1.6669755135186197, + "learning_rate": 1.978343769942721e-06, + "loss": 0.6687, + "step": 55820 + }, + { + "epoch": 2.2080721390575255, + "grad_norm": 1.419657251798545, + "learning_rate": 1.9765108120067355e-06, + "loss": 0.6697, + "step": 55830 + }, + { + "epoch": 2.2084676382764146, + "grad_norm": 1.9387774666590267, + "learning_rate": 1.9746784943939627e-06, + "loss": 0.6129, + "step": 55840 + }, + { + "epoch": 2.2088631374953036, + "grad_norm": 1.4085316120697977, + "learning_rate": 
1.972846817492453e-06, + "loss": 0.6548, + "step": 55850 + }, + { + "epoch": 2.2092586367141926, + "grad_norm": 1.3703629194926459, + "learning_rate": 1.9710157816901275e-06, + "loss": 0.6797, + "step": 55860 + }, + { + "epoch": 2.2096541359330817, + "grad_norm": 1.6305671899887957, + "learning_rate": 1.969185387374766e-06, + "loss": 0.665, + "step": 55870 + }, + { + "epoch": 2.2100496351519707, + "grad_norm": 1.4197981325960134, + "learning_rate": 1.967355634934015e-06, + "loss": 0.6645, + "step": 55880 + }, + { + "epoch": 2.2104451343708598, + "grad_norm": 1.4597196294207555, + "learning_rate": 1.9655265247553833e-06, + "loss": 0.6616, + "step": 55890 + }, + { + "epoch": 2.210840633589749, + "grad_norm": 1.4349110107401246, + "learning_rate": 1.9636980572262476e-06, + "loss": 0.6833, + "step": 55900 + }, + { + "epoch": 2.211236132808638, + "grad_norm": 1.3285041627939984, + "learning_rate": 1.9618702327338433e-06, + "loss": 0.6419, + "step": 55910 + }, + { + "epoch": 2.211631632027527, + "grad_norm": 1.336458420742804, + "learning_rate": 1.9600430516652765e-06, + "loss": 0.6548, + "step": 55920 + }, + { + "epoch": 2.212027131246416, + "grad_norm": 1.3712226329795318, + "learning_rate": 1.9582165144075073e-06, + "loss": 0.6715, + "step": 55930 + }, + { + "epoch": 2.212422630465305, + "grad_norm": 1.289725068137227, + "learning_rate": 1.9563906213473705e-06, + "loss": 0.6793, + "step": 55940 + }, + { + "epoch": 2.212818129684194, + "grad_norm": 1.3684855015971884, + "learning_rate": 1.954565372871554e-06, + "loss": 0.637, + "step": 55950 + }, + { + "epoch": 2.213213628903083, + "grad_norm": 1.6592268240201962, + "learning_rate": 1.952740769366619e-06, + "loss": 0.6687, + "step": 55960 + }, + { + "epoch": 2.213609128121972, + "grad_norm": 1.4120912613364356, + "learning_rate": 1.9509168112189804e-06, + "loss": 0.6502, + "step": 55970 + }, + { + "epoch": 2.214004627340861, + "grad_norm": 1.5333297648380488, + "learning_rate": 1.9490934988149275e-06, + "loss": 0.6822, + "step": 55980 + }, + { + "epoch": 2.21440012655975, + "grad_norm": 1.6433836364096805, + "learning_rate": 1.9472708325405993e-06, + "loss": 0.6679, + "step": 55990 + }, + { + "epoch": 2.214795625778639, + "grad_norm": 1.6214430695055573, + "learning_rate": 1.945448812782011e-06, + "loss": 0.6845, + "step": 56000 + }, + { + "epoch": 2.2151911249975282, + "grad_norm": 1.3626081798981615, + "learning_rate": 1.9436274399250303e-06, + "loss": 0.6722, + "step": 56010 + }, + { + "epoch": 2.2155866242164173, + "grad_norm": 1.4780307921497446, + "learning_rate": 1.9418067143553983e-06, + "loss": 0.6636, + "step": 56020 + }, + { + "epoch": 2.2159821234353063, + "grad_norm": 1.4394379393799175, + "learning_rate": 1.9399866364587084e-06, + "loss": 0.6788, + "step": 56030 + }, + { + "epoch": 2.2163776226541954, + "grad_norm": 1.4512876050047627, + "learning_rate": 1.938167206620426e-06, + "loss": 0.6615, + "step": 56040 + }, + { + "epoch": 2.2167731218730844, + "grad_norm": 1.192293141807901, + "learning_rate": 1.936348425225873e-06, + "loss": 0.6657, + "step": 56050 + }, + { + "epoch": 2.2171686210919734, + "grad_norm": 1.307150319908702, + "learning_rate": 1.934530292660235e-06, + "loss": 0.675, + "step": 56060 + }, + { + "epoch": 2.2175641203108625, + "grad_norm": 1.3936515514615915, + "learning_rate": 1.9327128093085647e-06, + "loss": 0.6448, + "step": 56070 + }, + { + "epoch": 2.2179596195297515, + "grad_norm": 1.2604982469995145, + "learning_rate": 1.93089597555577e-06, + "loss": 0.6858, + "step": 56080 + }, + { + "epoch": 
2.2183551187486406, + "grad_norm": 1.3388377138240866, + "learning_rate": 1.9290797917866293e-06, + "loss": 0.6614, + "step": 56090 + }, + { + "epoch": 2.2187506179675296, + "grad_norm": 1.5756046515903475, + "learning_rate": 1.927264258385777e-06, + "loss": 0.6348, + "step": 56100 + }, + { + "epoch": 2.2191461171864186, + "grad_norm": 1.266632090976251, + "learning_rate": 1.9254493757377118e-06, + "loss": 0.6523, + "step": 56110 + }, + { + "epoch": 2.2195416164053077, + "grad_norm": 1.3605980661976635, + "learning_rate": 1.9236351442267936e-06, + "loss": 0.6679, + "step": 56120 + }, + { + "epoch": 2.2199371156241967, + "grad_norm": 1.435802616820598, + "learning_rate": 1.9218215642372483e-06, + "loss": 0.6734, + "step": 56130 + }, + { + "epoch": 2.2203326148430858, + "grad_norm": 1.3487513921350804, + "learning_rate": 1.920008636153158e-06, + "loss": 0.6955, + "step": 56140 + }, + { + "epoch": 2.220728114061975, + "grad_norm": 1.3844409532429283, + "learning_rate": 1.918196360358474e-06, + "loss": 0.6598, + "step": 56150 + }, + { + "epoch": 2.221123613280864, + "grad_norm": 1.4692629930619787, + "learning_rate": 1.916384737237001e-06, + "loss": 0.6534, + "step": 56160 + }, + { + "epoch": 2.221519112499753, + "grad_norm": 1.568874716346847, + "learning_rate": 1.914573767172413e-06, + "loss": 0.6384, + "step": 56170 + }, + { + "epoch": 2.221914611718642, + "grad_norm": 1.2888907028910421, + "learning_rate": 1.9127634505482394e-06, + "loss": 0.6895, + "step": 56180 + }, + { + "epoch": 2.222310110937531, + "grad_norm": 1.4315007722405082, + "learning_rate": 1.9109537877478773e-06, + "loss": 0.6883, + "step": 56190 + }, + { + "epoch": 2.22270561015642, + "grad_norm": 1.4925370882725184, + "learning_rate": 1.9091447791545797e-06, + "loss": 0.7027, + "step": 56200 + }, + { + "epoch": 2.223101109375309, + "grad_norm": 1.434823166299685, + "learning_rate": 1.9073364251514658e-06, + "loss": 0.6679, + "step": 56210 + }, + { + "epoch": 2.223496608594198, + "grad_norm": 1.314422053543772, + "learning_rate": 1.9055287261215133e-06, + "loss": 0.6746, + "step": 56220 + }, + { + "epoch": 2.223892107813087, + "grad_norm": 1.4634406846981405, + "learning_rate": 1.9037216824475618e-06, + "loss": 0.6784, + "step": 56230 + }, + { + "epoch": 2.224287607031976, + "grad_norm": 1.7209427741649248, + "learning_rate": 1.9019152945123098e-06, + "loss": 0.6678, + "step": 56240 + }, + { + "epoch": 2.224683106250865, + "grad_norm": 1.3133593028830022, + "learning_rate": 1.900109562698323e-06, + "loss": 0.642, + "step": 56250 + }, + { + "epoch": 2.2250786054697542, + "grad_norm": 1.4217547369672734, + "learning_rate": 1.8983044873880213e-06, + "loss": 0.6278, + "step": 56260 + }, + { + "epoch": 2.2254741046886433, + "grad_norm": 1.2041896901158697, + "learning_rate": 1.8965000689636925e-06, + "loss": 0.6649, + "step": 56270 + }, + { + "epoch": 2.2258696039075323, + "grad_norm": 1.396883148452029, + "learning_rate": 1.8946963078074794e-06, + "loss": 0.6842, + "step": 56280 + }, + { + "epoch": 2.2262651031264213, + "grad_norm": 1.245185383960608, + "learning_rate": 1.8928932043013854e-06, + "loss": 0.662, + "step": 56290 + }, + { + "epoch": 2.2266606023453104, + "grad_norm": 1.4469023243975065, + "learning_rate": 1.891090758827281e-06, + "loss": 0.6654, + "step": 56300 + }, + { + "epoch": 2.2270561015641994, + "grad_norm": 1.3647389039208557, + "learning_rate": 1.88928897176689e-06, + "loss": 0.6447, + "step": 56310 + }, + { + "epoch": 2.2274516007830885, + "grad_norm": 1.258365695865808, + "learning_rate": 
1.8874878435018028e-06, + "loss": 0.6634, + "step": 56320 + }, + { + "epoch": 2.2278471000019775, + "grad_norm": 1.2789613698259803, + "learning_rate": 1.8856873744134647e-06, + "loss": 0.6951, + "step": 56330 + }, + { + "epoch": 2.2282425992208665, + "grad_norm": 1.5063845721766689, + "learning_rate": 1.8838875648831874e-06, + "loss": 0.6632, + "step": 56340 + }, + { + "epoch": 2.2286380984397556, + "grad_norm": 1.343985022364594, + "learning_rate": 1.8820884152921382e-06, + "loss": 0.6727, + "step": 56350 + }, + { + "epoch": 2.2290335976586446, + "grad_norm": 1.3195762296427764, + "learning_rate": 1.8802899260213458e-06, + "loss": 0.6633, + "step": 56360 + }, + { + "epoch": 2.2294290968775337, + "grad_norm": 1.4487709596771288, + "learning_rate": 1.878492097451698e-06, + "loss": 0.6842, + "step": 56370 + }, + { + "epoch": 2.2298245960964227, + "grad_norm": 1.238861972100529, + "learning_rate": 1.8766949299639475e-06, + "loss": 0.634, + "step": 56380 + }, + { + "epoch": 2.2302200953153117, + "grad_norm": 1.265566653488306, + "learning_rate": 1.8748984239386996e-06, + "loss": 0.656, + "step": 56390 + }, + { + "epoch": 2.2306155945342008, + "grad_norm": 1.6392009645232655, + "learning_rate": 1.8731025797564278e-06, + "loss": 0.6135, + "step": 56400 + }, + { + "epoch": 2.23101109375309, + "grad_norm": 1.8560676463992838, + "learning_rate": 1.8713073977974572e-06, + "loss": 0.6358, + "step": 56410 + }, + { + "epoch": 2.231406592971979, + "grad_norm": 1.4270879044693834, + "learning_rate": 1.8695128784419803e-06, + "loss": 0.6647, + "step": 56420 + }, + { + "epoch": 2.231802092190868, + "grad_norm": 1.1775420223359605, + "learning_rate": 1.8677190220700419e-06, + "loss": 0.6688, + "step": 56430 + }, + { + "epoch": 2.232197591409757, + "grad_norm": 1.3969876657621385, + "learning_rate": 1.8659258290615535e-06, + "loss": 0.6525, + "step": 56440 + }, + { + "epoch": 2.232593090628646, + "grad_norm": 1.422047648398132, + "learning_rate": 1.8641332997962786e-06, + "loss": 0.681, + "step": 56450 + }, + { + "epoch": 2.232988589847535, + "grad_norm": 1.788866674564538, + "learning_rate": 1.8623414346538488e-06, + "loss": 0.6588, + "step": 56460 + }, + { + "epoch": 2.233384089066424, + "grad_norm": 1.4063625912303486, + "learning_rate": 1.8605502340137483e-06, + "loss": 0.6804, + "step": 56470 + }, + { + "epoch": 2.233779588285313, + "grad_norm": 1.3971012230148743, + "learning_rate": 1.8587596982553224e-06, + "loss": 0.6399, + "step": 56480 + }, + { + "epoch": 2.234175087504202, + "grad_norm": 1.5483857583153908, + "learning_rate": 1.8569698277577746e-06, + "loss": 0.6531, + "step": 56490 + }, + { + "epoch": 2.234570586723091, + "grad_norm": 1.233723600661161, + "learning_rate": 1.8551806229001718e-06, + "loss": 0.6842, + "step": 56500 + }, + { + "epoch": 2.23496608594198, + "grad_norm": 1.6947271254138232, + "learning_rate": 1.8533920840614334e-06, + "loss": 0.6198, + "step": 56510 + }, + { + "epoch": 2.2353615851608692, + "grad_norm": 1.4090932462271977, + "learning_rate": 1.8516042116203452e-06, + "loss": 0.6483, + "step": 56520 + }, + { + "epoch": 2.2357570843797583, + "grad_norm": 1.7536861712117378, + "learning_rate": 1.8498170059555466e-06, + "loss": 0.6525, + "step": 56530 + }, + { + "epoch": 2.2361525835986473, + "grad_norm": 1.4169051713145988, + "learning_rate": 1.8480304674455347e-06, + "loss": 0.6718, + "step": 56540 + }, + { + "epoch": 2.2365480828175364, + "grad_norm": 1.4518560967693332, + "learning_rate": 1.846244596468671e-06, + "loss": 0.66, + "step": 56550 + }, + { + "epoch": 
2.2369435820364254, + "grad_norm": 1.321908633996863, + "learning_rate": 1.8444593934031695e-06, + "loss": 0.6525, + "step": 56560 + }, + { + "epoch": 2.2373390812553144, + "grad_norm": 1.4557489658831209, + "learning_rate": 1.8426748586271087e-06, + "loss": 0.6437, + "step": 56570 + }, + { + "epoch": 2.2377345804742035, + "grad_norm": 1.662164731970988, + "learning_rate": 1.8408909925184193e-06, + "loss": 0.6677, + "step": 56580 + }, + { + "epoch": 2.2381300796930925, + "grad_norm": 1.3354636469933834, + "learning_rate": 1.8391077954548992e-06, + "loss": 0.6425, + "step": 56590 + }, + { + "epoch": 2.2385255789119816, + "grad_norm": 2.0407428627736817, + "learning_rate": 1.8373252678141912e-06, + "loss": 0.6464, + "step": 56600 + }, + { + "epoch": 2.2389210781308706, + "grad_norm": 1.4228120371973003, + "learning_rate": 1.8355434099738095e-06, + "loss": 0.6433, + "step": 56610 + }, + { + "epoch": 2.2393165773497596, + "grad_norm": 1.459021274733276, + "learning_rate": 1.8337622223111178e-06, + "loss": 0.6855, + "step": 56620 + }, + { + "epoch": 2.2397120765686487, + "grad_norm": 1.500836335928012, + "learning_rate": 1.8319817052033445e-06, + "loss": 0.6963, + "step": 56630 + }, + { + "epoch": 2.2401075757875377, + "grad_norm": 1.3895081909386187, + "learning_rate": 1.8302018590275694e-06, + "loss": 0.6721, + "step": 56640 + }, + { + "epoch": 2.2405030750064268, + "grad_norm": 1.4772143176780497, + "learning_rate": 1.8284226841607366e-06, + "loss": 0.6814, + "step": 56650 + }, + { + "epoch": 2.240898574225316, + "grad_norm": 1.7482312266724933, + "learning_rate": 1.8266441809796414e-06, + "loss": 0.6662, + "step": 56660 + }, + { + "epoch": 2.241294073444205, + "grad_norm": 1.3033825177029148, + "learning_rate": 1.8248663498609443e-06, + "loss": 0.6581, + "step": 56670 + }, + { + "epoch": 2.241689572663094, + "grad_norm": 1.667824217317311, + "learning_rate": 1.8230891911811554e-06, + "loss": 0.6442, + "step": 56680 + }, + { + "epoch": 2.242085071881983, + "grad_norm": 1.5148632174043146, + "learning_rate": 1.8213127053166496e-06, + "loss": 0.6408, + "step": 56690 + }, + { + "epoch": 2.242480571100872, + "grad_norm": 1.3933994678234267, + "learning_rate": 1.8195368926436558e-06, + "loss": 0.6605, + "step": 56700 + }, + { + "epoch": 2.242876070319761, + "grad_norm": 1.5538457724383805, + "learning_rate": 1.8177617535382592e-06, + "loss": 0.6225, + "step": 56710 + }, + { + "epoch": 2.24327156953865, + "grad_norm": 1.6427098243480491, + "learning_rate": 1.815987288376403e-06, + "loss": 0.6678, + "step": 56720 + }, + { + "epoch": 2.243667068757539, + "grad_norm": 1.7193446250775357, + "learning_rate": 1.8142134975338915e-06, + "loss": 0.657, + "step": 56730 + }, + { + "epoch": 2.244062567976428, + "grad_norm": 1.4362532723815795, + "learning_rate": 1.81244038138638e-06, + "loss": 0.6628, + "step": 56740 + }, + { + "epoch": 2.244458067195317, + "grad_norm": 1.2615105301421554, + "learning_rate": 1.810667940309388e-06, + "loss": 0.6889, + "step": 56750 + }, + { + "epoch": 2.244853566414206, + "grad_norm": 1.4974413493231196, + "learning_rate": 1.8088961746782856e-06, + "loss": 0.6506, + "step": 56760 + }, + { + "epoch": 2.2452490656330952, + "grad_norm": 1.702283462940168, + "learning_rate": 1.8071250848683015e-06, + "loss": 0.6796, + "step": 56770 + }, + { + "epoch": 2.2456445648519843, + "grad_norm": 1.5010915981516817, + "learning_rate": 1.805354671254525e-06, + "loss": 0.6693, + "step": 56780 + }, + { + "epoch": 2.2460400640708733, + "grad_norm": 1.3987135652736051, + "learning_rate": 
1.803584934211896e-06, + "loss": 0.6482, + "step": 56790 + }, + { + "epoch": 2.2464355632897623, + "grad_norm": 1.4714821305013848, + "learning_rate": 1.8018158741152181e-06, + "loss": 0.6612, + "step": 56800 + }, + { + "epoch": 2.2468310625086514, + "grad_norm": 1.4467126209922527, + "learning_rate": 1.8000474913391447e-06, + "loss": 0.6767, + "step": 56810 + }, + { + "epoch": 2.2472265617275404, + "grad_norm": 1.3764708445008866, + "learning_rate": 1.7982797862581919e-06, + "loss": 0.6771, + "step": 56820 + }, + { + "epoch": 2.2476220609464295, + "grad_norm": 1.4056222687071145, + "learning_rate": 1.7965127592467264e-06, + "loss": 0.6565, + "step": 56830 + }, + { + "epoch": 2.2480175601653185, + "grad_norm": 1.4447925842674119, + "learning_rate": 1.7947464106789786e-06, + "loss": 0.6598, + "step": 56840 + }, + { + "epoch": 2.2484130593842075, + "grad_norm": 1.6981502707829608, + "learning_rate": 1.7929807409290251e-06, + "loss": 0.6248, + "step": 56850 + }, + { + "epoch": 2.2488085586030966, + "grad_norm": 1.3381738169941404, + "learning_rate": 1.7912157503708089e-06, + "loss": 0.6636, + "step": 56860 + }, + { + "epoch": 2.2492040578219856, + "grad_norm": 1.7812758873685959, + "learning_rate": 1.789451439378122e-06, + "loss": 0.6624, + "step": 56870 + }, + { + "epoch": 2.2495995570408747, + "grad_norm": 1.6468211772174832, + "learning_rate": 1.787687808324618e-06, + "loss": 0.6651, + "step": 56880 + }, + { + "epoch": 2.2499950562597637, + "grad_norm": 1.7019329011960929, + "learning_rate": 1.7859248575838e-06, + "loss": 0.687, + "step": 56890 + }, + { + "epoch": 2.2503905554786527, + "grad_norm": 1.53289615213947, + "learning_rate": 1.7841625875290353e-06, + "loss": 0.6742, + "step": 56900 + }, + { + "epoch": 2.250786054697542, + "grad_norm": 1.442120115058658, + "learning_rate": 1.7824009985335383e-06, + "loss": 0.6698, + "step": 56910 + }, + { + "epoch": 2.251181553916431, + "grad_norm": 1.5389697924827754, + "learning_rate": 1.7806400909703875e-06, + "loss": 0.6705, + "step": 56920 + }, + { + "epoch": 2.2515770531353203, + "grad_norm": 1.4902695257279648, + "learning_rate": 1.77887986521251e-06, + "loss": 0.6564, + "step": 56930 + }, + { + "epoch": 2.251972552354209, + "grad_norm": 1.718848011671999, + "learning_rate": 1.777120321632691e-06, + "loss": 0.6556, + "step": 56940 + }, + { + "epoch": 2.2523680515730984, + "grad_norm": 1.5808295743640888, + "learning_rate": 1.7753614606035746e-06, + "loss": 0.6529, + "step": 56950 + }, + { + "epoch": 2.252763550791987, + "grad_norm": 1.4850794532445288, + "learning_rate": 1.773603282497655e-06, + "loss": 0.6489, + "step": 56960 + }, + { + "epoch": 2.2531590500108765, + "grad_norm": 1.5127981658388805, + "learning_rate": 1.7718457876872841e-06, + "loss": 0.6737, + "step": 56970 + }, + { + "epoch": 2.253554549229765, + "grad_norm": 1.670672164536921, + "learning_rate": 1.7700889765446717e-06, + "loss": 0.6632, + "step": 56980 + }, + { + "epoch": 2.2539500484486545, + "grad_norm": 1.2796211535244366, + "learning_rate": 1.7683328494418777e-06, + "loss": 0.6745, + "step": 56990 + }, + { + "epoch": 2.254345547667543, + "grad_norm": 1.737965518331303, + "learning_rate": 1.7665774067508201e-06, + "loss": 0.6584, + "step": 57000 + }, + { + "epoch": 2.2547410468864326, + "grad_norm": 1.3051597217878708, + "learning_rate": 1.764822648843273e-06, + "loss": 0.657, + "step": 57010 + }, + { + "epoch": 2.255136546105321, + "grad_norm": 1.416979843607973, + "learning_rate": 1.7630685760908623e-06, + "loss": 0.6965, + "step": 57020 + }, + { + "epoch": 
2.2555320453242107, + "grad_norm": 1.2901936454592218, + "learning_rate": 1.7613151888650726e-06, + "loss": 0.6432, + "step": 57030 + }, + { + "epoch": 2.2559275445430993, + "grad_norm": 1.6155756378778239, + "learning_rate": 1.7595624875372385e-06, + "loss": 0.6664, + "step": 57040 + }, + { + "epoch": 2.256323043761989, + "grad_norm": 1.4291756886832887, + "learning_rate": 1.7578104724785556e-06, + "loss": 0.6671, + "step": 57050 + }, + { + "epoch": 2.2567185429808774, + "grad_norm": 1.9051673408208258, + "learning_rate": 1.7560591440600665e-06, + "loss": 0.6221, + "step": 57060 + }, + { + "epoch": 2.257114042199767, + "grad_norm": 1.4345013343982636, + "learning_rate": 1.7543085026526774e-06, + "loss": 0.6662, + "step": 57070 + }, + { + "epoch": 2.2575095414186555, + "grad_norm": 1.4066077448653886, + "learning_rate": 1.7525585486271412e-06, + "loss": 0.6867, + "step": 57080 + }, + { + "epoch": 2.257905040637545, + "grad_norm": 1.2310540951399676, + "learning_rate": 1.750809282354069e-06, + "loss": 0.6592, + "step": 57090 + }, + { + "epoch": 2.258300539856434, + "grad_norm": 1.479782097624296, + "learning_rate": 1.7490607042039226e-06, + "loss": 0.6496, + "step": 57100 + }, + { + "epoch": 2.258696039075323, + "grad_norm": 1.6279422057663602, + "learning_rate": 1.7473128145470258e-06, + "loss": 0.6322, + "step": 57110 + }, + { + "epoch": 2.259091538294212, + "grad_norm": 1.406948905517128, + "learning_rate": 1.7455656137535471e-06, + "loss": 0.6725, + "step": 57120 + }, + { + "epoch": 2.259487037513101, + "grad_norm": 1.2998861084656765, + "learning_rate": 1.743819102193518e-06, + "loss": 0.639, + "step": 57130 + }, + { + "epoch": 2.25988253673199, + "grad_norm": 1.4955724776244252, + "learning_rate": 1.742073280236815e-06, + "loss": 0.6693, + "step": 57140 + }, + { + "epoch": 2.260278035950879, + "grad_norm": 1.5301768579838604, + "learning_rate": 1.740328148253178e-06, + "loss": 0.6483, + "step": 57150 + }, + { + "epoch": 2.260673535169768, + "grad_norm": 1.4628432047613005, + "learning_rate": 1.7385837066121924e-06, + "loss": 0.6426, + "step": 57160 + }, + { + "epoch": 2.2610690343886573, + "grad_norm": 1.5267684020560457, + "learning_rate": 1.7368399556833043e-06, + "loss": 0.6649, + "step": 57170 + }, + { + "epoch": 2.2614645336075463, + "grad_norm": 1.1257282297719096, + "learning_rate": 1.7350968958358083e-06, + "loss": 0.6685, + "step": 57180 + }, + { + "epoch": 2.2618600328264353, + "grad_norm": 1.3845602174893739, + "learning_rate": 1.733354527438853e-06, + "loss": 0.6435, + "step": 57190 + }, + { + "epoch": 2.2622555320453244, + "grad_norm": 1.7651997036133584, + "learning_rate": 1.731612850861446e-06, + "loss": 0.6684, + "step": 57200 + }, + { + "epoch": 2.2626510312642134, + "grad_norm": 1.6026459719233057, + "learning_rate": 1.7298718664724423e-06, + "loss": 0.6374, + "step": 57210 + }, + { + "epoch": 2.2630465304831024, + "grad_norm": 1.4384708999583027, + "learning_rate": 1.7281315746405526e-06, + "loss": 0.6769, + "step": 57220 + }, + { + "epoch": 2.2634420297019915, + "grad_norm": 1.3515263856584332, + "learning_rate": 1.72639197573434e-06, + "loss": 0.6841, + "step": 57230 + }, + { + "epoch": 2.2638375289208805, + "grad_norm": 1.594954434061927, + "learning_rate": 1.724653070122224e-06, + "loss": 0.6755, + "step": 57240 + }, + { + "epoch": 2.2642330281397696, + "grad_norm": 1.5447594339387254, + "learning_rate": 1.7229148581724726e-06, + "loss": 0.6648, + "step": 57250 + }, + { + "epoch": 2.2646285273586586, + "grad_norm": 1.6356337120313031, + "learning_rate": 
1.7211773402532123e-06, + "loss": 0.6459, + "step": 57260 + }, + { + "epoch": 2.2650240265775476, + "grad_norm": 1.250288578927964, + "learning_rate": 1.7194405167324156e-06, + "loss": 0.6581, + "step": 57270 + }, + { + "epoch": 2.2654195257964367, + "grad_norm": 1.6810977534904172, + "learning_rate": 1.7177043879779171e-06, + "loss": 0.6659, + "step": 57280 + }, + { + "epoch": 2.2658150250153257, + "grad_norm": 1.6086514165376045, + "learning_rate": 1.7159689543573937e-06, + "loss": 0.6561, + "step": 57290 + }, + { + "epoch": 2.2662105242342148, + "grad_norm": 1.2587524167102466, + "learning_rate": 1.7142342162383852e-06, + "loss": 0.6332, + "step": 57300 + }, + { + "epoch": 2.266606023453104, + "grad_norm": 1.5816611326839283, + "learning_rate": 1.7125001739882757e-06, + "loss": 0.6879, + "step": 57310 + }, + { + "epoch": 2.267001522671993, + "grad_norm": 1.5914852857365567, + "learning_rate": 1.7107668279743084e-06, + "loss": 0.6481, + "step": 57320 + }, + { + "epoch": 2.267397021890882, + "grad_norm": 1.2157033436451423, + "learning_rate": 1.7090341785635757e-06, + "loss": 0.6948, + "step": 57330 + }, + { + "epoch": 2.267792521109771, + "grad_norm": 1.7803246563676012, + "learning_rate": 1.7073022261230226e-06, + "loss": 0.6538, + "step": 57340 + }, + { + "epoch": 2.26818802032866, + "grad_norm": 1.3001154680395732, + "learning_rate": 1.7055709710194452e-06, + "loss": 0.6846, + "step": 57350 + }, + { + "epoch": 2.268583519547549, + "grad_norm": 1.6657573371342396, + "learning_rate": 1.7038404136194965e-06, + "loss": 0.6423, + "step": 57360 + }, + { + "epoch": 2.268979018766438, + "grad_norm": 1.2987266925352685, + "learning_rate": 1.7021105542896765e-06, + "loss": 0.6781, + "step": 57370 + }, + { + "epoch": 2.269374517985327, + "grad_norm": 1.7055843078501114, + "learning_rate": 1.7003813933963426e-06, + "loss": 0.6559, + "step": 57380 + }, + { + "epoch": 2.269770017204216, + "grad_norm": 1.3833483045606862, + "learning_rate": 1.6986529313056982e-06, + "loss": 0.6719, + "step": 57390 + }, + { + "epoch": 2.270165516423105, + "grad_norm": 1.3986479432025962, + "learning_rate": 1.6969251683838057e-06, + "loss": 0.671, + "step": 57400 + }, + { + "epoch": 2.270561015641994, + "grad_norm": 1.598064121143924, + "learning_rate": 1.6951981049965732e-06, + "loss": 0.662, + "step": 57410 + }, + { + "epoch": 2.2709565148608832, + "grad_norm": 1.3996721457511865, + "learning_rate": 1.6934717415097618e-06, + "loss": 0.6427, + "step": 57420 + }, + { + "epoch": 2.2713520140797723, + "grad_norm": 1.3100170458920979, + "learning_rate": 1.6917460782889893e-06, + "loss": 0.6628, + "step": 57430 + }, + { + "epoch": 2.2717475132986613, + "grad_norm": 1.7192907129857418, + "learning_rate": 1.6900211156997182e-06, + "loss": 0.6303, + "step": 57440 + }, + { + "epoch": 2.2721430125175504, + "grad_norm": 1.4497992112254119, + "learning_rate": 1.6882968541072698e-06, + "loss": 0.636, + "step": 57450 + }, + { + "epoch": 2.2725385117364394, + "grad_norm": 1.424517628367475, + "learning_rate": 1.6865732938768103e-06, + "loss": 0.6761, + "step": 57460 + }, + { + "epoch": 2.2729340109553284, + "grad_norm": 1.7028883652228886, + "learning_rate": 1.6848504353733607e-06, + "loss": 0.6329, + "step": 57470 + }, + { + "epoch": 2.2733295101742175, + "grad_norm": 1.3406169916259694, + "learning_rate": 1.683128278961792e-06, + "loss": 0.6452, + "step": 57480 + }, + { + "epoch": 2.2737250093931065, + "grad_norm": 1.4435983804456758, + "learning_rate": 1.68140682500683e-06, + "loss": 0.6393, + "step": 57490 + }, + { + "epoch": 
2.2741205086119956, + "grad_norm": 1.7632535182341844, + "learning_rate": 1.679686073873046e-06, + "loss": 0.6426, + "step": 57500 + }, + { + "epoch": 2.2745160078308846, + "grad_norm": 1.4736549012581635, + "learning_rate": 1.6779660259248693e-06, + "loss": 0.6398, + "step": 57510 + }, + { + "epoch": 2.2749115070497736, + "grad_norm": 1.4868355896250138, + "learning_rate": 1.6762466815265722e-06, + "loss": 0.6165, + "step": 57520 + }, + { + "epoch": 2.2753070062686627, + "grad_norm": 1.3288081421276643, + "learning_rate": 1.674528041042287e-06, + "loss": 0.6285, + "step": 57530 + }, + { + "epoch": 2.2757025054875517, + "grad_norm": 1.5871980916283486, + "learning_rate": 1.6728101048359884e-06, + "loss": 0.6548, + "step": 57540 + }, + { + "epoch": 2.2760980047064407, + "grad_norm": 1.2992695669355179, + "learning_rate": 1.6710928732715093e-06, + "loss": 0.6624, + "step": 57550 + }, + { + "epoch": 2.27649350392533, + "grad_norm": 1.6214569682743214, + "learning_rate": 1.6693763467125262e-06, + "loss": 0.6733, + "step": 57560 + }, + { + "epoch": 2.276889003144219, + "grad_norm": 1.5715821537528092, + "learning_rate": 1.6676605255225753e-06, + "loss": 0.6703, + "step": 57570 + }, + { + "epoch": 2.277284502363108, + "grad_norm": 1.2825438695845668, + "learning_rate": 1.6659454100650318e-06, + "loss": 0.6578, + "step": 57580 + }, + { + "epoch": 2.277680001581997, + "grad_norm": 1.4912912567077334, + "learning_rate": 1.664231000703132e-06, + "loss": 0.6604, + "step": 57590 + }, + { + "epoch": 2.278075500800886, + "grad_norm": 1.3372721678849402, + "learning_rate": 1.662517297799956e-06, + "loss": 0.6379, + "step": 57600 + }, + { + "epoch": 2.278471000019775, + "grad_norm": 1.3510983600573596, + "learning_rate": 1.6608043017184395e-06, + "loss": 0.6624, + "step": 57610 + }, + { + "epoch": 2.278866499238664, + "grad_norm": 1.491593008341789, + "learning_rate": 1.6590920128213623e-06, + "loss": 0.6824, + "step": 57620 + }, + { + "epoch": 2.279261998457553, + "grad_norm": 1.2950374325888685, + "learning_rate": 1.6573804314713616e-06, + "loss": 0.6708, + "step": 57630 + }, + { + "epoch": 2.279657497676442, + "grad_norm": 1.414082697029235, + "learning_rate": 1.655669558030919e-06, + "loss": 0.688, + "step": 57640 + }, + { + "epoch": 2.280052996895331, + "grad_norm": 1.5270323552824139, + "learning_rate": 1.653959392862367e-06, + "loss": 0.6371, + "step": 57650 + }, + { + "epoch": 2.28044849611422, + "grad_norm": 1.6408441809043868, + "learning_rate": 1.6522499363278915e-06, + "loss": 0.654, + "step": 57660 + }, + { + "epoch": 2.280843995333109, + "grad_norm": 1.393915299510681, + "learning_rate": 1.6505411887895245e-06, + "loss": 0.6597, + "step": 57670 + }, + { + "epoch": 2.2812394945519983, + "grad_norm": 1.567641139918831, + "learning_rate": 1.648833150609151e-06, + "loss": 0.6796, + "step": 57680 + }, + { + "epoch": 2.2816349937708873, + "grad_norm": 1.4602377625263734, + "learning_rate": 1.6471258221485037e-06, + "loss": 0.653, + "step": 57690 + }, + { + "epoch": 2.2820304929897763, + "grad_norm": 1.6635297503480375, + "learning_rate": 1.6454192037691653e-06, + "loss": 0.6897, + "step": 57700 + }, + { + "epoch": 2.2824259922086654, + "grad_norm": 1.2434893150116195, + "learning_rate": 1.6437132958325663e-06, + "loss": 0.66, + "step": 57710 + }, + { + "epoch": 2.2828214914275544, + "grad_norm": 1.1860223731327566, + "learning_rate": 1.6420080986999925e-06, + "loss": 0.6566, + "step": 57720 + }, + { + "epoch": 2.2832169906464435, + "grad_norm": 1.5192886544249655, + "learning_rate": 
1.6403036127325723e-06, + "loss": 0.6671, + "step": 57730 + }, + { + "epoch": 2.2836124898653325, + "grad_norm": 1.4677915420538692, + "learning_rate": 1.6385998382912892e-06, + "loss": 0.6392, + "step": 57740 + }, + { + "epoch": 2.2840079890842215, + "grad_norm": 1.2732380959663254, + "learning_rate": 1.6368967757369708e-06, + "loss": 0.6447, + "step": 57750 + }, + { + "epoch": 2.2844034883031106, + "grad_norm": 1.2439017697047998, + "learning_rate": 1.6351944254302993e-06, + "loss": 0.6778, + "step": 57760 + }, + { + "epoch": 2.2847989875219996, + "grad_norm": 1.7164507940755784, + "learning_rate": 1.6334927877318008e-06, + "loss": 0.6557, + "step": 57770 + }, + { + "epoch": 2.2851944867408887, + "grad_norm": 1.6023879301931152, + "learning_rate": 1.6317918630018552e-06, + "loss": 0.6529, + "step": 57780 + }, + { + "epoch": 2.2855899859597777, + "grad_norm": 1.3174525669716117, + "learning_rate": 1.6300916516006871e-06, + "loss": 0.6571, + "step": 57790 + }, + { + "epoch": 2.2859854851786667, + "grad_norm": 1.5634580441480015, + "learning_rate": 1.628392153888375e-06, + "loss": 0.6656, + "step": 57800 + }, + { + "epoch": 2.2863809843975558, + "grad_norm": 1.5002028902256408, + "learning_rate": 1.62669337022484e-06, + "loss": 0.6628, + "step": 57810 + }, + { + "epoch": 2.286776483616445, + "grad_norm": 1.653627348827789, + "learning_rate": 1.62499530096986e-06, + "loss": 0.6397, + "step": 57820 + }, + { + "epoch": 2.287171982835334, + "grad_norm": 1.3228551588943012, + "learning_rate": 1.6232979464830512e-06, + "loss": 0.6952, + "step": 57830 + }, + { + "epoch": 2.287567482054223, + "grad_norm": 1.5451789610980418, + "learning_rate": 1.6216013071238884e-06, + "loss": 0.678, + "step": 57840 + }, + { + "epoch": 2.287962981273112, + "grad_norm": 1.6273381667535372, + "learning_rate": 1.6199053832516875e-06, + "loss": 0.634, + "step": 57850 + }, + { + "epoch": 2.288358480492001, + "grad_norm": 1.1599876504475686, + "learning_rate": 1.6182101752256201e-06, + "loss": 0.6506, + "step": 57860 + }, + { + "epoch": 2.28875397971089, + "grad_norm": 1.5176658895387827, + "learning_rate": 1.6165156834046996e-06, + "loss": 0.6808, + "step": 57870 + }, + { + "epoch": 2.289149478929779, + "grad_norm": 1.6738101398943044, + "learning_rate": 1.6148219081477901e-06, + "loss": 0.6563, + "step": 57880 + }, + { + "epoch": 2.289544978148668, + "grad_norm": 1.484106754214158, + "learning_rate": 1.613128849813606e-06, + "loss": 0.6912, + "step": 57890 + }, + { + "epoch": 2.289940477367557, + "grad_norm": 2.1509959343783, + "learning_rate": 1.6114365087607053e-06, + "loss": 0.6071, + "step": 57900 + }, + { + "epoch": 2.290335976586446, + "grad_norm": 1.323748398522322, + "learning_rate": 1.6097448853475e-06, + "loss": 0.6647, + "step": 57910 + }, + { + "epoch": 2.290731475805335, + "grad_norm": 1.821699853039601, + "learning_rate": 1.6080539799322442e-06, + "loss": 0.6669, + "step": 57920 + }, + { + "epoch": 2.2911269750242242, + "grad_norm": 1.4426798718781264, + "learning_rate": 1.6063637928730457e-06, + "loss": 0.6681, + "step": 57930 + }, + { + "epoch": 2.2915224742431133, + "grad_norm": 1.6176028663896787, + "learning_rate": 1.6046743245278556e-06, + "loss": 0.6643, + "step": 57940 + }, + { + "epoch": 2.2919179734620023, + "grad_norm": 1.5368376892843394, + "learning_rate": 1.6029855752544737e-06, + "loss": 0.6568, + "step": 57950 + }, + { + "epoch": 2.2923134726808914, + "grad_norm": 1.6495477891397017, + "learning_rate": 1.6012975454105472e-06, + "loss": 0.6708, + "step": 57960 + }, + { + "epoch": 
2.2927089718997804, + "grad_norm": 1.675339368626984, + "learning_rate": 1.5996102353535753e-06, + "loss": 0.6371, + "step": 57970 + }, + { + "epoch": 2.2931044711186694, + "grad_norm": 1.418447469175257, + "learning_rate": 1.5979236454408975e-06, + "loss": 0.6489, + "step": 57980 + }, + { + "epoch": 2.2934999703375585, + "grad_norm": 1.863394765318512, + "learning_rate": 1.5962377760297083e-06, + "loss": 0.6536, + "step": 57990 + }, + { + "epoch": 2.2938954695564475, + "grad_norm": 1.5632786137165733, + "learning_rate": 1.5945526274770423e-06, + "loss": 0.6569, + "step": 58000 + }, + { + "epoch": 2.2942909687753366, + "grad_norm": 1.664471452019867, + "learning_rate": 1.592868200139789e-06, + "loss": 0.623, + "step": 58010 + }, + { + "epoch": 2.2946864679942256, + "grad_norm": 1.3245194241777019, + "learning_rate": 1.5911844943746774e-06, + "loss": 0.6561, + "step": 58020 + }, + { + "epoch": 2.2950819672131146, + "grad_norm": 1.3490462723433019, + "learning_rate": 1.5895015105382915e-06, + "loss": 0.6353, + "step": 58030 + }, + { + "epoch": 2.2954774664320037, + "grad_norm": 1.3904326446403537, + "learning_rate": 1.5878192489870543e-06, + "loss": 0.6616, + "step": 58040 + }, + { + "epoch": 2.2958729656508927, + "grad_norm": 1.561928048385546, + "learning_rate": 1.586137710077244e-06, + "loss": 0.6663, + "step": 58050 + }, + { + "epoch": 2.2962684648697818, + "grad_norm": 1.2986181394502438, + "learning_rate": 1.5844568941649795e-06, + "loss": 0.6742, + "step": 58060 + }, + { + "epoch": 2.296663964088671, + "grad_norm": 1.6383648008205771, + "learning_rate": 1.5827768016062295e-06, + "loss": 0.6476, + "step": 58070 + }, + { + "epoch": 2.29705946330756, + "grad_norm": 1.2337991535244286, + "learning_rate": 1.5810974327568064e-06, + "loss": 0.6498, + "step": 58080 + }, + { + "epoch": 2.297454962526449, + "grad_norm": 1.3983250667609106, + "learning_rate": 1.5794187879723755e-06, + "loss": 0.6523, + "step": 58090 + }, + { + "epoch": 2.297850461745338, + "grad_norm": 1.3251364210358583, + "learning_rate": 1.5777408676084416e-06, + "loss": 0.6714, + "step": 58100 + }, + { + "epoch": 2.298245960964227, + "grad_norm": 1.485177517136633, + "learning_rate": 1.5760636720203626e-06, + "loss": 0.6551, + "step": 58110 + }, + { + "epoch": 2.298641460183116, + "grad_norm": 1.457459592180781, + "learning_rate": 1.5743872015633383e-06, + "loss": 0.6576, + "step": 58120 + }, + { + "epoch": 2.299036959402005, + "grad_norm": 1.519833056103309, + "learning_rate": 1.572711456592415e-06, + "loss": 0.6706, + "step": 58130 + }, + { + "epoch": 2.299432458620894, + "grad_norm": 1.2310258239208047, + "learning_rate": 1.5710364374624897e-06, + "loss": 0.6795, + "step": 58140 + }, + { + "epoch": 2.299827957839783, + "grad_norm": 1.6219898840985205, + "learning_rate": 1.5693621445283002e-06, + "loss": 0.6621, + "step": 58150 + }, + { + "epoch": 2.300223457058672, + "grad_norm": 1.72127109905613, + "learning_rate": 1.5676885781444357e-06, + "loss": 0.6595, + "step": 58160 + }, + { + "epoch": 2.300618956277561, + "grad_norm": 1.2784419660467872, + "learning_rate": 1.5660157386653252e-06, + "loss": 0.6898, + "step": 58170 + }, + { + "epoch": 2.3010144554964502, + "grad_norm": 1.4028041658903252, + "learning_rate": 1.5643436264452527e-06, + "loss": 0.6905, + "step": 58180 + }, + { + "epoch": 2.3014099547153393, + "grad_norm": 1.4509910281962437, + "learning_rate": 1.5626722418383372e-06, + "loss": 0.6315, + "step": 58190 + }, + { + "epoch": 2.3018054539342283, + "grad_norm": 1.4389464337031619, + "learning_rate": 
1.5610015851985533e-06, + "loss": 0.6589, + "step": 58200 + }, + { + "epoch": 2.3022009531531173, + "grad_norm": 1.6162702061779972, + "learning_rate": 1.5593316568797145e-06, + "loss": 0.6435, + "step": 58210 + }, + { + "epoch": 2.3025964523720064, + "grad_norm": 1.3017251205365583, + "learning_rate": 1.557662457235486e-06, + "loss": 0.6703, + "step": 58220 + }, + { + "epoch": 2.3029919515908954, + "grad_norm": 1.6342843100300628, + "learning_rate": 1.555993986619373e-06, + "loss": 0.6601, + "step": 58230 + }, + { + "epoch": 2.3033874508097845, + "grad_norm": 1.4379683509508339, + "learning_rate": 1.5543262453847318e-06, + "loss": 0.6612, + "step": 58240 + }, + { + "epoch": 2.3037829500286735, + "grad_norm": 1.4207268132884536, + "learning_rate": 1.5526592338847579e-06, + "loss": 0.6337, + "step": 58250 + }, + { + "epoch": 2.3041784492475625, + "grad_norm": 1.7506847451268952, + "learning_rate": 1.5509929524724999e-06, + "loss": 0.6606, + "step": 58260 + }, + { + "epoch": 2.3045739484664516, + "grad_norm": 1.5339848231245854, + "learning_rate": 1.5493274015008435e-06, + "loss": 0.6499, + "step": 58270 + }, + { + "epoch": 2.304969447685341, + "grad_norm": 1.554093932889167, + "learning_rate": 1.5476625813225276e-06, + "loss": 0.6519, + "step": 58280 + }, + { + "epoch": 2.3053649469042297, + "grad_norm": 1.2985812733157804, + "learning_rate": 1.5459984922901312e-06, + "loss": 0.6443, + "step": 58290 + }, + { + "epoch": 2.305760446123119, + "grad_norm": 1.4940357814146108, + "learning_rate": 1.5443351347560777e-06, + "loss": 0.6694, + "step": 58300 + }, + { + "epoch": 2.3061559453420077, + "grad_norm": 1.5596465786415064, + "learning_rate": 1.5426725090726407e-06, + "loss": 0.6423, + "step": 58310 + }, + { + "epoch": 2.3065514445608972, + "grad_norm": 1.4237617653192283, + "learning_rate": 1.5410106155919352e-06, + "loss": 0.6754, + "step": 58320 + }, + { + "epoch": 2.306946943779786, + "grad_norm": 1.2302332473431614, + "learning_rate": 1.53934945466592e-06, + "loss": 0.6792, + "step": 58330 + }, + { + "epoch": 2.3073424429986753, + "grad_norm": 1.4841925212686782, + "learning_rate": 1.537689026646403e-06, + "loss": 0.6579, + "step": 58340 + }, + { + "epoch": 2.307737942217564, + "grad_norm": 1.5674232813307791, + "learning_rate": 1.5360293318850327e-06, + "loss": 0.674, + "step": 58350 + }, + { + "epoch": 2.3081334414364534, + "grad_norm": 1.3277182227356745, + "learning_rate": 1.5343703707333035e-06, + "loss": 0.6645, + "step": 58360 + }, + { + "epoch": 2.308528940655342, + "grad_norm": 1.7565370101921338, + "learning_rate": 1.5327121435425573e-06, + "loss": 0.6122, + "step": 58370 + }, + { + "epoch": 2.3089244398742315, + "grad_norm": 1.443959377358891, + "learning_rate": 1.5310546506639756e-06, + "loss": 0.6371, + "step": 58380 + }, + { + "epoch": 2.30931993909312, + "grad_norm": 1.2468314994063057, + "learning_rate": 1.5293978924485898e-06, + "loss": 0.6684, + "step": 58390 + }, + { + "epoch": 2.3097154383120095, + "grad_norm": 1.533073051614103, + "learning_rate": 1.5277418692472696e-06, + "loss": 0.6752, + "step": 58400 + }, + { + "epoch": 2.310110937530898, + "grad_norm": 1.6903457432094442, + "learning_rate": 1.5260865814107356e-06, + "loss": 0.6848, + "step": 58410 + }, + { + "epoch": 2.3105064367497876, + "grad_norm": 1.3797874101974819, + "learning_rate": 1.5244320292895466e-06, + "loss": 0.6438, + "step": 58420 + }, + { + "epoch": 2.310901935968676, + "grad_norm": 1.3366210554745637, + "learning_rate": 1.5227782132341124e-06, + "loss": 0.6638, + "step": 58430 + }, + { + 
"epoch": 2.3112974351875657, + "grad_norm": 1.4089313087924513, + "learning_rate": 1.5211251335946774e-06, + "loss": 0.6623, + "step": 58440 + }, + { + "epoch": 2.3116929344064547, + "grad_norm": 1.564403536692619, + "learning_rate": 1.5194727907213396e-06, + "loss": 0.6699, + "step": 58450 + }, + { + "epoch": 2.3120884336253438, + "grad_norm": 1.3470041831455608, + "learning_rate": 1.5178211849640345e-06, + "loss": 0.6321, + "step": 58460 + }, + { + "epoch": 2.312483932844233, + "grad_norm": 1.5340558770808561, + "learning_rate": 1.5161703166725466e-06, + "loss": 0.6513, + "step": 58470 + }, + { + "epoch": 2.312879432063122, + "grad_norm": 1.5683140626013419, + "learning_rate": 1.5145201861964988e-06, + "loss": 0.6419, + "step": 58480 + }, + { + "epoch": 2.313274931282011, + "grad_norm": 1.8521633623477716, + "learning_rate": 1.5128707938853627e-06, + "loss": 0.6584, + "step": 58490 + }, + { + "epoch": 2.3136704305009, + "grad_norm": 1.5309877105310665, + "learning_rate": 1.5112221400884485e-06, + "loss": 0.6071, + "step": 58500 + }, + { + "epoch": 2.314065929719789, + "grad_norm": 1.345806768643149, + "learning_rate": 1.5095742251549167e-06, + "loss": 0.6742, + "step": 58510 + }, + { + "epoch": 2.314461428938678, + "grad_norm": 1.6424722452691336, + "learning_rate": 1.507927049433765e-06, + "loss": 0.6737, + "step": 58520 + }, + { + "epoch": 2.314856928157567, + "grad_norm": 1.395727716804011, + "learning_rate": 1.5062806132738362e-06, + "loss": 0.7003, + "step": 58530 + }, + { + "epoch": 2.315252427376456, + "grad_norm": 1.777091670701276, + "learning_rate": 1.5046349170238195e-06, + "loss": 0.6382, + "step": 58540 + }, + { + "epoch": 2.315647926595345, + "grad_norm": 1.4701249565898737, + "learning_rate": 1.5029899610322446e-06, + "loss": 0.6884, + "step": 58550 + }, + { + "epoch": 2.316043425814234, + "grad_norm": 1.4692638398286286, + "learning_rate": 1.5013457456474827e-06, + "loss": 0.6692, + "step": 58560 + }, + { + "epoch": 2.316438925033123, + "grad_norm": 1.4041738955726866, + "learning_rate": 1.4997022712177538e-06, + "loss": 0.6642, + "step": 58570 + }, + { + "epoch": 2.3168344242520122, + "grad_norm": 1.3469118053050946, + "learning_rate": 1.4980595380911167e-06, + "loss": 0.6968, + "step": 58580 + }, + { + "epoch": 2.3172299234709013, + "grad_norm": 1.426927940296544, + "learning_rate": 1.4964175466154712e-06, + "loss": 0.6568, + "step": 58590 + }, + { + "epoch": 2.3176254226897903, + "grad_norm": 1.351325834893919, + "learning_rate": 1.4947762971385671e-06, + "loss": 0.6443, + "step": 58600 + }, + { + "epoch": 2.3180209219086794, + "grad_norm": 1.571600262129865, + "learning_rate": 1.4931357900079896e-06, + "loss": 0.6538, + "step": 58610 + }, + { + "epoch": 2.3184164211275684, + "grad_norm": 1.4764750593585865, + "learning_rate": 1.491496025571173e-06, + "loss": 0.6424, + "step": 58620 + }, + { + "epoch": 2.3188119203464574, + "grad_norm": 1.578660614384939, + "learning_rate": 1.4898570041753886e-06, + "loss": 0.6435, + "step": 58630 + }, + { + "epoch": 2.3192074195653465, + "grad_norm": 1.2492216998705292, + "learning_rate": 1.4882187261677555e-06, + "loss": 0.6515, + "step": 58640 + }, + { + "epoch": 2.3196029187842355, + "grad_norm": 1.699200157935068, + "learning_rate": 1.48658119189523e-06, + "loss": 0.6402, + "step": 58650 + }, + { + "epoch": 2.3199984180031246, + "grad_norm": 1.3847294515883004, + "learning_rate": 1.4849444017046173e-06, + "loss": 0.6513, + "step": 58660 + }, + { + "epoch": 2.3203939172220136, + "grad_norm": 1.6170081171977473, + "learning_rate": 
1.4833083559425598e-06, + "loss": 0.6748, + "step": 58670 + }, + { + "epoch": 2.3207894164409026, + "grad_norm": 1.4320738633561334, + "learning_rate": 1.4816730549555436e-06, + "loss": 0.653, + "step": 58680 + }, + { + "epoch": 2.3211849156597917, + "grad_norm": 1.3464218011610256, + "learning_rate": 1.4800384990898965e-06, + "loss": 0.6594, + "step": 58690 + }, + { + "epoch": 2.3215804148786807, + "grad_norm": 1.2664219459404893, + "learning_rate": 1.4784046886917919e-06, + "loss": 0.6888, + "step": 58700 + }, + { + "epoch": 2.3219759140975698, + "grad_norm": 1.64262450382083, + "learning_rate": 1.47677162410724e-06, + "loss": 0.6772, + "step": 58710 + }, + { + "epoch": 2.322371413316459, + "grad_norm": 1.4687884249862226, + "learning_rate": 1.4751393056820996e-06, + "loss": 0.6725, + "step": 58720 + }, + { + "epoch": 2.322766912535348, + "grad_norm": 1.526372832984667, + "learning_rate": 1.4735077337620634e-06, + "loss": 0.6737, + "step": 58730 + }, + { + "epoch": 2.323162411754237, + "grad_norm": 1.2434984411986378, + "learning_rate": 1.4718769086926742e-06, + "loss": 0.6559, + "step": 58740 + }, + { + "epoch": 2.323557910973126, + "grad_norm": 1.5748942810141886, + "learning_rate": 1.4702468308193102e-06, + "loss": 0.6794, + "step": 58750 + }, + { + "epoch": 2.323953410192015, + "grad_norm": 1.6068805492861729, + "learning_rate": 1.4686175004871966e-06, + "loss": 0.6632, + "step": 58760 + }, + { + "epoch": 2.324348909410904, + "grad_norm": 1.3607180031735213, + "learning_rate": 1.466988918041396e-06, + "loss": 0.6356, + "step": 58770 + }, + { + "epoch": 2.324744408629793, + "grad_norm": 1.5275811133382595, + "learning_rate": 1.465361083826813e-06, + "loss": 0.6906, + "step": 58780 + }, + { + "epoch": 2.325139907848682, + "grad_norm": 1.3645446397267935, + "learning_rate": 1.463733998188197e-06, + "loss": 0.6585, + "step": 58790 + }, + { + "epoch": 2.325535407067571, + "grad_norm": 1.3757428764176545, + "learning_rate": 1.4621076614701368e-06, + "loss": 0.6788, + "step": 58800 + }, + { + "epoch": 2.32593090628646, + "grad_norm": 1.4943658670214794, + "learning_rate": 1.4604820740170622e-06, + "loss": 0.6397, + "step": 58810 + }, + { + "epoch": 2.326326405505349, + "grad_norm": 1.3099215406675848, + "learning_rate": 1.4588572361732428e-06, + "loss": 0.6353, + "step": 58820 + }, + { + "epoch": 2.3267219047242382, + "grad_norm": 1.4080421032469292, + "learning_rate": 1.457233148282795e-06, + "loss": 0.6246, + "step": 58830 + }, + { + "epoch": 2.3271174039431273, + "grad_norm": 1.5910063067952556, + "learning_rate": 1.4556098106896698e-06, + "loss": 0.6693, + "step": 58840 + }, + { + "epoch": 2.3275129031620163, + "grad_norm": 1.273247061745168, + "learning_rate": 1.4539872237376646e-06, + "loss": 0.6395, + "step": 58850 + }, + { + "epoch": 2.3279084023809054, + "grad_norm": 1.4800588249259536, + "learning_rate": 1.452365387770413e-06, + "loss": 0.6492, + "step": 58860 + }, + { + "epoch": 2.3283039015997944, + "grad_norm": 1.5803342815483064, + "learning_rate": 1.450744303131395e-06, + "loss": 0.6418, + "step": 58870 + }, + { + "epoch": 2.3286994008186834, + "grad_norm": 1.5702078922429623, + "learning_rate": 1.449123970163926e-06, + "loss": 0.6649, + "step": 58880 + }, + { + "epoch": 2.3290949000375725, + "grad_norm": 1.1016771399149003, + "learning_rate": 1.4475043892111668e-06, + "loss": 0.6483, + "step": 58890 + }, + { + "epoch": 2.3294903992564615, + "grad_norm": 1.4473672733994998, + "learning_rate": 1.4458855606161143e-06, + "loss": 0.6572, + "step": 58900 + }, + { + "epoch": 
2.3298858984753505, + "grad_norm": 1.543195619997971, + "learning_rate": 1.4442674847216127e-06, + "loss": 0.6655, + "step": 58910 + }, + { + "epoch": 2.3302813976942396, + "grad_norm": 1.4567336401416626, + "learning_rate": 1.4426501618703392e-06, + "loss": 0.6488, + "step": 58920 + }, + { + "epoch": 2.3306768969131286, + "grad_norm": 1.6509552237970642, + "learning_rate": 1.4410335924048169e-06, + "loss": 0.6308, + "step": 58930 + }, + { + "epoch": 2.3310723961320177, + "grad_norm": 1.653008636869871, + "learning_rate": 1.4394177766674055e-06, + "loss": 0.6773, + "step": 58940 + }, + { + "epoch": 2.3314678953509067, + "grad_norm": 1.2070845138616273, + "learning_rate": 1.4378027150003094e-06, + "loss": 0.656, + "step": 58950 + }, + { + "epoch": 2.3318633945697957, + "grad_norm": 1.643602810364153, + "learning_rate": 1.436188407745569e-06, + "loss": 0.6305, + "step": 58960 + }, + { + "epoch": 2.332258893788685, + "grad_norm": 1.2533714564833973, + "learning_rate": 1.4345748552450694e-06, + "loss": 0.6171, + "step": 58970 + }, + { + "epoch": 2.332654393007574, + "grad_norm": 1.4598709890067572, + "learning_rate": 1.43296205784053e-06, + "loss": 0.6843, + "step": 58980 + }, + { + "epoch": 2.333049892226463, + "grad_norm": 1.795831372239305, + "learning_rate": 1.4313500158735171e-06, + "loss": 0.6219, + "step": 58990 + }, + { + "epoch": 2.333445391445352, + "grad_norm": 1.3257915496488566, + "learning_rate": 1.4297387296854327e-06, + "loss": 0.652, + "step": 59000 + }, + { + "epoch": 2.333840890664241, + "grad_norm": 1.349164934271811, + "learning_rate": 1.4281281996175167e-06, + "loss": 0.6411, + "step": 59010 + }, + { + "epoch": 2.33423638988313, + "grad_norm": 1.3571014733250697, + "learning_rate": 1.4265184260108562e-06, + "loss": 0.6529, + "step": 59020 + }, + { + "epoch": 2.334631889102019, + "grad_norm": 1.9726802540595698, + "learning_rate": 1.4249094092063697e-06, + "loss": 0.6692, + "step": 59030 + }, + { + "epoch": 2.335027388320908, + "grad_norm": 1.517215322016344, + "learning_rate": 1.4233011495448228e-06, + "loss": 0.6193, + "step": 59040 + }, + { + "epoch": 2.335422887539797, + "grad_norm": 1.476802965089533, + "learning_rate": 1.4216936473668159e-06, + "loss": 0.6836, + "step": 59050 + }, + { + "epoch": 2.335818386758686, + "grad_norm": 1.535635423176671, + "learning_rate": 1.4200869030127896e-06, + "loss": 0.6241, + "step": 59060 + }, + { + "epoch": 2.336213885977575, + "grad_norm": 1.4791019565349466, + "learning_rate": 1.4184809168230245e-06, + "loss": 0.6802, + "step": 59070 + }, + { + "epoch": 2.336609385196464, + "grad_norm": 1.422139344051872, + "learning_rate": 1.4168756891376434e-06, + "loss": 0.6485, + "step": 59080 + }, + { + "epoch": 2.3370048844153533, + "grad_norm": 1.250139060542618, + "learning_rate": 1.415271220296603e-06, + "loss": 0.6561, + "step": 59090 + }, + { + "epoch": 2.3374003836342423, + "grad_norm": 1.730120130093888, + "learning_rate": 1.4136675106397051e-06, + "loss": 0.631, + "step": 59100 + }, + { + "epoch": 2.3377958828531313, + "grad_norm": 1.171299157037247, + "learning_rate": 1.4120645605065858e-06, + "loss": 0.68, + "step": 59110 + }, + { + "epoch": 2.3381913820720204, + "grad_norm": 1.256204715171071, + "learning_rate": 1.410462370236725e-06, + "loss": 0.6685, + "step": 59120 + }, + { + "epoch": 2.3385868812909094, + "grad_norm": 1.4909767608205295, + "learning_rate": 1.4088609401694353e-06, + "loss": 0.6415, + "step": 59130 + }, + { + "epoch": 2.3389823805097985, + "grad_norm": 1.464839828771158, + "learning_rate": 
1.4072602706438765e-06, + "loss": 0.6355, + "step": 59140 + }, + { + "epoch": 2.3393778797286875, + "grad_norm": 1.407550713872548, + "learning_rate": 1.405660361999039e-06, + "loss": 0.6491, + "step": 59150 + }, + { + "epoch": 2.3397733789475765, + "grad_norm": 1.483199208642359, + "learning_rate": 1.4040612145737608e-06, + "loss": 0.6586, + "step": 59160 + }, + { + "epoch": 2.3401688781664656, + "grad_norm": 1.4822118430210998, + "learning_rate": 1.4024628287067088e-06, + "loss": 0.6817, + "step": 59170 + }, + { + "epoch": 2.3405643773853546, + "grad_norm": 1.2607731426652578, + "learning_rate": 1.4008652047363969e-06, + "loss": 0.6718, + "step": 59180 + }, + { + "epoch": 2.3409598766042437, + "grad_norm": 1.4850271041830017, + "learning_rate": 1.3992683430011722e-06, + "loss": 0.6518, + "step": 59190 + }, + { + "epoch": 2.3413553758231327, + "grad_norm": 1.4277620829199973, + "learning_rate": 1.3976722438392254e-06, + "loss": 0.65, + "step": 59200 + }, + { + "epoch": 2.3417508750420217, + "grad_norm": 1.4754328545544362, + "learning_rate": 1.39607690758858e-06, + "loss": 0.6861, + "step": 59210 + }, + { + "epoch": 2.3421463742609108, + "grad_norm": 1.2848009604767283, + "learning_rate": 1.3944823345871044e-06, + "loss": 0.6786, + "step": 59220 + }, + { + "epoch": 2.3425418734798, + "grad_norm": 1.3427506303437031, + "learning_rate": 1.3928885251725e-06, + "loss": 0.6461, + "step": 59230 + }, + { + "epoch": 2.342937372698689, + "grad_norm": 1.8386443146206752, + "learning_rate": 1.3912954796823064e-06, + "loss": 0.6896, + "step": 59240 + }, + { + "epoch": 2.343332871917578, + "grad_norm": 1.6415513179502648, + "learning_rate": 1.3897031984539067e-06, + "loss": 0.6465, + "step": 59250 + }, + { + "epoch": 2.343728371136467, + "grad_norm": 1.5105472299716078, + "learning_rate": 1.3881116818245154e-06, + "loss": 0.681, + "step": 59260 + }, + { + "epoch": 2.344123870355356, + "grad_norm": 1.411179528838079, + "learning_rate": 1.3865209301311928e-06, + "loss": 0.6655, + "step": 59270 + }, + { + "epoch": 2.344519369574245, + "grad_norm": 1.565916006096879, + "learning_rate": 1.3849309437108283e-06, + "loss": 0.6457, + "step": 59280 + }, + { + "epoch": 2.344914868793134, + "grad_norm": 1.6985129995597834, + "learning_rate": 1.383341722900159e-06, + "loss": 0.6068, + "step": 59290 + }, + { + "epoch": 2.345310368012023, + "grad_norm": 1.7523342050438377, + "learning_rate": 1.3817532680357481e-06, + "loss": 0.6535, + "step": 59300 + }, + { + "epoch": 2.345705867230912, + "grad_norm": 1.481222936280899, + "learning_rate": 1.3801655794540087e-06, + "loss": 0.6382, + "step": 59310 + }, + { + "epoch": 2.346101366449801, + "grad_norm": 1.2719378815407312, + "learning_rate": 1.378578657491182e-06, + "loss": 0.6826, + "step": 59320 + }, + { + "epoch": 2.34649686566869, + "grad_norm": 1.5770208930064684, + "learning_rate": 1.376992502483354e-06, + "loss": 0.6397, + "step": 59330 + }, + { + "epoch": 2.3468923648875792, + "grad_norm": 1.4328905492841875, + "learning_rate": 1.3754071147664432e-06, + "loss": 0.6459, + "step": 59340 + }, + { + "epoch": 2.3472878641064683, + "grad_norm": 1.6700468378830629, + "learning_rate": 1.373822494676209e-06, + "loss": 0.661, + "step": 59350 + }, + { + "epoch": 2.3476833633253573, + "grad_norm": 1.2650984169596182, + "learning_rate": 1.3722386425482454e-06, + "loss": 0.6646, + "step": 59360 + }, + { + "epoch": 2.3480788625442464, + "grad_norm": 1.342052914416015, + "learning_rate": 1.3706555587179864e-06, + "loss": 0.6574, + "step": 59370 + }, + { + "epoch": 
2.3484743617631354, + "grad_norm": 1.4259567661685697, + "learning_rate": 1.3690732435207006e-06, + "loss": 0.6549, + "step": 59380 + }, + { + "epoch": 2.3488698609820244, + "grad_norm": 1.6446463839117533, + "learning_rate": 1.3674916972914976e-06, + "loss": 0.6469, + "step": 59390 + }, + { + "epoch": 2.3492653602009135, + "grad_norm": 1.558085067492573, + "learning_rate": 1.365910920365318e-06, + "loss": 0.6661, + "step": 59400 + }, + { + "epoch": 2.3496608594198025, + "grad_norm": 1.2039210028900755, + "learning_rate": 1.3643309130769494e-06, + "loss": 0.6767, + "step": 59410 + }, + { + "epoch": 2.3500563586386916, + "grad_norm": 1.5176618877166277, + "learning_rate": 1.3627516757610032e-06, + "loss": 0.6629, + "step": 59420 + }, + { + "epoch": 2.3504518578575806, + "grad_norm": 1.4430169207270038, + "learning_rate": 1.3611732087519397e-06, + "loss": 0.6797, + "step": 59430 + }, + { + "epoch": 2.3508473570764696, + "grad_norm": 1.9358104877105615, + "learning_rate": 1.3595955123840476e-06, + "loss": 0.6688, + "step": 59440 + }, + { + "epoch": 2.3512428562953587, + "grad_norm": 1.2966112649450476, + "learning_rate": 1.3580185869914597e-06, + "loss": 0.7071, + "step": 59450 + }, + { + "epoch": 2.3516383555142477, + "grad_norm": 1.442660163286812, + "learning_rate": 1.3564424329081398e-06, + "loss": 0.6737, + "step": 59460 + }, + { + "epoch": 2.3520338547331368, + "grad_norm": 1.614874860885327, + "learning_rate": 1.354867050467889e-06, + "loss": 0.647, + "step": 59470 + }, + { + "epoch": 2.352429353952026, + "grad_norm": 1.39062380619087, + "learning_rate": 1.3532924400043496e-06, + "loss": 0.6854, + "step": 59480 + }, + { + "epoch": 2.352824853170915, + "grad_norm": 1.6372282377873661, + "learning_rate": 1.3517186018509936e-06, + "loss": 0.6313, + "step": 59490 + }, + { + "epoch": 2.353220352389804, + "grad_norm": 1.4939086310379999, + "learning_rate": 1.3501455363411364e-06, + "loss": 0.6641, + "step": 59500 + }, + { + "epoch": 2.353615851608693, + "grad_norm": 1.7605222814424728, + "learning_rate": 1.348573243807923e-06, + "loss": 0.613, + "step": 59510 + }, + { + "epoch": 2.354011350827582, + "grad_norm": 1.834555047285748, + "learning_rate": 1.3470017245843408e-06, + "loss": 0.6586, + "step": 59520 + }, + { + "epoch": 2.354406850046471, + "grad_norm": 1.4537880489905297, + "learning_rate": 1.3454309790032093e-06, + "loss": 0.6798, + "step": 59530 + }, + { + "epoch": 2.35480234926536, + "grad_norm": 1.8650290561875345, + "learning_rate": 1.3438610073971863e-06, + "loss": 0.6182, + "step": 59540 + }, + { + "epoch": 2.355197848484249, + "grad_norm": 1.3792549427498257, + "learning_rate": 1.3422918100987625e-06, + "loss": 0.6406, + "step": 59550 + }, + { + "epoch": 2.355593347703138, + "grad_norm": 1.3926591348105821, + "learning_rate": 1.3407233874402703e-06, + "loss": 0.6793, + "step": 59560 + }, + { + "epoch": 2.355988846922027, + "grad_norm": 1.3892070418840154, + "learning_rate": 1.339155739753872e-06, + "loss": 0.638, + "step": 59570 + }, + { + "epoch": 2.356384346140916, + "grad_norm": 1.473607972554162, + "learning_rate": 1.337588867371571e-06, + "loss": 0.692, + "step": 59580 + }, + { + "epoch": 2.3567798453598052, + "grad_norm": 1.5750437920578224, + "learning_rate": 1.336022770625201e-06, + "loss": 0.6317, + "step": 59590 + }, + { + "epoch": 2.3571753445786943, + "grad_norm": 1.3371570634090677, + "learning_rate": 1.334457449846438e-06, + "loss": 0.6472, + "step": 59600 + }, + { + "epoch": 2.3575708437975837, + "grad_norm": 1.3035560578142398, + "learning_rate": 
1.3328929053667866e-06, + "loss": 0.6458, + "step": 59610 + }, + { + "epoch": 2.3579663430164723, + "grad_norm": 1.3944212438036032, + "learning_rate": 1.331329137517594e-06, + "loss": 0.6382, + "step": 59620 + }, + { + "epoch": 2.358361842235362, + "grad_norm": 1.5005476053868148, + "learning_rate": 1.3297661466300366e-06, + "loss": 0.6576, + "step": 59630 + }, + { + "epoch": 2.3587573414542504, + "grad_norm": 1.5264474475420733, + "learning_rate": 1.328203933035131e-06, + "loss": 0.6349, + "step": 59640 + }, + { + "epoch": 2.35915284067314, + "grad_norm": 1.5970556218124319, + "learning_rate": 1.326642497063727e-06, + "loss": 0.6584, + "step": 59650 + }, + { + "epoch": 2.3595483398920285, + "grad_norm": 1.3766326130069657, + "learning_rate": 1.3250818390465092e-06, + "loss": 0.6685, + "step": 59660 + }, + { + "epoch": 2.359943839110918, + "grad_norm": 1.4963677502903887, + "learning_rate": 1.323521959313997e-06, + "loss": 0.6605, + "step": 59670 + }, + { + "epoch": 2.3603393383298066, + "grad_norm": 1.3302813930397257, + "learning_rate": 1.3219628581965493e-06, + "loss": 0.6655, + "step": 59680 + }, + { + "epoch": 2.360734837548696, + "grad_norm": 1.8136802835330335, + "learning_rate": 1.320404536024354e-06, + "loss": 0.6365, + "step": 59690 + }, + { + "epoch": 2.3611303367675847, + "grad_norm": 1.3834212107444948, + "learning_rate": 1.3188469931274394e-06, + "loss": 0.6787, + "step": 59700 + }, + { + "epoch": 2.361525835986474, + "grad_norm": 1.6115000671482202, + "learning_rate": 1.317290229835666e-06, + "loss": 0.6794, + "step": 59710 + }, + { + "epoch": 2.3619213352053627, + "grad_norm": 1.70829693992852, + "learning_rate": 1.315734246478727e-06, + "loss": 0.6583, + "step": 59720 + }, + { + "epoch": 2.362316834424252, + "grad_norm": 1.7795299826314106, + "learning_rate": 1.3141790433861574e-06, + "loss": 0.6541, + "step": 59730 + }, + { + "epoch": 2.362712333643141, + "grad_norm": 1.576112936163215, + "learning_rate": 1.3126246208873177e-06, + "loss": 0.6401, + "step": 59740 + }, + { + "epoch": 2.3631078328620303, + "grad_norm": 1.2382669149117873, + "learning_rate": 1.3110709793114128e-06, + "loss": 0.6478, + "step": 59750 + }, + { + "epoch": 2.363503332080919, + "grad_norm": 1.4180050814353615, + "learning_rate": 1.3095181189874733e-06, + "loss": 0.6622, + "step": 59760 + }, + { + "epoch": 2.3638988312998084, + "grad_norm": 1.353602984572126, + "learning_rate": 1.3079660402443716e-06, + "loss": 0.6654, + "step": 59770 + }, + { + "epoch": 2.3642943305186974, + "grad_norm": 1.4635536115259493, + "learning_rate": 1.3064147434108098e-06, + "loss": 0.6476, + "step": 59780 + }, + { + "epoch": 2.3646898297375865, + "grad_norm": 1.5019205941410168, + "learning_rate": 1.3048642288153268e-06, + "loss": 0.6444, + "step": 59790 + }, + { + "epoch": 2.3650853289564755, + "grad_norm": 1.1635429701984492, + "learning_rate": 1.3033144967862922e-06, + "loss": 0.6659, + "step": 59800 + }, + { + "epoch": 2.3654808281753645, + "grad_norm": 1.316139659650491, + "learning_rate": 1.3017655476519164e-06, + "loss": 0.6793, + "step": 59810 + }, + { + "epoch": 2.3658763273942536, + "grad_norm": 1.3870046542069292, + "learning_rate": 1.3002173817402375e-06, + "loss": 0.6723, + "step": 59820 + }, + { + "epoch": 2.3662718266131426, + "grad_norm": 1.3315779130439447, + "learning_rate": 1.2986699993791335e-06, + "loss": 0.662, + "step": 59830 + }, + { + "epoch": 2.3666673258320317, + "grad_norm": 1.3196653182571463, + "learning_rate": 1.2971234008963095e-06, + "loss": 0.6742, + "step": 59840 + }, + { + "epoch": 
2.3670628250509207, + "grad_norm": 1.644057338081116, + "learning_rate": 1.2955775866193132e-06, + "loss": 0.645, + "step": 59850 + }, + { + "epoch": 2.3674583242698097, + "grad_norm": 1.4693549054439152, + "learning_rate": 1.294032556875517e-06, + "loss": 0.6549, + "step": 59860 + }, + { + "epoch": 2.3678538234886988, + "grad_norm": 1.3506758541748658, + "learning_rate": 1.2924883119921356e-06, + "loss": 0.6493, + "step": 59870 + }, + { + "epoch": 2.368249322707588, + "grad_norm": 1.779826698792813, + "learning_rate": 1.290944852296212e-06, + "loss": 0.6557, + "step": 59880 + }, + { + "epoch": 2.368644821926477, + "grad_norm": 1.4617164377679117, + "learning_rate": 1.2894021781146232e-06, + "loss": 0.7036, + "step": 59890 + }, + { + "epoch": 2.369040321145366, + "grad_norm": 1.5982077919923772, + "learning_rate": 1.2878602897740833e-06, + "loss": 0.6526, + "step": 59900 + }, + { + "epoch": 2.369435820364255, + "grad_norm": 1.5418488278722802, + "learning_rate": 1.2863191876011377e-06, + "loss": 0.6785, + "step": 59910 + }, + { + "epoch": 2.369831319583144, + "grad_norm": 1.4351145576462119, + "learning_rate": 1.2847788719221627e-06, + "loss": 0.6286, + "step": 59920 + }, + { + "epoch": 2.370226818802033, + "grad_norm": 1.3561329025592253, + "learning_rate": 1.2832393430633743e-06, + "loss": 0.6332, + "step": 59930 + }, + { + "epoch": 2.370622318020922, + "grad_norm": 1.610307683543711, + "learning_rate": 1.2817006013508166e-06, + "loss": 0.6688, + "step": 59940 + }, + { + "epoch": 2.371017817239811, + "grad_norm": 1.7967913675678822, + "learning_rate": 1.280162647110368e-06, + "loss": 0.6873, + "step": 59950 + }, + { + "epoch": 2.3714133164587, + "grad_norm": 1.3650903564260914, + "learning_rate": 1.2786254806677428e-06, + "loss": 0.7004, + "step": 59960 + }, + { + "epoch": 2.371808815677589, + "grad_norm": 1.7176979559601464, + "learning_rate": 1.2770891023484838e-06, + "loss": 0.6545, + "step": 59970 + }, + { + "epoch": 2.372204314896478, + "grad_norm": 1.3125911757228816, + "learning_rate": 1.2755535124779733e-06, + "loss": 0.6429, + "step": 59980 + }, + { + "epoch": 2.3725998141153672, + "grad_norm": 1.488890907232398, + "learning_rate": 1.2740187113814196e-06, + "loss": 0.6575, + "step": 59990 + }, + { + "epoch": 2.3729953133342563, + "grad_norm": 1.4188619088579277, + "learning_rate": 1.2724846993838696e-06, + "loss": 0.6166, + "step": 60000 + }, + { + "epoch": 2.3733908125531453, + "grad_norm": 1.5869439201226596, + "learning_rate": 1.2709514768101982e-06, + "loss": 0.6272, + "step": 60010 + }, + { + "epoch": 2.3737863117720344, + "grad_norm": 1.5208063386082633, + "learning_rate": 1.2694190439851205e-06, + "loss": 0.6505, + "step": 60020 + }, + { + "epoch": 2.3741818109909234, + "grad_norm": 1.4023165930533983, + "learning_rate": 1.2678874012331732e-06, + "loss": 0.6659, + "step": 60030 + }, + { + "epoch": 2.3745773102098124, + "grad_norm": 1.55656971084094, + "learning_rate": 1.2663565488787365e-06, + "loss": 0.625, + "step": 60040 + }, + { + "epoch": 2.3749728094287015, + "grad_norm": 1.280198765448565, + "learning_rate": 1.2648264872460165e-06, + "loss": 0.6823, + "step": 60050 + }, + { + "epoch": 2.3753683086475905, + "grad_norm": 1.282086373868942, + "learning_rate": 1.2632972166590557e-06, + "loss": 0.6827, + "step": 60060 + }, + { + "epoch": 2.3757638078664796, + "grad_norm": 1.6183942811456713, + "learning_rate": 1.2617687374417248e-06, + "loss": 0.6401, + "step": 60070 + }, + { + "epoch": 2.3761593070853686, + "grad_norm": 1.290320486427383, + "learning_rate": 
1.260241049917733e-06, + "loss": 0.6903, + "step": 60080 + }, + { + "epoch": 2.3765548063042576, + "grad_norm": 1.393264511520163, + "learning_rate": 1.2587141544106147e-06, + "loss": 0.645, + "step": 60090 + }, + { + "epoch": 2.3769503055231467, + "grad_norm": 1.3693027720380315, + "learning_rate": 1.257188051243744e-06, + "loss": 0.6482, + "step": 60100 + }, + { + "epoch": 2.3773458047420357, + "grad_norm": 1.3737721789055617, + "learning_rate": 1.25566274074032e-06, + "loss": 0.6419, + "step": 60110 + }, + { + "epoch": 2.3777413039609248, + "grad_norm": 1.5808172562717315, + "learning_rate": 1.25413822322338e-06, + "loss": 0.6758, + "step": 60120 + }, + { + "epoch": 2.378136803179814, + "grad_norm": 1.4300721312341145, + "learning_rate": 1.2526144990157896e-06, + "loss": 0.6479, + "step": 60130 + }, + { + "epoch": 2.378532302398703, + "grad_norm": 1.5589774132785974, + "learning_rate": 1.2510915684402476e-06, + "loss": 0.6321, + "step": 60140 + }, + { + "epoch": 2.378927801617592, + "grad_norm": 1.5109835817451736, + "learning_rate": 1.2495694318192825e-06, + "loss": 0.6749, + "step": 60150 + }, + { + "epoch": 2.379323300836481, + "grad_norm": 1.3193461829025568, + "learning_rate": 1.2480480894752607e-06, + "loss": 0.6608, + "step": 60160 + }, + { + "epoch": 2.37971880005537, + "grad_norm": 1.7361197051816402, + "learning_rate": 1.246527541730374e-06, + "loss": 0.6349, + "step": 60170 + }, + { + "epoch": 2.380114299274259, + "grad_norm": 1.3659520000201715, + "learning_rate": 1.2450077889066476e-06, + "loss": 0.6568, + "step": 60180 + }, + { + "epoch": 2.380509798493148, + "grad_norm": 1.3582579488056623, + "learning_rate": 1.2434888313259413e-06, + "loss": 0.6418, + "step": 60190 + }, + { + "epoch": 2.380905297712037, + "grad_norm": 1.3786555784983439, + "learning_rate": 1.241970669309942e-06, + "loss": 0.6776, + "step": 60200 + }, + { + "epoch": 2.381300796930926, + "grad_norm": 1.5338849168254205, + "learning_rate": 1.240453303180173e-06, + "loss": 0.6728, + "step": 60210 + }, + { + "epoch": 2.381696296149815, + "grad_norm": 1.506702217549218, + "learning_rate": 1.2389367332579839e-06, + "loss": 0.6693, + "step": 60220 + }, + { + "epoch": 2.382091795368704, + "grad_norm": 1.700159884829986, + "learning_rate": 1.237420959864561e-06, + "loss": 0.6345, + "step": 60230 + }, + { + "epoch": 2.3824872945875932, + "grad_norm": 1.3334601416749623, + "learning_rate": 1.235905983320916e-06, + "loss": 0.641, + "step": 60240 + }, + { + "epoch": 2.3828827938064823, + "grad_norm": 1.361524541931307, + "learning_rate": 1.234391803947898e-06, + "loss": 0.6521, + "step": 60250 + }, + { + "epoch": 2.3832782930253713, + "grad_norm": 1.302166779138101, + "learning_rate": 1.232878422066181e-06, + "loss": 0.6603, + "step": 60260 + }, + { + "epoch": 2.3836737922442603, + "grad_norm": 1.3929624457021772, + "learning_rate": 1.2313658379962785e-06, + "loss": 0.6468, + "step": 60270 + }, + { + "epoch": 2.3840692914631494, + "grad_norm": 1.5646784580041087, + "learning_rate": 1.2298540520585239e-06, + "loss": 0.66, + "step": 60280 + }, + { + "epoch": 2.3844647906820384, + "grad_norm": 1.453340753328277, + "learning_rate": 1.228343064573091e-06, + "loss": 0.6768, + "step": 60290 + }, + { + "epoch": 2.3848602899009275, + "grad_norm": 1.2710447433472736, + "learning_rate": 1.226832875859979e-06, + "loss": 0.6625, + "step": 60300 + }, + { + "epoch": 2.3852557891198165, + "grad_norm": 1.5179303983822905, + "learning_rate": 1.225323486239023e-06, + "loss": 0.6475, + "step": 60310 + }, + { + "epoch": 
2.3856512883387055, + "grad_norm": 1.4390768115991506, + "learning_rate": 1.223814896029883e-06, + "loss": 0.6869, + "step": 60320 + }, + { + "epoch": 2.3860467875575946, + "grad_norm": 1.2879109948492395, + "learning_rate": 1.2223071055520548e-06, + "loss": 0.644, + "step": 60330 + }, + { + "epoch": 2.3864422867764836, + "grad_norm": 1.4446520488917953, + "learning_rate": 1.2208001151248593e-06, + "loss": 0.6621, + "step": 60340 + }, + { + "epoch": 2.3868377859953727, + "grad_norm": 1.7177699519167628, + "learning_rate": 1.2192939250674556e-06, + "loss": 0.6374, + "step": 60350 + }, + { + "epoch": 2.3872332852142617, + "grad_norm": 1.5110958571884754, + "learning_rate": 1.2177885356988272e-06, + "loss": 0.6776, + "step": 60360 + }, + { + "epoch": 2.3876287844331507, + "grad_norm": 1.8865416778637636, + "learning_rate": 1.2162839473377874e-06, + "loss": 0.6536, + "step": 60370 + }, + { + "epoch": 2.38802428365204, + "grad_norm": 1.2466058794887338, + "learning_rate": 1.2147801603029858e-06, + "loss": 0.6691, + "step": 60380 + }, + { + "epoch": 2.388419782870929, + "grad_norm": 1.399261052596627, + "learning_rate": 1.2132771749128968e-06, + "loss": 0.6368, + "step": 60390 + }, + { + "epoch": 2.388815282089818, + "grad_norm": 1.5294209359199646, + "learning_rate": 1.2117749914858278e-06, + "loss": 0.6576, + "step": 60400 + }, + { + "epoch": 2.389210781308707, + "grad_norm": 1.1541573965797718, + "learning_rate": 1.2102736103399131e-06, + "loss": 0.628, + "step": 60410 + }, + { + "epoch": 2.389606280527596, + "grad_norm": 1.5562020024295269, + "learning_rate": 1.2087730317931234e-06, + "loss": 0.6788, + "step": 60420 + }, + { + "epoch": 2.390001779746485, + "grad_norm": 1.3303838930464016, + "learning_rate": 1.2072732561632517e-06, + "loss": 0.6592, + "step": 60430 + }, + { + "epoch": 2.390397278965374, + "grad_norm": 1.4643209353778344, + "learning_rate": 1.205774283767928e-06, + "loss": 0.6533, + "step": 60440 + }, + { + "epoch": 2.390792778184263, + "grad_norm": 1.9301841576090892, + "learning_rate": 1.2042761149246068e-06, + "loss": 0.6692, + "step": 60450 + }, + { + "epoch": 2.391188277403152, + "grad_norm": 1.3581607500107369, + "learning_rate": 1.202778749950576e-06, + "loss": 0.6754, + "step": 60460 + }, + { + "epoch": 2.391583776622041, + "grad_norm": 1.4465304591485115, + "learning_rate": 1.2012821891629506e-06, + "loss": 0.6277, + "step": 60470 + }, + { + "epoch": 2.39197927584093, + "grad_norm": 1.4712006554120876, + "learning_rate": 1.199786432878678e-06, + "loss": 0.6284, + "step": 60480 + }, + { + "epoch": 2.392374775059819, + "grad_norm": 1.5467911394235117, + "learning_rate": 1.198291481414532e-06, + "loss": 0.6247, + "step": 60490 + }, + { + "epoch": 2.3927702742787083, + "grad_norm": 1.4062238072676685, + "learning_rate": 1.1967973350871198e-06, + "loss": 0.636, + "step": 60500 + }, + { + "epoch": 2.3931657734975973, + "grad_norm": 1.3316811234444652, + "learning_rate": 1.1953039942128747e-06, + "loss": 0.6328, + "step": 60510 + }, + { + "epoch": 2.3935612727164863, + "grad_norm": 1.3839751745652498, + "learning_rate": 1.1938114591080614e-06, + "loss": 0.6461, + "step": 60520 + }, + { + "epoch": 2.3939567719353754, + "grad_norm": 1.5571631658497918, + "learning_rate": 1.1923197300887707e-06, + "loss": 0.6327, + "step": 60530 + }, + { + "epoch": 2.3943522711542644, + "grad_norm": 1.6334308492010725, + "learning_rate": 1.190828807470929e-06, + "loss": 0.6564, + "step": 60540 + }, + { + "epoch": 2.3947477703731535, + "grad_norm": 1.418491763997048, + "learning_rate": 
1.1893386915702849e-06, + "loss": 0.6595, + "step": 60550 + }, + { + "epoch": 2.3951432695920425, + "grad_norm": 1.48611872633573, + "learning_rate": 1.1878493827024223e-06, + "loss": 0.6649, + "step": 60560 + }, + { + "epoch": 2.3955387688109315, + "grad_norm": 1.2897192077821347, + "learning_rate": 1.1863608811827487e-06, + "loss": 0.6429, + "step": 60570 + }, + { + "epoch": 2.3959342680298206, + "grad_norm": 1.4684912959462901, + "learning_rate": 1.1848731873265057e-06, + "loss": 0.661, + "step": 60580 + }, + { + "epoch": 2.3963297672487096, + "grad_norm": 1.491397517901672, + "learning_rate": 1.1833863014487601e-06, + "loss": 0.6491, + "step": 60590 + }, + { + "epoch": 2.3967252664675986, + "grad_norm": 1.439643725719278, + "learning_rate": 1.1819002238644078e-06, + "loss": 0.6123, + "step": 60600 + }, + { + "epoch": 2.3971207656864877, + "grad_norm": 1.50555570261791, + "learning_rate": 1.1804149548881771e-06, + "loss": 0.648, + "step": 60610 + }, + { + "epoch": 2.3975162649053767, + "grad_norm": 1.493759672470509, + "learning_rate": 1.1789304948346192e-06, + "loss": 0.6467, + "step": 60620 + }, + { + "epoch": 2.3979117641242658, + "grad_norm": 1.2590367263133964, + "learning_rate": 1.1774468440181215e-06, + "loss": 0.6686, + "step": 60630 + }, + { + "epoch": 2.398307263343155, + "grad_norm": 1.7154889530827062, + "learning_rate": 1.1759640027528923e-06, + "loss": 0.6636, + "step": 60640 + }, + { + "epoch": 2.398702762562044, + "grad_norm": 1.3137260105772055, + "learning_rate": 1.1744819713529742e-06, + "loss": 0.6737, + "step": 60650 + }, + { + "epoch": 2.399098261780933, + "grad_norm": 1.2598564465211248, + "learning_rate": 1.1730007501322333e-06, + "loss": 0.6689, + "step": 60660 + }, + { + "epoch": 2.399493760999822, + "grad_norm": 1.1638866359656532, + "learning_rate": 1.17152033940437e-06, + "loss": 0.6432, + "step": 60670 + }, + { + "epoch": 2.399889260218711, + "grad_norm": 1.3151691657067, + "learning_rate": 1.1700407394829071e-06, + "loss": 0.6492, + "step": 60680 + }, + { + "epoch": 2.4002847594376, + "grad_norm": 1.544299212709401, + "learning_rate": 1.1685619506812019e-06, + "loss": 0.6345, + "step": 60690 + }, + { + "epoch": 2.400680258656489, + "grad_norm": 1.5101256898393642, + "learning_rate": 1.1670839733124328e-06, + "loss": 0.6272, + "step": 60700 + }, + { + "epoch": 2.401075757875378, + "grad_norm": 1.629622232170866, + "learning_rate": 1.1656068076896133e-06, + "loss": 0.6542, + "step": 60710 + }, + { + "epoch": 2.401471257094267, + "grad_norm": 1.362979777159695, + "learning_rate": 1.1641304541255793e-06, + "loss": 0.6729, + "step": 60720 + }, + { + "epoch": 2.401866756313156, + "grad_norm": 1.4582497685187814, + "learning_rate": 1.1626549129329994e-06, + "loss": 0.6578, + "step": 60730 + }, + { + "epoch": 2.402262255532045, + "grad_norm": 1.4198343937352749, + "learning_rate": 1.1611801844243653e-06, + "loss": 0.6474, + "step": 60740 + }, + { + "epoch": 2.4026577547509342, + "grad_norm": 1.5561261182820643, + "learning_rate": 1.1597062689120025e-06, + "loss": 0.6665, + "step": 60750 + }, + { + "epoch": 2.4030532539698233, + "grad_norm": 1.6049471813565488, + "learning_rate": 1.1582331667080592e-06, + "loss": 0.6206, + "step": 60760 + }, + { + "epoch": 2.4034487531887123, + "grad_norm": 1.6786217835036092, + "learning_rate": 1.1567608781245133e-06, + "loss": 0.6113, + "step": 60770 + }, + { + "epoch": 2.4038442524076014, + "grad_norm": 1.488190374089625, + "learning_rate": 1.1552894034731687e-06, + "loss": 0.6632, + "step": 60780 + }, + { + "epoch": 
2.4042397516264904, + "grad_norm": 1.4246333959101352, + "learning_rate": 1.1538187430656618e-06, + "loss": 0.633, + "step": 60790 + }, + { + "epoch": 2.4046352508453794, + "grad_norm": 1.3032663001725917, + "learning_rate": 1.15234889721345e-06, + "loss": 0.6375, + "step": 60800 + }, + { + "epoch": 2.4050307500642685, + "grad_norm": 1.3086106189047355, + "learning_rate": 1.1508798662278248e-06, + "loss": 0.6678, + "step": 60810 + }, + { + "epoch": 2.4054262492831575, + "grad_norm": 1.7276415892437251, + "learning_rate": 1.1494116504199e-06, + "loss": 0.6619, + "step": 60820 + }, + { + "epoch": 2.4058217485020466, + "grad_norm": 1.1340889323379753, + "learning_rate": 1.1479442501006166e-06, + "loss": 0.6405, + "step": 60830 + }, + { + "epoch": 2.4062172477209356, + "grad_norm": 1.6662959419954302, + "learning_rate": 1.1464776655807492e-06, + "loss": 0.6409, + "step": 60840 + }, + { + "epoch": 2.4066127469398246, + "grad_norm": 1.3467360463469853, + "learning_rate": 1.1450118971708907e-06, + "loss": 0.6383, + "step": 60850 + }, + { + "epoch": 2.4070082461587137, + "grad_norm": 1.3629825280027281, + "learning_rate": 1.1435469451814701e-06, + "loss": 0.6386, + "step": 60860 + }, + { + "epoch": 2.4074037453776027, + "grad_norm": 1.493295078801304, + "learning_rate": 1.1420828099227355e-06, + "loss": 0.6505, + "step": 60870 + }, + { + "epoch": 2.4077992445964918, + "grad_norm": 1.4834088359161504, + "learning_rate": 1.1406194917047698e-06, + "loss": 0.6762, + "step": 60880 + }, + { + "epoch": 2.408194743815381, + "grad_norm": 1.6251605261775033, + "learning_rate": 1.1391569908374734e-06, + "loss": 0.6691, + "step": 60890 + }, + { + "epoch": 2.40859024303427, + "grad_norm": 1.3862890862619546, + "learning_rate": 1.1376953076305825e-06, + "loss": 0.6311, + "step": 60900 + }, + { + "epoch": 2.408985742253159, + "grad_norm": 1.2292694053280526, + "learning_rate": 1.136234442393655e-06, + "loss": 0.6381, + "step": 60910 + }, + { + "epoch": 2.409381241472048, + "grad_norm": 1.4940990584477818, + "learning_rate": 1.1347743954360791e-06, + "loss": 0.6486, + "step": 60920 + }, + { + "epoch": 2.409776740690937, + "grad_norm": 1.6119023869121893, + "learning_rate": 1.133315167067065e-06, + "loss": 0.6494, + "step": 60930 + }, + { + "epoch": 2.410172239909826, + "grad_norm": 1.4466785758021523, + "learning_rate": 1.1318567575956552e-06, + "loss": 0.6359, + "step": 60940 + }, + { + "epoch": 2.410567739128715, + "grad_norm": 1.4157370381367196, + "learning_rate": 1.130399167330713e-06, + "loss": 0.6719, + "step": 60950 + }, + { + "epoch": 2.4109632383476045, + "grad_norm": 1.4081379226485597, + "learning_rate": 1.1289423965809337e-06, + "loss": 0.6438, + "step": 60960 + }, + { + "epoch": 2.411358737566493, + "grad_norm": 1.6047245390174836, + "learning_rate": 1.1274864456548334e-06, + "loss": 0.6719, + "step": 60970 + }, + { + "epoch": 2.4117542367853826, + "grad_norm": 1.5985440151479797, + "learning_rate": 1.1260313148607616e-06, + "loss": 0.6383, + "step": 60980 + }, + { + "epoch": 2.412149736004271, + "grad_norm": 1.8051044827715523, + "learning_rate": 1.1245770045068855e-06, + "loss": 0.654, + "step": 60990 + }, + { + "epoch": 2.4125452352231607, + "grad_norm": 1.5348047869595263, + "learning_rate": 1.1231235149012082e-06, + "loss": 0.6216, + "step": 61000 + }, + { + "epoch": 2.4129407344420493, + "grad_norm": 1.2468646891487871, + "learning_rate": 1.1216708463515486e-06, + "loss": 0.6605, + "step": 61010 + }, + { + "epoch": 2.4133362336609387, + "grad_norm": 1.5928298124860365, + "learning_rate": 
1.120218999165561e-06, + "loss": 0.6642, + "step": 61020 + }, + { + "epoch": 2.4137317328798273, + "grad_norm": 1.3940623844004774, + "learning_rate": 1.1187679736507184e-06, + "loss": 0.6448, + "step": 61030 + }, + { + "epoch": 2.414127232098717, + "grad_norm": 1.3053382297503529, + "learning_rate": 1.1173177701143267e-06, + "loss": 0.6602, + "step": 61040 + }, + { + "epoch": 2.4145227313176054, + "grad_norm": 1.5173553184792226, + "learning_rate": 1.1158683888635108e-06, + "loss": 0.6653, + "step": 61050 + }, + { + "epoch": 2.414918230536495, + "grad_norm": 1.4179613876805257, + "learning_rate": 1.1144198302052285e-06, + "loss": 0.663, + "step": 61060 + }, + { + "epoch": 2.4153137297553835, + "grad_norm": 1.4964988085060997, + "learning_rate": 1.112972094446257e-06, + "loss": 0.6601, + "step": 61070 + }, + { + "epoch": 2.415709228974273, + "grad_norm": 1.5579015889614098, + "learning_rate": 1.111525181893201e-06, + "loss": 0.5985, + "step": 61080 + }, + { + "epoch": 2.4161047281931616, + "grad_norm": 1.5543731641027563, + "learning_rate": 1.1100790928524952e-06, + "loss": 0.6759, + "step": 61090 + }, + { + "epoch": 2.416500227412051, + "grad_norm": 1.5533371778260152, + "learning_rate": 1.1086338276303937e-06, + "loss": 0.6424, + "step": 61100 + }, + { + "epoch": 2.4168957266309397, + "grad_norm": 1.640508402683737, + "learning_rate": 1.1071893865329809e-06, + "loss": 0.6546, + "step": 61110 + }, + { + "epoch": 2.417291225849829, + "grad_norm": 1.922679998780759, + "learning_rate": 1.1057457698661634e-06, + "loss": 0.6288, + "step": 61120 + }, + { + "epoch": 2.417686725068718, + "grad_norm": 1.464637869938643, + "learning_rate": 1.1043029779356746e-06, + "loss": 0.6788, + "step": 61130 + }, + { + "epoch": 2.418082224287607, + "grad_norm": 1.4836737500505095, + "learning_rate": 1.1028610110470721e-06, + "loss": 0.6867, + "step": 61140 + }, + { + "epoch": 2.4184777235064963, + "grad_norm": 1.5008039641123663, + "learning_rate": 1.1014198695057425e-06, + "loss": 0.684, + "step": 61150 + }, + { + "epoch": 2.4188732227253853, + "grad_norm": 1.7285466124624471, + "learning_rate": 1.0999795536168923e-06, + "loss": 0.6598, + "step": 61160 + }, + { + "epoch": 2.4192687219442743, + "grad_norm": 1.5317446327926802, + "learning_rate": 1.0985400636855575e-06, + "loss": 0.6441, + "step": 61170 + }, + { + "epoch": 2.4196642211631634, + "grad_norm": 1.439633524673699, + "learning_rate": 1.0971014000165953e-06, + "loss": 0.6655, + "step": 61180 + }, + { + "epoch": 2.4200597203820524, + "grad_norm": 1.3834204559553223, + "learning_rate": 1.0956635629146928e-06, + "loss": 0.6558, + "step": 61190 + }, + { + "epoch": 2.4204552196009415, + "grad_norm": 1.4429096140548068, + "learning_rate": 1.0942265526843565e-06, + "loss": 0.646, + "step": 61200 + }, + { + "epoch": 2.4208507188198305, + "grad_norm": 1.2789180166857825, + "learning_rate": 1.0927903696299236e-06, + "loss": 0.6421, + "step": 61210 + }, + { + "epoch": 2.4212462180387195, + "grad_norm": 1.4565206704965734, + "learning_rate": 1.0913550140555496e-06, + "loss": 0.6717, + "step": 61220 + }, + { + "epoch": 2.4216417172576086, + "grad_norm": 1.4279653826072902, + "learning_rate": 1.0899204862652218e-06, + "loss": 0.6622, + "step": 61230 + }, + { + "epoch": 2.4220372164764976, + "grad_norm": 1.338945014598067, + "learning_rate": 1.0884867865627462e-06, + "loss": 0.675, + "step": 61240 + }, + { + "epoch": 2.4224327156953867, + "grad_norm": 1.4670619474772613, + "learning_rate": 1.0870539152517568e-06, + "loss": 0.6419, + "step": 61250 + }, + { + "epoch": 
2.4228282149142757, + "grad_norm": 1.530382368666391, + "learning_rate": 1.0856218726357092e-06, + "loss": 0.6496, + "step": 61260 + }, + { + "epoch": 2.4232237141331647, + "grad_norm": 1.7566544375640354, + "learning_rate": 1.0841906590178885e-06, + "loss": 0.6739, + "step": 61270 + }, + { + "epoch": 2.4236192133520538, + "grad_norm": 1.802849502370634, + "learning_rate": 1.0827602747013987e-06, + "loss": 0.6801, + "step": 61280 + }, + { + "epoch": 2.424014712570943, + "grad_norm": 1.5131869114513883, + "learning_rate": 1.0813307199891732e-06, + "loss": 0.6405, + "step": 61290 + }, + { + "epoch": 2.424410211789832, + "grad_norm": 1.4696017429840822, + "learning_rate": 1.0799019951839656e-06, + "loss": 0.644, + "step": 61300 + }, + { + "epoch": 2.424805711008721, + "grad_norm": 1.1960487304474925, + "learning_rate": 1.0784741005883542e-06, + "loss": 0.6396, + "step": 61310 + }, + { + "epoch": 2.42520121022761, + "grad_norm": 1.3779615554428748, + "learning_rate": 1.0770470365047452e-06, + "loss": 0.6472, + "step": 61320 + }, + { + "epoch": 2.425596709446499, + "grad_norm": 1.3969139885976096, + "learning_rate": 1.0756208032353643e-06, + "loss": 0.6578, + "step": 61330 + }, + { + "epoch": 2.425992208665388, + "grad_norm": 1.6232009471995144, + "learning_rate": 1.0741954010822653e-06, + "loss": 0.6766, + "step": 61340 + }, + { + "epoch": 2.426387707884277, + "grad_norm": 1.3685099763164983, + "learning_rate": 1.0727708303473212e-06, + "loss": 0.6633, + "step": 61350 + }, + { + "epoch": 2.426783207103166, + "grad_norm": 1.5502785356278703, + "learning_rate": 1.0713470913322343e-06, + "loss": 0.6273, + "step": 61360 + }, + { + "epoch": 2.427178706322055, + "grad_norm": 1.549311524200009, + "learning_rate": 1.0699241843385273e-06, + "loss": 0.6679, + "step": 61370 + }, + { + "epoch": 2.427574205540944, + "grad_norm": 1.51978344787089, + "learning_rate": 1.0685021096675468e-06, + "loss": 0.6354, + "step": 61380 + }, + { + "epoch": 2.427969704759833, + "grad_norm": 1.4134533321743648, + "learning_rate": 1.0670808676204636e-06, + "loss": 0.6347, + "step": 61390 + }, + { + "epoch": 2.4283652039787222, + "grad_norm": 1.3841824161300906, + "learning_rate": 1.0656604584982743e-06, + "loss": 0.6463, + "step": 61400 + }, + { + "epoch": 2.4287607031976113, + "grad_norm": 1.7679971822631761, + "learning_rate": 1.0642408826017947e-06, + "loss": 0.6494, + "step": 61410 + }, + { + "epoch": 2.4291562024165003, + "grad_norm": 1.5882746010635962, + "learning_rate": 1.062822140231669e-06, + "loss": 0.6734, + "step": 61420 + }, + { + "epoch": 2.4295517016353894, + "grad_norm": 1.4039767642203624, + "learning_rate": 1.0614042316883605e-06, + "loss": 0.6578, + "step": 61430 + }, + { + "epoch": 2.4299472008542784, + "grad_norm": 1.4293413099723287, + "learning_rate": 1.0599871572721605e-06, + "loss": 0.6457, + "step": 61440 + }, + { + "epoch": 2.4303427000731674, + "grad_norm": 1.5983790144291692, + "learning_rate": 1.0585709172831777e-06, + "loss": 0.6257, + "step": 61450 + }, + { + "epoch": 2.4307381992920565, + "grad_norm": 1.285780355011274, + "learning_rate": 1.0571555120213505e-06, + "loss": 0.6815, + "step": 61460 + }, + { + "epoch": 2.4311336985109455, + "grad_norm": 1.1432723233815385, + "learning_rate": 1.0557409417864367e-06, + "loss": 0.6545, + "step": 61470 + }, + { + "epoch": 2.4315291977298346, + "grad_norm": 1.4830195332034621, + "learning_rate": 1.0543272068780164e-06, + "loss": 0.6535, + "step": 61480 + }, + { + "epoch": 2.4319246969487236, + "grad_norm": 1.4665287621063017, + "learning_rate": 
1.0529143075954968e-06, + "loss": 0.6605, + "step": 61490 + }, + { + "epoch": 2.4323201961676126, + "grad_norm": 1.4245650550237787, + "learning_rate": 1.051502244238105e-06, + "loss": 0.6601, + "step": 61500 + }, + { + "epoch": 2.4327156953865017, + "grad_norm": 1.4538133840707317, + "learning_rate": 1.0500910171048894e-06, + "loss": 0.6726, + "step": 61510 + }, + { + "epoch": 2.4331111946053907, + "grad_norm": 1.5405563254994379, + "learning_rate": 1.048680626494728e-06, + "loss": 0.6572, + "step": 61520 + }, + { + "epoch": 2.4335066938242798, + "grad_norm": 1.5526006017400207, + "learning_rate": 1.0472710727063145e-06, + "loss": 0.6522, + "step": 61530 + }, + { + "epoch": 2.433902193043169, + "grad_norm": 1.6113840757233477, + "learning_rate": 1.0458623560381682e-06, + "loss": 0.6011, + "step": 61540 + }, + { + "epoch": 2.434297692262058, + "grad_norm": 1.3529848016486252, + "learning_rate": 1.0444544767886323e-06, + "loss": 0.687, + "step": 61550 + }, + { + "epoch": 2.434693191480947, + "grad_norm": 1.7203540451056272, + "learning_rate": 1.04304743525587e-06, + "loss": 0.6242, + "step": 61560 + }, + { + "epoch": 2.435088690699836, + "grad_norm": 1.5137338493701369, + "learning_rate": 1.0416412317378716e-06, + "loss": 0.6326, + "step": 61570 + }, + { + "epoch": 2.435484189918725, + "grad_norm": 1.5154935200710762, + "learning_rate": 1.040235866532443e-06, + "loss": 0.6585, + "step": 61580 + }, + { + "epoch": 2.435879689137614, + "grad_norm": 1.3313570789298665, + "learning_rate": 1.0388313399372201e-06, + "loss": 0.6935, + "step": 61590 + }, + { + "epoch": 2.436275188356503, + "grad_norm": 1.5742679874702534, + "learning_rate": 1.0374276522496546e-06, + "loss": 0.6518, + "step": 61600 + }, + { + "epoch": 2.436670687575392, + "grad_norm": 1.484978954781372, + "learning_rate": 1.0360248037670273e-06, + "loss": 0.6419, + "step": 61610 + }, + { + "epoch": 2.437066186794281, + "grad_norm": 1.6230100547349011, + "learning_rate": 1.0346227947864323e-06, + "loss": 0.6335, + "step": 61620 + }, + { + "epoch": 2.43746168601317, + "grad_norm": 1.508863979277988, + "learning_rate": 1.033221625604795e-06, + "loss": 0.6358, + "step": 61630 + }, + { + "epoch": 2.437857185232059, + "grad_norm": 1.3207688631361822, + "learning_rate": 1.0318212965188568e-06, + "loss": 0.659, + "step": 61640 + }, + { + "epoch": 2.4382526844509482, + "grad_norm": 1.2422122200091579, + "learning_rate": 1.0304218078251853e-06, + "loss": 0.6612, + "step": 61650 + }, + { + "epoch": 2.4386481836698373, + "grad_norm": 1.290823321606612, + "learning_rate": 1.0290231598201666e-06, + "loss": 0.6836, + "step": 61660 + }, + { + "epoch": 2.4390436828887263, + "grad_norm": 1.3802244898816645, + "learning_rate": 1.0276253528000118e-06, + "loss": 0.6474, + "step": 61670 + }, + { + "epoch": 2.4394391821076153, + "grad_norm": 1.3739471336168543, + "learning_rate": 1.0262283870607509e-06, + "loss": 0.6883, + "step": 61680 + }, + { + "epoch": 2.4398346813265044, + "grad_norm": 1.4467098933249398, + "learning_rate": 1.0248322628982393e-06, + "loss": 0.6872, + "step": 61690 + }, + { + "epoch": 2.4402301805453934, + "grad_norm": 1.1580199054388167, + "learning_rate": 1.02343698060815e-06, + "loss": 0.6613, + "step": 61700 + }, + { + "epoch": 2.4406256797642825, + "grad_norm": 1.6253976706663749, + "learning_rate": 1.0220425404859825e-06, + "loss": 0.625, + "step": 61710 + }, + { + "epoch": 2.4410211789831715, + "grad_norm": 1.7512048321027551, + "learning_rate": 1.0206489428270532e-06, + "loss": 0.6518, + "step": 61720 + }, + { + "epoch": 
2.4414166782020605, + "grad_norm": 1.520503957582064, + "learning_rate": 1.0192561879265017e-06, + "loss": 0.6383, + "step": 61730 + }, + { + "epoch": 2.4418121774209496, + "grad_norm": 1.4878472454208505, + "learning_rate": 1.0178642760792923e-06, + "loss": 0.6408, + "step": 61740 + }, + { + "epoch": 2.4422076766398386, + "grad_norm": 1.4681928952925842, + "learning_rate": 1.0164732075802064e-06, + "loss": 0.6744, + "step": 61750 + }, + { + "epoch": 2.4426031758587277, + "grad_norm": 1.5560241461750843, + "learning_rate": 1.0150829827238484e-06, + "loss": 0.6343, + "step": 61760 + }, + { + "epoch": 2.4429986750776167, + "grad_norm": 1.6122360279641574, + "learning_rate": 1.0136936018046433e-06, + "loss": 0.6319, + "step": 61770 + }, + { + "epoch": 2.4433941742965057, + "grad_norm": 1.8002739722284244, + "learning_rate": 1.0123050651168404e-06, + "loss": 0.6488, + "step": 61780 + }, + { + "epoch": 2.443789673515395, + "grad_norm": 1.5156827878034502, + "learning_rate": 1.0109173729545058e-06, + "loss": 0.6635, + "step": 61790 + }, + { + "epoch": 2.444185172734284, + "grad_norm": 1.480946256554298, + "learning_rate": 1.0095305256115306e-06, + "loss": 0.673, + "step": 61800 + }, + { + "epoch": 2.444580671953173, + "grad_norm": 1.6729230238763309, + "learning_rate": 1.0081445233816235e-06, + "loss": 0.6687, + "step": 61810 + }, + { + "epoch": 2.444976171172062, + "grad_norm": 1.4790354983141905, + "learning_rate": 1.006759366558318e-06, + "loss": 0.6674, + "step": 61820 + }, + { + "epoch": 2.445371670390951, + "grad_norm": 1.4564880887960199, + "learning_rate": 1.0053750554349652e-06, + "loss": 0.6431, + "step": 61830 + }, + { + "epoch": 2.44576716960984, + "grad_norm": 1.679158130216397, + "learning_rate": 1.0039915903047398e-06, + "loss": 0.6514, + "step": 61840 + }, + { + "epoch": 2.446162668828729, + "grad_norm": 1.6241238693990425, + "learning_rate": 1.002608971460634e-06, + "loss": 0.6454, + "step": 61850 + }, + { + "epoch": 2.446558168047618, + "grad_norm": 1.4976577718811317, + "learning_rate": 1.001227199195467e-06, + "loss": 0.6832, + "step": 61860 + }, + { + "epoch": 2.446953667266507, + "grad_norm": 1.306892079381734, + "learning_rate": 9.998462738018683e-07, + "loss": 0.674, + "step": 61870 + }, + { + "epoch": 2.447349166485396, + "grad_norm": 1.5629046344378226, + "learning_rate": 9.98466195572299e-07, + "loss": 0.6227, + "step": 61880 + }, + { + "epoch": 2.447744665704285, + "grad_norm": 1.4508346253659112, + "learning_rate": 9.970869647990327e-07, + "loss": 0.6469, + "step": 61890 + }, + { + "epoch": 2.448140164923174, + "grad_norm": 1.9290465565852426, + "learning_rate": 9.957085817741707e-07, + "loss": 0.6317, + "step": 61900 + }, + { + "epoch": 2.4485356641420633, + "grad_norm": 1.7856544279104343, + "learning_rate": 9.943310467896267e-07, + "loss": 0.6417, + "step": 61910 + }, + { + "epoch": 2.4489311633609523, + "grad_norm": 1.55771514639698, + "learning_rate": 9.929543601371428e-07, + "loss": 0.6557, + "step": 61920 + }, + { + "epoch": 2.4493266625798413, + "grad_norm": 1.463084215873033, + "learning_rate": 9.915785221082747e-07, + "loss": 0.6924, + "step": 61930 + }, + { + "epoch": 2.4497221617987304, + "grad_norm": 1.5546262033243476, + "learning_rate": 9.902035329944037e-07, + "loss": 0.6655, + "step": 61940 + }, + { + "epoch": 2.4501176610176194, + "grad_norm": 1.6910192612695814, + "learning_rate": 9.888293930867283e-07, + "loss": 0.6671, + "step": 61950 + }, + { + "epoch": 2.4505131602365084, + "grad_norm": 1.5081076597464609, + "learning_rate": 
9.874561026762653e-07, + "loss": 0.6414, + "step": 61960 + }, + { + "epoch": 2.4509086594553975, + "grad_norm": 1.5947135474911969, + "learning_rate": 9.860836620538577e-07, + "loss": 0.6263, + "step": 61970 + }, + { + "epoch": 2.4513041586742865, + "grad_norm": 1.5982495936199257, + "learning_rate": 9.847120715101632e-07, + "loss": 0.6456, + "step": 61980 + }, + { + "epoch": 2.4516996578931756, + "grad_norm": 1.393370621178838, + "learning_rate": 9.833413313356604e-07, + "loss": 0.6419, + "step": 61990 + }, + { + "epoch": 2.4520951571120646, + "grad_norm": 1.4594935501580708, + "learning_rate": 9.819714418206506e-07, + "loss": 0.6573, + "step": 62000 + }, + { + "epoch": 2.4524906563309536, + "grad_norm": 1.3440150645443825, + "learning_rate": 9.806024032552514e-07, + "loss": 0.6674, + "step": 62010 + }, + { + "epoch": 2.4528861555498427, + "grad_norm": 1.503560926056615, + "learning_rate": 9.79234215929401e-07, + "loss": 0.6362, + "step": 62020 + }, + { + "epoch": 2.4532816547687317, + "grad_norm": 1.6815575487442866, + "learning_rate": 9.778668801328606e-07, + "loss": 0.677, + "step": 62030 + }, + { + "epoch": 2.4536771539876208, + "grad_norm": 1.3585129209049398, + "learning_rate": 9.765003961552055e-07, + "loss": 0.6324, + "step": 62040 + }, + { + "epoch": 2.45407265320651, + "grad_norm": 1.2943114263110564, + "learning_rate": 9.751347642858367e-07, + "loss": 0.6516, + "step": 62050 + }, + { + "epoch": 2.454468152425399, + "grad_norm": 1.5810697418849364, + "learning_rate": 9.737699848139687e-07, + "loss": 0.6421, + "step": 62060 + }, + { + "epoch": 2.454863651644288, + "grad_norm": 1.3837798597260171, + "learning_rate": 9.724060580286409e-07, + "loss": 0.6578, + "step": 62070 + }, + { + "epoch": 2.455259150863177, + "grad_norm": 1.4030850214162944, + "learning_rate": 9.710429842187075e-07, + "loss": 0.6635, + "step": 62080 + }, + { + "epoch": 2.455654650082066, + "grad_norm": 1.2935157562658848, + "learning_rate": 9.696807636728461e-07, + "loss": 0.6587, + "step": 62090 + }, + { + "epoch": 2.456050149300955, + "grad_norm": 1.4275532812321332, + "learning_rate": 9.683193966795512e-07, + "loss": 0.6443, + "step": 62100 + }, + { + "epoch": 2.456445648519844, + "grad_norm": 1.363085813590042, + "learning_rate": 9.669588835271366e-07, + "loss": 0.6697, + "step": 62110 + }, + { + "epoch": 2.456841147738733, + "grad_norm": 1.3131016348851743, + "learning_rate": 9.655992245037337e-07, + "loss": 0.6923, + "step": 62120 + }, + { + "epoch": 2.457236646957622, + "grad_norm": 1.4559672738106249, + "learning_rate": 9.642404198972988e-07, + "loss": 0.6379, + "step": 62130 + }, + { + "epoch": 2.457632146176511, + "grad_norm": 1.4758187857817757, + "learning_rate": 9.628824699956002e-07, + "loss": 0.6303, + "step": 62140 + }, + { + "epoch": 2.4580276453954, + "grad_norm": 1.4862585199584466, + "learning_rate": 9.615253750862308e-07, + "loss": 0.6242, + "step": 62150 + }, + { + "epoch": 2.4584231446142892, + "grad_norm": 1.66444901556112, + "learning_rate": 9.601691354565979e-07, + "loss": 0.6308, + "step": 62160 + }, + { + "epoch": 2.4588186438331783, + "grad_norm": 1.1451174595820228, + "learning_rate": 9.588137513939322e-07, + "loss": 0.6826, + "step": 62170 + }, + { + "epoch": 2.4592141430520673, + "grad_norm": 1.445211288315692, + "learning_rate": 9.574592231852797e-07, + "loss": 0.6555, + "step": 62180 + }, + { + "epoch": 2.4596096422709564, + "grad_norm": 1.3547554940248092, + "learning_rate": 9.561055511175048e-07, + "loss": 0.6102, + "step": 62190 + }, + { + "epoch": 2.4600051414898454, + 
"grad_norm": 1.5317981706398662, + "learning_rate": 9.547527354772951e-07, + "loss": 0.6432, + "step": 62200 + }, + { + "epoch": 2.4604006407087344, + "grad_norm": 1.5952520238558352, + "learning_rate": 9.534007765511505e-07, + "loss": 0.6552, + "step": 62210 + }, + { + "epoch": 2.4607961399276235, + "grad_norm": 1.2482111505444335, + "learning_rate": 9.520496746253955e-07, + "loss": 0.6057, + "step": 62220 + }, + { + "epoch": 2.4611916391465125, + "grad_norm": 1.3345106448942112, + "learning_rate": 9.506994299861693e-07, + "loss": 0.6436, + "step": 62230 + }, + { + "epoch": 2.4615871383654016, + "grad_norm": 1.2860768302922536, + "learning_rate": 9.493500429194302e-07, + "loss": 0.66, + "step": 62240 + }, + { + "epoch": 2.4619826375842906, + "grad_norm": 1.7214272374023878, + "learning_rate": 9.480015137109544e-07, + "loss": 0.6282, + "step": 62250 + }, + { + "epoch": 2.4623781368031796, + "grad_norm": 1.153778145211568, + "learning_rate": 9.466538426463395e-07, + "loss": 0.6429, + "step": 62260 + }, + { + "epoch": 2.4627736360220687, + "grad_norm": 1.377254642963884, + "learning_rate": 9.453070300109968e-07, + "loss": 0.6412, + "step": 62270 + }, + { + "epoch": 2.4631691352409577, + "grad_norm": 1.3144104816162459, + "learning_rate": 9.439610760901608e-07, + "loss": 0.6704, + "step": 62280 + }, + { + "epoch": 2.463564634459847, + "grad_norm": 1.410960223425946, + "learning_rate": 9.42615981168879e-07, + "loss": 0.6663, + "step": 62290 + }, + { + "epoch": 2.463960133678736, + "grad_norm": 1.548955934356688, + "learning_rate": 9.412717455320214e-07, + "loss": 0.6599, + "step": 62300 + }, + { + "epoch": 2.4643556328976253, + "grad_norm": 1.8830095633276083, + "learning_rate": 9.399283694642719e-07, + "loss": 0.6237, + "step": 62310 + }, + { + "epoch": 2.464751132116514, + "grad_norm": 1.4858962781557385, + "learning_rate": 9.385858532501374e-07, + "loss": 0.6399, + "step": 62320 + }, + { + "epoch": 2.4651466313354033, + "grad_norm": 1.1758318344683238, + "learning_rate": 9.372441971739371e-07, + "loss": 0.6445, + "step": 62330 + }, + { + "epoch": 2.465542130554292, + "grad_norm": 1.51863280205433, + "learning_rate": 9.359034015198126e-07, + "loss": 0.6541, + "step": 62340 + }, + { + "epoch": 2.4659376297731814, + "grad_norm": 1.6331341703058395, + "learning_rate": 9.345634665717212e-07, + "loss": 0.6306, + "step": 62350 + }, + { + "epoch": 2.46633312899207, + "grad_norm": 1.5850832986333157, + "learning_rate": 9.332243926134377e-07, + "loss": 0.6371, + "step": 62360 + }, + { + "epoch": 2.4667286282109595, + "grad_norm": 1.4918165819158464, + "learning_rate": 9.318861799285539e-07, + "loss": 0.6556, + "step": 62370 + }, + { + "epoch": 2.467124127429848, + "grad_norm": 1.729307261313895, + "learning_rate": 9.305488288004827e-07, + "loss": 0.659, + "step": 62380 + }, + { + "epoch": 2.4675196266487376, + "grad_norm": 1.2589755586999247, + "learning_rate": 9.292123395124498e-07, + "loss": 0.6528, + "step": 62390 + }, + { + "epoch": 2.467915125867626, + "grad_norm": 1.4258865980902158, + "learning_rate": 9.27876712347503e-07, + "loss": 0.6558, + "step": 62400 + }, + { + "epoch": 2.4683106250865157, + "grad_norm": 1.5996889358058768, + "learning_rate": 9.26541947588504e-07, + "loss": 0.6538, + "step": 62410 + }, + { + "epoch": 2.4687061243054043, + "grad_norm": 1.7470968860875724, + "learning_rate": 9.252080455181323e-07, + "loss": 0.6403, + "step": 62420 + }, + { + "epoch": 2.4691016235242937, + "grad_norm": 1.5989633140410042, + "learning_rate": 9.238750064188873e-07, + "loss": 0.658, + "step": 
62430 + }, + { + "epoch": 2.4694971227431823, + "grad_norm": 1.4565099791856557, + "learning_rate": 9.225428305730822e-07, + "loss": 0.6592, + "step": 62440 + }, + { + "epoch": 2.469892621962072, + "grad_norm": 1.4378637988652545, + "learning_rate": 9.212115182628506e-07, + "loss": 0.637, + "step": 62450 + }, + { + "epoch": 2.470288121180961, + "grad_norm": 1.5468024163108722, + "learning_rate": 9.1988106977014e-07, + "loss": 0.6547, + "step": 62460 + }, + { + "epoch": 2.47068362039985, + "grad_norm": 1.3753385287803068, + "learning_rate": 9.185514853767197e-07, + "loss": 0.6751, + "step": 62470 + }, + { + "epoch": 2.471079119618739, + "grad_norm": 1.1817099948942396, + "learning_rate": 9.172227653641686e-07, + "loss": 0.6297, + "step": 62480 + }, + { + "epoch": 2.471474618837628, + "grad_norm": 1.5115803225740767, + "learning_rate": 9.158949100138897e-07, + "loss": 0.6344, + "step": 62490 + }, + { + "epoch": 2.471870118056517, + "grad_norm": 1.6701593994570985, + "learning_rate": 9.145679196070978e-07, + "loss": 0.6627, + "step": 62500 + }, + { + "epoch": 2.472265617275406, + "grad_norm": 1.3432859419678334, + "learning_rate": 9.132417944248295e-07, + "loss": 0.658, + "step": 62510 + }, + { + "epoch": 2.472661116494295, + "grad_norm": 1.7223608956543923, + "learning_rate": 9.119165347479331e-07, + "loss": 0.6302, + "step": 62520 + }, + { + "epoch": 2.473056615713184, + "grad_norm": 1.3084200109269424, + "learning_rate": 9.105921408570778e-07, + "loss": 0.6594, + "step": 62530 + }, + { + "epoch": 2.473452114932073, + "grad_norm": 1.737087609201176, + "learning_rate": 9.092686130327455e-07, + "loss": 0.6528, + "step": 62540 + }, + { + "epoch": 2.473847614150962, + "grad_norm": 1.4305464176140446, + "learning_rate": 9.079459515552385e-07, + "loss": 0.6803, + "step": 62550 + }, + { + "epoch": 2.4742431133698513, + "grad_norm": 1.429248974699674, + "learning_rate": 9.066241567046724e-07, + "loss": 0.6401, + "step": 62560 + }, + { + "epoch": 2.4746386125887403, + "grad_norm": 1.4025526118836549, + "learning_rate": 9.053032287609825e-07, + "loss": 0.6517, + "step": 62570 + }, + { + "epoch": 2.4750341118076293, + "grad_norm": 1.667657266035966, + "learning_rate": 9.039831680039162e-07, + "loss": 0.6217, + "step": 62580 + }, + { + "epoch": 2.4754296110265184, + "grad_norm": 1.6871116170448464, + "learning_rate": 9.026639747130444e-07, + "loss": 0.6623, + "step": 62590 + }, + { + "epoch": 2.4758251102454074, + "grad_norm": 1.552351408301868, + "learning_rate": 9.013456491677431e-07, + "loss": 0.6752, + "step": 62600 + }, + { + "epoch": 2.4762206094642965, + "grad_norm": 1.5074626328497924, + "learning_rate": 9.000281916472159e-07, + "loss": 0.6335, + "step": 62610 + }, + { + "epoch": 2.4766161086831855, + "grad_norm": 1.3484547178956428, + "learning_rate": 8.987116024304754e-07, + "loss": 0.6698, + "step": 62620 + }, + { + "epoch": 2.4770116079020745, + "grad_norm": 1.3605788799841034, + "learning_rate": 8.973958817963541e-07, + "loss": 0.6492, + "step": 62630 + }, + { + "epoch": 2.4774071071209636, + "grad_norm": 1.4256248434597538, + "learning_rate": 8.960810300234973e-07, + "loss": 0.6488, + "step": 62640 + }, + { + "epoch": 2.4778026063398526, + "grad_norm": 1.5857225928372, + "learning_rate": 8.947670473903708e-07, + "loss": 0.678, + "step": 62650 + }, + { + "epoch": 2.4781981055587416, + "grad_norm": 1.3916001682373047, + "learning_rate": 8.934539341752518e-07, + "loss": 0.6202, + "step": 62660 + }, + { + "epoch": 2.4785936047776307, + "grad_norm": 1.5879844648146813, + "learning_rate": 
8.921416906562342e-07, + "loss": 0.6839, + "step": 62670 + }, + { + "epoch": 2.4789891039965197, + "grad_norm": 1.610584180477446, + "learning_rate": 8.90830317111231e-07, + "loss": 0.657, + "step": 62680 + }, + { + "epoch": 2.4793846032154088, + "grad_norm": 1.462989022813284, + "learning_rate": 8.895198138179662e-07, + "loss": 0.661, + "step": 62690 + }, + { + "epoch": 2.479780102434298, + "grad_norm": 1.346729610438674, + "learning_rate": 8.88210181053985e-07, + "loss": 0.6347, + "step": 62700 + }, + { + "epoch": 2.480175601653187, + "grad_norm": 1.404694002954589, + "learning_rate": 8.869014190966419e-07, + "loss": 0.6651, + "step": 62710 + }, + { + "epoch": 2.480571100872076, + "grad_norm": 1.4416693004426953, + "learning_rate": 8.855935282231148e-07, + "loss": 0.6374, + "step": 62720 + }, + { + "epoch": 2.480966600090965, + "grad_norm": 1.3913758857778433, + "learning_rate": 8.842865087103863e-07, + "loss": 0.6239, + "step": 62730 + }, + { + "epoch": 2.481362099309854, + "grad_norm": 1.440279853761296, + "learning_rate": 8.82980360835266e-07, + "loss": 0.6681, + "step": 62740 + }, + { + "epoch": 2.481757598528743, + "grad_norm": 1.5505994266108365, + "learning_rate": 8.816750848743705e-07, + "loss": 0.6532, + "step": 62750 + }, + { + "epoch": 2.482153097747632, + "grad_norm": 2.1636942681583813, + "learning_rate": 8.803706811041373e-07, + "loss": 0.6201, + "step": 62760 + }, + { + "epoch": 2.482548596966521, + "grad_norm": 1.7695730589459897, + "learning_rate": 8.790671498008141e-07, + "loss": 0.6165, + "step": 62770 + }, + { + "epoch": 2.48294409618541, + "grad_norm": 1.4994018571311274, + "learning_rate": 8.777644912404693e-07, + "loss": 0.6417, + "step": 62780 + }, + { + "epoch": 2.483339595404299, + "grad_norm": 1.176276158836899, + "learning_rate": 8.76462705698981e-07, + "loss": 0.6257, + "step": 62790 + }, + { + "epoch": 2.483735094623188, + "grad_norm": 1.4198950469951166, + "learning_rate": 8.751617934520479e-07, + "loss": 0.633, + "step": 62800 + }, + { + "epoch": 2.4841305938420772, + "grad_norm": 1.5267975687121929, + "learning_rate": 8.738617547751776e-07, + "loss": 0.6494, + "step": 62810 + }, + { + "epoch": 2.4845260930609663, + "grad_norm": 1.4114562111610234, + "learning_rate": 8.725625899436996e-07, + "loss": 0.6267, + "step": 62820 + }, + { + "epoch": 2.4849215922798553, + "grad_norm": 1.7341174176053717, + "learning_rate": 8.71264299232753e-07, + "loss": 0.6501, + "step": 62830 + }, + { + "epoch": 2.4853170914987444, + "grad_norm": 1.5297476215036843, + "learning_rate": 8.699668829172936e-07, + "loss": 0.6792, + "step": 62840 + }, + { + "epoch": 2.4857125907176334, + "grad_norm": 1.585368319975606, + "learning_rate": 8.686703412720904e-07, + "loss": 0.6261, + "step": 62850 + }, + { + "epoch": 2.4861080899365224, + "grad_norm": 1.430888819363717, + "learning_rate": 8.673746745717321e-07, + "loss": 0.6668, + "step": 62860 + }, + { + "epoch": 2.4865035891554115, + "grad_norm": 1.309847223506252, + "learning_rate": 8.660798830906148e-07, + "loss": 0.6486, + "step": 62870 + }, + { + "epoch": 2.4868990883743005, + "grad_norm": 1.6277582586387795, + "learning_rate": 8.647859671029568e-07, + "loss": 0.6608, + "step": 62880 + }, + { + "epoch": 2.4872945875931896, + "grad_norm": 1.5101654507004232, + "learning_rate": 8.634929268827852e-07, + "loss": 0.6679, + "step": 62890 + }, + { + "epoch": 2.4876900868120786, + "grad_norm": 1.3435015396226426, + "learning_rate": 8.622007627039436e-07, + "loss": 0.6634, + "step": 62900 + }, + { + "epoch": 2.4880855860309676, + 
"grad_norm": 1.6139767103014016, + "learning_rate": 8.60909474840092e-07, + "loss": 0.6746, + "step": 62910 + }, + { + "epoch": 2.4884810852498567, + "grad_norm": 1.3277232870043423, + "learning_rate": 8.596190635647006e-07, + "loss": 0.682, + "step": 62920 + }, + { + "epoch": 2.4888765844687457, + "grad_norm": 1.458653472983833, + "learning_rate": 8.583295291510591e-07, + "loss": 0.638, + "step": 62930 + }, + { + "epoch": 2.4892720836876348, + "grad_norm": 1.2215171501564577, + "learning_rate": 8.570408718722656e-07, + "loss": 0.6495, + "step": 62940 + }, + { + "epoch": 2.489667582906524, + "grad_norm": 1.5068448611067728, + "learning_rate": 8.557530920012396e-07, + "loss": 0.6619, + "step": 62950 + }, + { + "epoch": 2.490063082125413, + "grad_norm": 1.3211681548482772, + "learning_rate": 8.544661898107081e-07, + "loss": 0.6537, + "step": 62960 + }, + { + "epoch": 2.490458581344302, + "grad_norm": 1.4480668111754622, + "learning_rate": 8.531801655732158e-07, + "loss": 0.6267, + "step": 62970 + }, + { + "epoch": 2.490854080563191, + "grad_norm": 1.5213505338685824, + "learning_rate": 8.518950195611192e-07, + "loss": 0.6482, + "step": 62980 + }, + { + "epoch": 2.49124957978208, + "grad_norm": 1.4109406820508017, + "learning_rate": 8.506107520465923e-07, + "loss": 0.679, + "step": 62990 + }, + { + "epoch": 2.491645079000969, + "grad_norm": 1.7065792918642062, + "learning_rate": 8.493273633016186e-07, + "loss": 0.6331, + "step": 63000 + }, + { + "epoch": 2.492040578219858, + "grad_norm": 1.238981061339666, + "learning_rate": 8.480448535980008e-07, + "loss": 0.6905, + "step": 63010 + }, + { + "epoch": 2.492436077438747, + "grad_norm": 1.4103121127250444, + "learning_rate": 8.467632232073497e-07, + "loss": 0.6741, + "step": 63020 + }, + { + "epoch": 2.492831576657636, + "grad_norm": 1.474332623979177, + "learning_rate": 8.454824724010951e-07, + "loss": 0.6304, + "step": 63030 + }, + { + "epoch": 2.493227075876525, + "grad_norm": 1.6107543437688416, + "learning_rate": 8.442026014504756e-07, + "loss": 0.6067, + "step": 63040 + }, + { + "epoch": 2.493622575095414, + "grad_norm": 1.2994985351397115, + "learning_rate": 8.429236106265482e-07, + "loss": 0.6561, + "step": 63050 + }, + { + "epoch": 2.4940180743143032, + "grad_norm": 1.2811967071546007, + "learning_rate": 8.416455002001806e-07, + "loss": 0.6239, + "step": 63060 + }, + { + "epoch": 2.4944135735331923, + "grad_norm": 1.6154029237172847, + "learning_rate": 8.40368270442053e-07, + "loss": 0.6649, + "step": 63070 + }, + { + "epoch": 2.4948090727520813, + "grad_norm": 1.430999136350645, + "learning_rate": 8.390919216226634e-07, + "loss": 0.6348, + "step": 63080 + }, + { + "epoch": 2.4952045719709703, + "grad_norm": 1.4004336370578108, + "learning_rate": 8.378164540123191e-07, + "loss": 0.6403, + "step": 63090 + }, + { + "epoch": 2.4956000711898594, + "grad_norm": 1.6829120560389035, + "learning_rate": 8.365418678811416e-07, + "loss": 0.6494, + "step": 63100 + }, + { + "epoch": 2.4959955704087484, + "grad_norm": 1.8855314343538596, + "learning_rate": 8.352681634990683e-07, + "loss": 0.6319, + "step": 63110 + }, + { + "epoch": 2.4963910696276375, + "grad_norm": 1.6200870333688646, + "learning_rate": 8.339953411358471e-07, + "loss": 0.6278, + "step": 63120 + }, + { + "epoch": 2.4967865688465265, + "grad_norm": 1.421846388191181, + "learning_rate": 8.327234010610385e-07, + "loss": 0.6179, + "step": 63130 + }, + { + "epoch": 2.4971820680654155, + "grad_norm": 1.5421255745157936, + "learning_rate": 8.314523435440203e-07, + "loss": 0.6267, + "step": 
63140 + }, + { + "epoch": 2.4975775672843046, + "grad_norm": 1.6931529358451676, + "learning_rate": 8.301821688539785e-07, + "loss": 0.6251, + "step": 63150 + }, + { + "epoch": 2.4979730665031936, + "grad_norm": 1.4438046039011787, + "learning_rate": 8.289128772599159e-07, + "loss": 0.6507, + "step": 63160 + }, + { + "epoch": 2.4983685657220827, + "grad_norm": 1.5983460025928997, + "learning_rate": 8.276444690306451e-07, + "loss": 0.6626, + "step": 63170 + }, + { + "epoch": 2.4987640649409717, + "grad_norm": 1.7811642513131678, + "learning_rate": 8.263769444347957e-07, + "loss": 0.62, + "step": 63180 + }, + { + "epoch": 2.4991595641598607, + "grad_norm": 1.420844575349242, + "learning_rate": 8.251103037408043e-07, + "loss": 0.6636, + "step": 63190 + }, + { + "epoch": 2.4995550633787498, + "grad_norm": 1.895786180603178, + "learning_rate": 8.238445472169271e-07, + "loss": 0.6418, + "step": 63200 + }, + { + "epoch": 2.499950562597639, + "grad_norm": 1.3154920560413872, + "learning_rate": 8.225796751312282e-07, + "loss": 0.6348, + "step": 63210 + }, + { + "epoch": 2.500346061816528, + "grad_norm": 1.5507445316264594, + "learning_rate": 8.21315687751586e-07, + "loss": 0.6517, + "step": 63220 + }, + { + "epoch": 2.500741561035417, + "grad_norm": 1.3246659817953341, + "learning_rate": 8.200525853456892e-07, + "loss": 0.6591, + "step": 63230 + }, + { + "epoch": 2.501137060254306, + "grad_norm": 1.5009986996407279, + "learning_rate": 8.187903681810449e-07, + "loss": 0.6518, + "step": 63240 + }, + { + "epoch": 2.501532559473195, + "grad_norm": 1.3898452371231547, + "learning_rate": 8.17529036524966e-07, + "loss": 0.6702, + "step": 63250 + }, + { + "epoch": 2.501928058692084, + "grad_norm": 1.3773557734199384, + "learning_rate": 8.162685906445833e-07, + "loss": 0.636, + "step": 63260 + }, + { + "epoch": 2.502323557910973, + "grad_norm": 1.6405808013267522, + "learning_rate": 8.150090308068359e-07, + "loss": 0.678, + "step": 63270 + }, + { + "epoch": 2.502719057129862, + "grad_norm": 1.870128765345886, + "learning_rate": 8.137503572784782e-07, + "loss": 0.6503, + "step": 63280 + }, + { + "epoch": 2.503114556348751, + "grad_norm": 1.491973614542632, + "learning_rate": 8.12492570326075e-07, + "loss": 0.6489, + "step": 63290 + }, + { + "epoch": 2.50351005556764, + "grad_norm": 1.647248289622151, + "learning_rate": 8.112356702160046e-07, + "loss": 0.667, + "step": 63300 + }, + { + "epoch": 2.503905554786529, + "grad_norm": 1.4382096851185189, + "learning_rate": 8.09979657214457e-07, + "loss": 0.6597, + "step": 63310 + }, + { + "epoch": 2.5043010540054182, + "grad_norm": 1.4127706331466097, + "learning_rate": 8.08724531587432e-07, + "loss": 0.6322, + "step": 63320 + }, + { + "epoch": 2.5046965532243073, + "grad_norm": 1.8051513070023004, + "learning_rate": 8.074702936007472e-07, + "loss": 0.6568, + "step": 63330 + }, + { + "epoch": 2.5050920524431963, + "grad_norm": 1.8887772380015515, + "learning_rate": 8.062169435200267e-07, + "loss": 0.6395, + "step": 63340 + }, + { + "epoch": 2.5054875516620854, + "grad_norm": 1.5912545429694345, + "learning_rate": 8.049644816107094e-07, + "loss": 0.5802, + "step": 63350 + }, + { + "epoch": 2.5058830508809744, + "grad_norm": 1.6595292247544349, + "learning_rate": 8.037129081380429e-07, + "loss": 0.6536, + "step": 63360 + }, + { + "epoch": 2.5062785500998634, + "grad_norm": 1.4623974840906868, + "learning_rate": 8.024622233670926e-07, + "loss": 0.6474, + "step": 63370 + }, + { + "epoch": 2.5066740493187525, + "grad_norm": 1.4673617443372873, + "learning_rate": 
8.012124275627292e-07, + "loss": 0.6596, + "step": 63380 + }, + { + "epoch": 2.5070695485376415, + "grad_norm": 1.7589369671854493, + "learning_rate": 7.9996352098964e-07, + "loss": 0.6719, + "step": 63390 + }, + { + "epoch": 2.5074650477565306, + "grad_norm": 1.216763752073658, + "learning_rate": 7.987155039123201e-07, + "loss": 0.6581, + "step": 63400 + }, + { + "epoch": 2.5078605469754196, + "grad_norm": 1.6247504919023326, + "learning_rate": 7.974683765950808e-07, + "loss": 0.6476, + "step": 63410 + }, + { + "epoch": 2.5082560461943086, + "grad_norm": 1.6316927824068348, + "learning_rate": 7.962221393020392e-07, + "loss": 0.6482, + "step": 63420 + }, + { + "epoch": 2.5086515454131977, + "grad_norm": 1.523106900589352, + "learning_rate": 7.949767922971302e-07, + "loss": 0.6467, + "step": 63430 + }, + { + "epoch": 2.5090470446320867, + "grad_norm": 1.5718266179477145, + "learning_rate": 7.937323358440935e-07, + "loss": 0.6564, + "step": 63440 + }, + { + "epoch": 2.5094425438509758, + "grad_norm": 1.6197619057117363, + "learning_rate": 7.924887702064882e-07, + "loss": 0.6044, + "step": 63450 + }, + { + "epoch": 2.509838043069865, + "grad_norm": 1.6228221150341298, + "learning_rate": 7.912460956476753e-07, + "loss": 0.6674, + "step": 63460 + }, + { + "epoch": 2.510233542288754, + "grad_norm": 1.4876497457779427, + "learning_rate": 7.900043124308354e-07, + "loss": 0.6816, + "step": 63470 + }, + { + "epoch": 2.510629041507643, + "grad_norm": 1.653460457016791, + "learning_rate": 7.887634208189543e-07, + "loss": 0.654, + "step": 63480 + }, + { + "epoch": 2.511024540726532, + "grad_norm": 1.3788636207437148, + "learning_rate": 7.875234210748339e-07, + "loss": 0.6551, + "step": 63490 + }, + { + "epoch": 2.511420039945421, + "grad_norm": 1.3921040566769074, + "learning_rate": 7.862843134610832e-07, + "loss": 0.6709, + "step": 63500 + }, + { + "epoch": 2.51181553916431, + "grad_norm": 1.469479419151567, + "learning_rate": 7.850460982401259e-07, + "loss": 0.6693, + "step": 63510 + }, + { + "epoch": 2.512211038383199, + "grad_norm": 1.8575011969455981, + "learning_rate": 7.838087756741919e-07, + "loss": 0.6254, + "step": 63520 + }, + { + "epoch": 2.512606537602088, + "grad_norm": 1.4649251746976057, + "learning_rate": 7.825723460253282e-07, + "loss": 0.6306, + "step": 63530 + }, + { + "epoch": 2.513002036820977, + "grad_norm": 1.6339690068494417, + "learning_rate": 7.81336809555388e-07, + "loss": 0.6609, + "step": 63540 + }, + { + "epoch": 2.513397536039866, + "grad_norm": 1.1678016073328654, + "learning_rate": 7.801021665260355e-07, + "loss": 0.6617, + "step": 63550 + }, + { + "epoch": 2.5137930352587556, + "grad_norm": 1.5747655237636606, + "learning_rate": 7.788684171987487e-07, + "loss": 0.6253, + "step": 63560 + }, + { + "epoch": 2.5141885344776442, + "grad_norm": 1.5584210004059487, + "learning_rate": 7.776355618348142e-07, + "loss": 0.6566, + "step": 63570 + }, + { + "epoch": 2.5145840336965337, + "grad_norm": 1.3341122680843094, + "learning_rate": 7.764036006953285e-07, + "loss": 0.6715, + "step": 63580 + }, + { + "epoch": 2.5149795329154223, + "grad_norm": 1.5556587834951154, + "learning_rate": 7.751725340412014e-07, + "loss": 0.6418, + "step": 63590 + }, + { + "epoch": 2.515375032134312, + "grad_norm": 1.3525840074132167, + "learning_rate": 7.73942362133151e-07, + "loss": 0.6608, + "step": 63600 + }, + { + "epoch": 2.5157705313532004, + "grad_norm": 1.4780082058622266, + "learning_rate": 7.72713085231706e-07, + "loss": 0.6422, + "step": 63610 + }, + { + "epoch": 2.51616603057209, + 
"grad_norm": 1.2975374943721156, + "learning_rate": 7.714847035972078e-07, + "loss": 0.6751, + "step": 63620 + }, + { + "epoch": 2.5165615297909785, + "grad_norm": 1.2297419535072853, + "learning_rate": 7.702572174898043e-07, + "loss": 0.6747, + "step": 63630 + }, + { + "epoch": 2.516957029009868, + "grad_norm": 1.7581706604975569, + "learning_rate": 7.690306271694587e-07, + "loss": 0.6347, + "step": 63640 + }, + { + "epoch": 2.5173525282287565, + "grad_norm": 1.3210786059057584, + "learning_rate": 7.678049328959391e-07, + "loss": 0.644, + "step": 63650 + }, + { + "epoch": 2.517748027447646, + "grad_norm": 1.4356136901919918, + "learning_rate": 7.665801349288294e-07, + "loss": 0.6156, + "step": 63660 + }, + { + "epoch": 2.5181435266665346, + "grad_norm": 1.855447833296287, + "learning_rate": 7.653562335275183e-07, + "loss": 0.6433, + "step": 63670 + }, + { + "epoch": 2.518539025885424, + "grad_norm": 1.4234518359546702, + "learning_rate": 7.641332289512094e-07, + "loss": 0.6532, + "step": 63680 + }, + { + "epoch": 2.5189345251043127, + "grad_norm": 1.606622494532379, + "learning_rate": 7.629111214589114e-07, + "loss": 0.6306, + "step": 63690 + }, + { + "epoch": 2.519330024323202, + "grad_norm": 1.3417351965958764, + "learning_rate": 7.616899113094495e-07, + "loss": 0.6412, + "step": 63700 + }, + { + "epoch": 2.519725523542091, + "grad_norm": 1.4463619080999224, + "learning_rate": 7.604695987614508e-07, + "loss": 0.6473, + "step": 63710 + }, + { + "epoch": 2.5201210227609803, + "grad_norm": 1.5176804577662928, + "learning_rate": 7.592501840733602e-07, + "loss": 0.6089, + "step": 63720 + }, + { + "epoch": 2.520516521979869, + "grad_norm": 1.1632054246897647, + "learning_rate": 7.580316675034255e-07, + "loss": 0.6658, + "step": 63730 + }, + { + "epoch": 2.5209120211987583, + "grad_norm": 1.652567896807858, + "learning_rate": 7.56814049309711e-07, + "loss": 0.6341, + "step": 63740 + }, + { + "epoch": 2.521307520417647, + "grad_norm": 1.6090079031867557, + "learning_rate": 7.555973297500841e-07, + "loss": 0.6301, + "step": 63750 + }, + { + "epoch": 2.5217030196365364, + "grad_norm": 1.5197996184173634, + "learning_rate": 7.543815090822288e-07, + "loss": 0.6535, + "step": 63760 + }, + { + "epoch": 2.522098518855425, + "grad_norm": 1.3457718741471285, + "learning_rate": 7.531665875636324e-07, + "loss": 0.673, + "step": 63770 + }, + { + "epoch": 2.5224940180743145, + "grad_norm": 1.3066599926141607, + "learning_rate": 7.519525654515941e-07, + "loss": 0.6569, + "step": 63780 + }, + { + "epoch": 2.522889517293203, + "grad_norm": 1.4335640762244388, + "learning_rate": 7.507394430032255e-07, + "loss": 0.6321, + "step": 63790 + }, + { + "epoch": 2.5232850165120926, + "grad_norm": 1.4828753579370653, + "learning_rate": 7.495272204754428e-07, + "loss": 0.6175, + "step": 63800 + }, + { + "epoch": 2.523680515730981, + "grad_norm": 1.7804579988153775, + "learning_rate": 7.483158981249761e-07, + "loss": 0.6413, + "step": 63810 + }, + { + "epoch": 2.5240760149498707, + "grad_norm": 1.5587827224832627, + "learning_rate": 7.471054762083608e-07, + "loss": 0.6523, + "step": 63820 + }, + { + "epoch": 2.5244715141687593, + "grad_norm": 1.242734199945498, + "learning_rate": 7.45895954981945e-07, + "loss": 0.6569, + "step": 63830 + }, + { + "epoch": 2.5248670133876487, + "grad_norm": 1.1548645219013924, + "learning_rate": 7.446873347018824e-07, + "loss": 0.6389, + "step": 63840 + }, + { + "epoch": 2.5252625126065373, + "grad_norm": 1.390645330296244, + "learning_rate": 7.434796156241403e-07, + "loss": 0.6637, + 
"step": 63850 + }, + { + "epoch": 2.525658011825427, + "grad_norm": 1.4611925248930342, + "learning_rate": 7.422727980044914e-07, + "loss": 0.6433, + "step": 63860 + }, + { + "epoch": 2.5260535110443154, + "grad_norm": 1.2388985783426072, + "learning_rate": 7.410668820985206e-07, + "loss": 0.6327, + "step": 63870 + }, + { + "epoch": 2.526449010263205, + "grad_norm": 1.4914892348947144, + "learning_rate": 7.398618681616182e-07, + "loss": 0.6289, + "step": 63880 + }, + { + "epoch": 2.526844509482094, + "grad_norm": 1.4462997489650091, + "learning_rate": 7.386577564489877e-07, + "loss": 0.6535, + "step": 63890 + }, + { + "epoch": 2.527240008700983, + "grad_norm": 1.5843617688194136, + "learning_rate": 7.374545472156374e-07, + "loss": 0.6201, + "step": 63900 + }, + { + "epoch": 2.527635507919872, + "grad_norm": 1.3444249863623576, + "learning_rate": 7.36252240716388e-07, + "loss": 0.6341, + "step": 63910 + }, + { + "epoch": 2.528031007138761, + "grad_norm": 1.5466589150827639, + "learning_rate": 7.350508372058657e-07, + "loss": 0.6555, + "step": 63920 + }, + { + "epoch": 2.52842650635765, + "grad_norm": 1.4055301225211925, + "learning_rate": 7.338503369385097e-07, + "loss": 0.6854, + "step": 63930 + }, + { + "epoch": 2.528822005576539, + "grad_norm": 1.5328363216445886, + "learning_rate": 7.326507401685634e-07, + "loss": 0.6965, + "step": 63940 + }, + { + "epoch": 2.529217504795428, + "grad_norm": 1.6822995299394083, + "learning_rate": 7.314520471500813e-07, + "loss": 0.6335, + "step": 63950 + }, + { + "epoch": 2.529613004014317, + "grad_norm": 1.7948228501110086, + "learning_rate": 7.302542581369254e-07, + "loss": 0.6182, + "step": 63960 + }, + { + "epoch": 2.5300085032332063, + "grad_norm": 1.5126533728553917, + "learning_rate": 7.290573733827683e-07, + "loss": 0.6239, + "step": 63970 + }, + { + "epoch": 2.5304040024520953, + "grad_norm": 1.4597173872060147, + "learning_rate": 7.278613931410877e-07, + "loss": 0.6487, + "step": 63980 + }, + { + "epoch": 2.5307995016709843, + "grad_norm": 1.4037056788550275, + "learning_rate": 7.266663176651739e-07, + "loss": 0.6566, + "step": 63990 + }, + { + "epoch": 2.5311950008898734, + "grad_norm": 1.3656886098733774, + "learning_rate": 7.254721472081227e-07, + "loss": 0.6514, + "step": 64000 + }, + { + "epoch": 2.5315905001087624, + "grad_norm": 1.7648473525926431, + "learning_rate": 7.242788820228381e-07, + "loss": 0.6673, + "step": 64010 + }, + { + "epoch": 2.5319859993276514, + "grad_norm": 1.4951716781466082, + "learning_rate": 7.230865223620342e-07, + "loss": 0.6656, + "step": 64020 + }, + { + "epoch": 2.5323814985465405, + "grad_norm": 1.518394802150982, + "learning_rate": 7.218950684782311e-07, + "loss": 0.6527, + "step": 64030 + }, + { + "epoch": 2.5327769977654295, + "grad_norm": 1.6029453925841384, + "learning_rate": 7.207045206237601e-07, + "loss": 0.6221, + "step": 64040 + }, + { + "epoch": 2.5331724969843186, + "grad_norm": 1.3012678994012925, + "learning_rate": 7.195148790507572e-07, + "loss": 0.6857, + "step": 64050 + }, + { + "epoch": 2.5335679962032076, + "grad_norm": 1.4442843991955365, + "learning_rate": 7.18326144011171e-07, + "loss": 0.6308, + "step": 64060 + }, + { + "epoch": 2.5339634954220966, + "grad_norm": 1.6481146908524857, + "learning_rate": 7.171383157567507e-07, + "loss": 0.632, + "step": 64070 + }, + { + "epoch": 2.5343589946409857, + "grad_norm": 1.481039199847829, + "learning_rate": 7.159513945390612e-07, + "loss": 0.6455, + "step": 64080 + }, + { + "epoch": 2.5347544938598747, + "grad_norm": 1.5901662236972298, + 
"learning_rate": 7.147653806094706e-07, + "loss": 0.6808, + "step": 64090 + }, + { + "epoch": 2.5351499930787638, + "grad_norm": 1.6810272782322087, + "learning_rate": 7.135802742191578e-07, + "loss": 0.6347, + "step": 64100 + }, + { + "epoch": 2.535545492297653, + "grad_norm": 1.685341120738639, + "learning_rate": 7.123960756191056e-07, + "loss": 0.6633, + "step": 64110 + }, + { + "epoch": 2.535940991516542, + "grad_norm": 1.4389745441286776, + "learning_rate": 7.112127850601103e-07, + "loss": 0.617, + "step": 64120 + }, + { + "epoch": 2.536336490735431, + "grad_norm": 1.3193730233786827, + "learning_rate": 7.100304027927696e-07, + "loss": 0.6449, + "step": 64130 + }, + { + "epoch": 2.53673198995432, + "grad_norm": 1.3535632068787948, + "learning_rate": 7.088489290674938e-07, + "loss": 0.6603, + "step": 64140 + }, + { + "epoch": 2.537127489173209, + "grad_norm": 1.5667684132141804, + "learning_rate": 7.076683641344972e-07, + "loss": 0.6243, + "step": 64150 + }, + { + "epoch": 2.537522988392098, + "grad_norm": 1.6672802724444284, + "learning_rate": 7.064887082438049e-07, + "loss": 0.6272, + "step": 64160 + }, + { + "epoch": 2.537918487610987, + "grad_norm": 1.4388337615207, + "learning_rate": 7.053099616452464e-07, + "loss": 0.6629, + "step": 64170 + }, + { + "epoch": 2.538313986829876, + "grad_norm": 1.666663529523057, + "learning_rate": 7.041321245884608e-07, + "loss": 0.6876, + "step": 64180 + }, + { + "epoch": 2.538709486048765, + "grad_norm": 1.378539536989363, + "learning_rate": 7.029551973228943e-07, + "loss": 0.6507, + "step": 64190 + }, + { + "epoch": 2.539104985267654, + "grad_norm": 1.4746413949833421, + "learning_rate": 7.017791800977991e-07, + "loss": 0.6922, + "step": 64200 + }, + { + "epoch": 2.539500484486543, + "grad_norm": 1.2625828816637943, + "learning_rate": 7.006040731622343e-07, + "loss": 0.6717, + "step": 64210 + }, + { + "epoch": 2.5398959837054322, + "grad_norm": 1.1690945069767034, + "learning_rate": 6.994298767650698e-07, + "loss": 0.6588, + "step": 64220 + }, + { + "epoch": 2.5402914829243213, + "grad_norm": 1.4394871701547498, + "learning_rate": 6.982565911549783e-07, + "loss": 0.6692, + "step": 64230 + }, + { + "epoch": 2.5406869821432103, + "grad_norm": 1.7926294439519617, + "learning_rate": 6.970842165804437e-07, + "loss": 0.6453, + "step": 64240 + }, + { + "epoch": 2.5410824813620994, + "grad_norm": 1.4632823611424701, + "learning_rate": 6.959127532897536e-07, + "loss": 0.6645, + "step": 64250 + }, + { + "epoch": 2.5414779805809884, + "grad_norm": 1.3098492414216825, + "learning_rate": 6.947422015310029e-07, + "loss": 0.6261, + "step": 64260 + }, + { + "epoch": 2.5418734797998774, + "grad_norm": 1.6759362917098144, + "learning_rate": 6.935725615520961e-07, + "loss": 0.6553, + "step": 64270 + }, + { + "epoch": 2.5422689790187665, + "grad_norm": 1.4048591046827679, + "learning_rate": 6.924038336007416e-07, + "loss": 0.6114, + "step": 64280 + }, + { + "epoch": 2.5426644782376555, + "grad_norm": 1.5032320314848409, + "learning_rate": 6.912360179244576e-07, + "loss": 0.6302, + "step": 64290 + }, + { + "epoch": 2.5430599774565446, + "grad_norm": 1.978846624091732, + "learning_rate": 6.900691147705652e-07, + "loss": 0.6296, + "step": 64300 + }, + { + "epoch": 2.5434554766754336, + "grad_norm": 1.4151085709598235, + "learning_rate": 6.889031243861982e-07, + "loss": 0.609, + "step": 64310 + }, + { + "epoch": 2.5438509758943226, + "grad_norm": 1.3592749604610284, + "learning_rate": 6.877380470182881e-07, + "loss": 0.6357, + "step": 64320 + }, + { + "epoch": 
2.5442464751132117, + "grad_norm": 1.624499301516054, + "learning_rate": 6.865738829135827e-07, + "loss": 0.6172, + "step": 64330 + }, + { + "epoch": 2.5446419743321007, + "grad_norm": 1.3110736274845878, + "learning_rate": 6.854106323186294e-07, + "loss": 0.6445, + "step": 64340 + }, + { + "epoch": 2.5450374735509897, + "grad_norm": 1.3787268488250088, + "learning_rate": 6.842482954797869e-07, + "loss": 0.6364, + "step": 64350 + }, + { + "epoch": 2.545432972769879, + "grad_norm": 1.4867735525275818, + "learning_rate": 6.830868726432161e-07, + "loss": 0.6398, + "step": 64360 + }, + { + "epoch": 2.545828471988768, + "grad_norm": 1.3799749482140977, + "learning_rate": 6.819263640548884e-07, + "loss": 0.6412, + "step": 64370 + }, + { + "epoch": 2.546223971207657, + "grad_norm": 1.464857903363239, + "learning_rate": 6.807667699605775e-07, + "loss": 0.6466, + "step": 64380 + }, + { + "epoch": 2.546619470426546, + "grad_norm": 1.5043346964684243, + "learning_rate": 6.796080906058683e-07, + "loss": 0.6763, + "step": 64390 + }, + { + "epoch": 2.547014969645435, + "grad_norm": 1.5315236711721354, + "learning_rate": 6.784503262361464e-07, + "loss": 0.6106, + "step": 64400 + }, + { + "epoch": 2.547410468864324, + "grad_norm": 1.49336411608949, + "learning_rate": 6.772934770966089e-07, + "loss": 0.6599, + "step": 64410 + }, + { + "epoch": 2.547805968083213, + "grad_norm": 1.617251759033168, + "learning_rate": 6.761375434322559e-07, + "loss": 0.6686, + "step": 64420 + }, + { + "epoch": 2.548201467302102, + "grad_norm": 1.4681605835162652, + "learning_rate": 6.749825254878933e-07, + "loss": 0.6697, + "step": 64430 + }, + { + "epoch": 2.548596966520991, + "grad_norm": 1.5569685125586523, + "learning_rate": 6.738284235081344e-07, + "loss": 0.6736, + "step": 64440 + }, + { + "epoch": 2.54899246573988, + "grad_norm": 1.5922001412111684, + "learning_rate": 6.72675237737399e-07, + "loss": 0.6853, + "step": 64450 + }, + { + "epoch": 2.549387964958769, + "grad_norm": 1.5651530357971513, + "learning_rate": 6.715229684199109e-07, + "loss": 0.6976, + "step": 64460 + }, + { + "epoch": 2.549783464177658, + "grad_norm": 1.483646448211547, + "learning_rate": 6.703716157997031e-07, + "loss": 0.6324, + "step": 64470 + }, + { + "epoch": 2.5501789633965473, + "grad_norm": 1.6781225982285821, + "learning_rate": 6.692211801206111e-07, + "loss": 0.638, + "step": 64480 + }, + { + "epoch": 2.5505744626154363, + "grad_norm": 1.4220896570869705, + "learning_rate": 6.680716616262762e-07, + "loss": 0.6718, + "step": 64490 + }, + { + "epoch": 2.5509699618343253, + "grad_norm": 1.330645928287722, + "learning_rate": 6.66923060560149e-07, + "loss": 0.618, + "step": 64500 + }, + { + "epoch": 2.5513654610532144, + "grad_norm": 1.5587740110969288, + "learning_rate": 6.657753771654812e-07, + "loss": 0.6258, + "step": 64510 + }, + { + "epoch": 2.5517609602721034, + "grad_norm": 1.4081390674616483, + "learning_rate": 6.646286116853351e-07, + "loss": 0.6385, + "step": 64520 + }, + { + "epoch": 2.5521564594909925, + "grad_norm": 1.7677420048528714, + "learning_rate": 6.634827643625735e-07, + "loss": 0.6206, + "step": 64530 + }, + { + "epoch": 2.5525519587098815, + "grad_norm": 1.285515909697965, + "learning_rate": 6.623378354398691e-07, + "loss": 0.6588, + "step": 64540 + }, + { + "epoch": 2.5529474579287705, + "grad_norm": 1.4981960539225139, + "learning_rate": 6.611938251596978e-07, + "loss": 0.6319, + "step": 64550 + }, + { + "epoch": 2.5533429571476596, + "grad_norm": 1.280442623280671, + "learning_rate": 6.600507337643414e-07, + "loss": 
0.6068, + "step": 64560 + }, + { + "epoch": 2.5537384563665486, + "grad_norm": 1.3930178025558675, + "learning_rate": 6.589085614958852e-07, + "loss": 0.644, + "step": 64570 + }, + { + "epoch": 2.5541339555854377, + "grad_norm": 1.4492186047061981, + "learning_rate": 6.57767308596225e-07, + "loss": 0.6498, + "step": 64580 + }, + { + "epoch": 2.5545294548043267, + "grad_norm": 1.592177634645699, + "learning_rate": 6.566269753070547e-07, + "loss": 0.6156, + "step": 64590 + }, + { + "epoch": 2.5549249540232157, + "grad_norm": 1.413915533106512, + "learning_rate": 6.554875618698813e-07, + "loss": 0.6319, + "step": 64600 + }, + { + "epoch": 2.5553204532421048, + "grad_norm": 1.691402239662337, + "learning_rate": 6.543490685260107e-07, + "loss": 0.6282, + "step": 64610 + }, + { + "epoch": 2.555715952460994, + "grad_norm": 1.4182407924117755, + "learning_rate": 6.532114955165569e-07, + "loss": 0.6222, + "step": 64620 + }, + { + "epoch": 2.556111451679883, + "grad_norm": 1.4428935534445777, + "learning_rate": 6.520748430824381e-07, + "loss": 0.6563, + "step": 64630 + }, + { + "epoch": 2.556506950898772, + "grad_norm": 1.375222417798048, + "learning_rate": 6.509391114643787e-07, + "loss": 0.6313, + "step": 64640 + }, + { + "epoch": 2.556902450117661, + "grad_norm": 1.7151935067741564, + "learning_rate": 6.49804300902907e-07, + "loss": 0.6186, + "step": 64650 + }, + { + "epoch": 2.55729794933655, + "grad_norm": 1.5579021075814052, + "learning_rate": 6.486704116383547e-07, + "loss": 0.631, + "step": 64660 + }, + { + "epoch": 2.557693448555439, + "grad_norm": 1.305896274034207, + "learning_rate": 6.475374439108623e-07, + "loss": 0.6564, + "step": 64670 + }, + { + "epoch": 2.558088947774328, + "grad_norm": 1.6039936468718605, + "learning_rate": 6.464053979603718e-07, + "loss": 0.6005, + "step": 64680 + }, + { + "epoch": 2.558484446993217, + "grad_norm": 1.5820520144580468, + "learning_rate": 6.45274274026631e-07, + "loss": 0.6261, + "step": 64690 + }, + { + "epoch": 2.558879946212106, + "grad_norm": 1.7683693120383075, + "learning_rate": 6.441440723491938e-07, + "loss": 0.6305, + "step": 64700 + }, + { + "epoch": 2.559275445430995, + "grad_norm": 1.259330393854891, + "learning_rate": 6.430147931674163e-07, + "loss": 0.6343, + "step": 64710 + }, + { + "epoch": 2.559670944649884, + "grad_norm": 1.404342326652451, + "learning_rate": 6.418864367204603e-07, + "loss": 0.6514, + "step": 64720 + }, + { + "epoch": 2.5600664438687732, + "grad_norm": 1.3370667182072586, + "learning_rate": 6.407590032472938e-07, + "loss": 0.6623, + "step": 64730 + }, + { + "epoch": 2.5604619430876623, + "grad_norm": 1.3470432245547215, + "learning_rate": 6.396324929866866e-07, + "loss": 0.6543, + "step": 64740 + }, + { + "epoch": 2.5608574423065513, + "grad_norm": 1.6588143553005, + "learning_rate": 6.385069061772154e-07, + "loss": 0.6637, + "step": 64750 + }, + { + "epoch": 2.5612529415254404, + "grad_norm": 1.391687738537518, + "learning_rate": 6.373822430572579e-07, + "loss": 0.626, + "step": 64760 + }, + { + "epoch": 2.5616484407443294, + "grad_norm": 1.4089109900553332, + "learning_rate": 6.362585038650021e-07, + "loss": 0.599, + "step": 64770 + }, + { + "epoch": 2.5620439399632184, + "grad_norm": 1.9344132896465636, + "learning_rate": 6.351356888384331e-07, + "loss": 0.6414, + "step": 64780 + }, + { + "epoch": 2.5624394391821075, + "grad_norm": 1.7518604994578053, + "learning_rate": 6.340137982153465e-07, + "loss": 0.6371, + "step": 64790 + }, + { + "epoch": 2.5628349384009965, + "grad_norm": 1.7207446991893527, + 
"learning_rate": 6.328928322333394e-07, + "loss": 0.6143, + "step": 64800 + }, + { + "epoch": 2.5632304376198856, + "grad_norm": 1.8064046863523273, + "learning_rate": 6.317727911298116e-07, + "loss": 0.613, + "step": 64810 + }, + { + "epoch": 2.5636259368387746, + "grad_norm": 1.3921089111696843, + "learning_rate": 6.306536751419684e-07, + "loss": 0.6432, + "step": 64820 + }, + { + "epoch": 2.5640214360576636, + "grad_norm": 1.5636391433656414, + "learning_rate": 6.295354845068214e-07, + "loss": 0.6594, + "step": 64830 + }, + { + "epoch": 2.5644169352765527, + "grad_norm": 1.5904359900860687, + "learning_rate": 6.284182194611821e-07, + "loss": 0.6687, + "step": 64840 + }, + { + "epoch": 2.5648124344954417, + "grad_norm": 1.7698590566229946, + "learning_rate": 6.273018802416703e-07, + "loss": 0.6641, + "step": 64850 + }, + { + "epoch": 2.5652079337143308, + "grad_norm": 1.3230267030166378, + "learning_rate": 6.261864670847051e-07, + "loss": 0.6525, + "step": 64860 + }, + { + "epoch": 2.56560343293322, + "grad_norm": 1.256391103949212, + "learning_rate": 6.250719802265142e-07, + "loss": 0.6716, + "step": 64870 + }, + { + "epoch": 2.565998932152109, + "grad_norm": 1.3886864404686912, + "learning_rate": 6.239584199031251e-07, + "loss": 0.6363, + "step": 64880 + }, + { + "epoch": 2.5663944313709983, + "grad_norm": 1.2004886167065867, + "learning_rate": 6.228457863503723e-07, + "loss": 0.6573, + "step": 64890 + }, + { + "epoch": 2.566789930589887, + "grad_norm": 1.8332682103011722, + "learning_rate": 6.217340798038917e-07, + "loss": 0.672, + "step": 64900 + }, + { + "epoch": 2.5671854298087764, + "grad_norm": 1.688646039930057, + "learning_rate": 6.20623300499123e-07, + "loss": 0.6288, + "step": 64910 + }, + { + "epoch": 2.567580929027665, + "grad_norm": 1.4683619682667337, + "learning_rate": 6.195134486713117e-07, + "loss": 0.6515, + "step": 64920 + }, + { + "epoch": 2.5679764282465545, + "grad_norm": 1.6056308905916619, + "learning_rate": 6.184045245555048e-07, + "loss": 0.6177, + "step": 64930 + }, + { + "epoch": 2.568371927465443, + "grad_norm": 1.3332388397797146, + "learning_rate": 6.172965283865528e-07, + "loss": 0.6297, + "step": 64940 + }, + { + "epoch": 2.5687674266843326, + "grad_norm": 1.51702282970225, + "learning_rate": 6.161894603991103e-07, + "loss": 0.6329, + "step": 64950 + }, + { + "epoch": 2.569162925903221, + "grad_norm": 1.3399787244702674, + "learning_rate": 6.150833208276363e-07, + "loss": 0.6369, + "step": 64960 + }, + { + "epoch": 2.5695584251221106, + "grad_norm": 1.3680672543378571, + "learning_rate": 6.139781099063913e-07, + "loss": 0.6587, + "step": 64970 + }, + { + "epoch": 2.5699539243409992, + "grad_norm": 1.4743134536453832, + "learning_rate": 6.128738278694412e-07, + "loss": 0.6071, + "step": 64980 + }, + { + "epoch": 2.5703494235598887, + "grad_norm": 1.3647499895725006, + "learning_rate": 6.11770474950652e-07, + "loss": 0.6278, + "step": 64990 + }, + { + "epoch": 2.5707449227787773, + "grad_norm": 1.7172077060608413, + "learning_rate": 6.106680513836976e-07, + "loss": 0.6347, + "step": 65000 + }, + { + "epoch": 2.571140421997667, + "grad_norm": 1.2993390458797738, + "learning_rate": 6.095665574020504e-07, + "loss": 0.6646, + "step": 65010 + }, + { + "epoch": 2.5715359212165554, + "grad_norm": 1.4848489083274365, + "learning_rate": 6.084659932389891e-07, + "loss": 0.6637, + "step": 65020 + }, + { + "epoch": 2.571931420435445, + "grad_norm": 1.5877402863902395, + "learning_rate": 6.07366359127593e-07, + "loss": 0.6503, + "step": 65030 + }, + { + "epoch": 
2.5723269196543335, + "grad_norm": 1.5517251825422966, + "learning_rate": 6.062676553007485e-07, + "loss": 0.6612, + "step": 65040 + }, + { + "epoch": 2.572722418873223, + "grad_norm": 1.7784695102816384, + "learning_rate": 6.051698819911384e-07, + "loss": 0.6344, + "step": 65050 + }, + { + "epoch": 2.5731179180921115, + "grad_norm": 1.3031254714632408, + "learning_rate": 6.04073039431255e-07, + "loss": 0.6467, + "step": 65060 + }, + { + "epoch": 2.573513417311001, + "grad_norm": 1.7933600367778726, + "learning_rate": 6.029771278533891e-07, + "loss": 0.6199, + "step": 65070 + }, + { + "epoch": 2.5739089165298896, + "grad_norm": 1.4033180688369145, + "learning_rate": 6.01882147489638e-07, + "loss": 0.6497, + "step": 65080 + }, + { + "epoch": 2.574304415748779, + "grad_norm": 1.2791579512017908, + "learning_rate": 6.007880985718978e-07, + "loss": 0.6744, + "step": 65090 + }, + { + "epoch": 2.5746999149676677, + "grad_norm": 1.5819704182622671, + "learning_rate": 5.996949813318709e-07, + "loss": 0.6535, + "step": 65100 + }, + { + "epoch": 2.575095414186557, + "grad_norm": 1.6215734422411294, + "learning_rate": 5.986027960010593e-07, + "loss": 0.6249, + "step": 65110 + }, + { + "epoch": 2.575490913405446, + "grad_norm": 1.678272007338141, + "learning_rate": 5.975115428107709e-07, + "loss": 0.6557, + "step": 65120 + }, + { + "epoch": 2.5758864126243353, + "grad_norm": 1.2732338189743906, + "learning_rate": 5.964212219921134e-07, + "loss": 0.6324, + "step": 65130 + }, + { + "epoch": 2.576281911843224, + "grad_norm": 1.3369278773350985, + "learning_rate": 5.953318337759973e-07, + "loss": 0.661, + "step": 65140 + }, + { + "epoch": 2.5766774110621133, + "grad_norm": 1.4366000393816591, + "learning_rate": 5.942433783931378e-07, + "loss": 0.6404, + "step": 65150 + }, + { + "epoch": 2.577072910281002, + "grad_norm": 1.5049741622756876, + "learning_rate": 5.931558560740502e-07, + "loss": 0.6072, + "step": 65160 + }, + { + "epoch": 2.5774684094998914, + "grad_norm": 1.4558191823923379, + "learning_rate": 5.920692670490535e-07, + "loss": 0.6599, + "step": 65170 + }, + { + "epoch": 2.57786390871878, + "grad_norm": 1.3659451057860597, + "learning_rate": 5.909836115482693e-07, + "loss": 0.6119, + "step": 65180 + }, + { + "epoch": 2.5782594079376695, + "grad_norm": 1.3108490962184507, + "learning_rate": 5.898988898016201e-07, + "loss": 0.6234, + "step": 65190 + }, + { + "epoch": 2.578654907156558, + "grad_norm": 1.395860705573737, + "learning_rate": 5.888151020388299e-07, + "loss": 0.6626, + "step": 65200 + }, + { + "epoch": 2.5790504063754476, + "grad_norm": 1.8333726697416093, + "learning_rate": 5.877322484894288e-07, + "loss": 0.6515, + "step": 65210 + }, + { + "epoch": 2.579445905594336, + "grad_norm": 1.2140871888109008, + "learning_rate": 5.866503293827447e-07, + "loss": 0.6493, + "step": 65220 + }, + { + "epoch": 2.5798414048132257, + "grad_norm": 1.8742947173068751, + "learning_rate": 5.855693449479116e-07, + "loss": 0.6254, + "step": 65230 + }, + { + "epoch": 2.5802369040321147, + "grad_norm": 1.3742629590621933, + "learning_rate": 5.844892954138615e-07, + "loss": 0.648, + "step": 65240 + }, + { + "epoch": 2.5806324032510037, + "grad_norm": 1.5513769655278322, + "learning_rate": 5.834101810093324e-07, + "loss": 0.6327, + "step": 65250 + }, + { + "epoch": 2.5810279024698928, + "grad_norm": 1.6421918722944624, + "learning_rate": 5.823320019628598e-07, + "loss": 0.6564, + "step": 65260 + }, + { + "epoch": 2.581423401688782, + "grad_norm": 1.1995188456299204, + "learning_rate": 5.812547585027861e-07, + 
"loss": 0.6314, + "step": 65270 + }, + { + "epoch": 2.581818900907671, + "grad_norm": 1.7341415510130007, + "learning_rate": 5.801784508572505e-07, + "loss": 0.6268, + "step": 65280 + }, + { + "epoch": 2.58221440012656, + "grad_norm": 1.4444947634172702, + "learning_rate": 5.791030792541996e-07, + "loss": 0.6632, + "step": 65290 + }, + { + "epoch": 2.582609899345449, + "grad_norm": 1.4454786684461065, + "learning_rate": 5.780286439213756e-07, + "loss": 0.6268, + "step": 65300 + }, + { + "epoch": 2.583005398564338, + "grad_norm": 1.4044651480280266, + "learning_rate": 5.76955145086327e-07, + "loss": 0.6675, + "step": 65310 + }, + { + "epoch": 2.583400897783227, + "grad_norm": 1.4568383295623744, + "learning_rate": 5.758825829764014e-07, + "loss": 0.6679, + "step": 65320 + }, + { + "epoch": 2.583796397002116, + "grad_norm": 1.4287001495107357, + "learning_rate": 5.748109578187511e-07, + "loss": 0.671, + "step": 65330 + }, + { + "epoch": 2.584191896221005, + "grad_norm": 1.7176298639320215, + "learning_rate": 5.73740269840326e-07, + "loss": 0.6463, + "step": 65340 + }, + { + "epoch": 2.584587395439894, + "grad_norm": 1.44455304180914, + "learning_rate": 5.726705192678811e-07, + "loss": 0.6132, + "step": 65350 + }, + { + "epoch": 2.584982894658783, + "grad_norm": 1.328066761917533, + "learning_rate": 5.7160170632797e-07, + "loss": 0.6655, + "step": 65360 + }, + { + "epoch": 2.585378393877672, + "grad_norm": 1.5843005670391128, + "learning_rate": 5.705338312469483e-07, + "loss": 0.6335, + "step": 65370 + }, + { + "epoch": 2.5857738930965612, + "grad_norm": 1.6978839145878624, + "learning_rate": 5.694668942509763e-07, + "loss": 0.6692, + "step": 65380 + }, + { + "epoch": 2.5861693923154503, + "grad_norm": 1.872285829206385, + "learning_rate": 5.684008955660098e-07, + "loss": 0.6408, + "step": 65390 + }, + { + "epoch": 2.5865648915343393, + "grad_norm": 1.8226915418218141, + "learning_rate": 5.673358354178122e-07, + "loss": 0.607, + "step": 65400 + }, + { + "epoch": 2.5869603907532284, + "grad_norm": 1.2491721761377383, + "learning_rate": 5.66271714031943e-07, + "loss": 0.6553, + "step": 65410 + }, + { + "epoch": 2.5873558899721174, + "grad_norm": 1.381641801471228, + "learning_rate": 5.652085316337652e-07, + "loss": 0.6368, + "step": 65420 + }, + { + "epoch": 2.5877513891910064, + "grad_norm": 1.6237051709871286, + "learning_rate": 5.641462884484417e-07, + "loss": 0.676, + "step": 65430 + }, + { + "epoch": 2.5881468884098955, + "grad_norm": 1.3530778706896025, + "learning_rate": 5.630849847009396e-07, + "loss": 0.6748, + "step": 65440 + }, + { + "epoch": 2.5885423876287845, + "grad_norm": 1.82032126633611, + "learning_rate": 5.62024620616023e-07, + "loss": 0.6462, + "step": 65450 + }, + { + "epoch": 2.5889378868476736, + "grad_norm": 1.4043252529225034, + "learning_rate": 5.6096519641826e-07, + "loss": 0.6253, + "step": 65460 + }, + { + "epoch": 2.5893333860665626, + "grad_norm": 1.2400145032499792, + "learning_rate": 5.599067123320174e-07, + "loss": 0.6624, + "step": 65470 + }, + { + "epoch": 2.5897288852854516, + "grad_norm": 1.864662516035955, + "learning_rate": 5.588491685814651e-07, + "loss": 0.5777, + "step": 65480 + }, + { + "epoch": 2.5901243845043407, + "grad_norm": 1.3970398659563015, + "learning_rate": 5.577925653905713e-07, + "loss": 0.6373, + "step": 65490 + }, + { + "epoch": 2.5905198837232297, + "grad_norm": 1.4821470775298895, + "learning_rate": 5.567369029831088e-07, + "loss": 0.6529, + "step": 65500 + }, + { + "epoch": 2.5909153829421188, + "grad_norm": 1.579923038008987, + 
"learning_rate": 5.556821815826463e-07, + "loss": 0.6489, + "step": 65510 + }, + { + "epoch": 2.591310882161008, + "grad_norm": 1.7764681387432162, + "learning_rate": 5.54628401412558e-07, + "loss": 0.635, + "step": 65520 + }, + { + "epoch": 2.591706381379897, + "grad_norm": 1.480321787976223, + "learning_rate": 5.535755626960154e-07, + "loss": 0.6838, + "step": 65530 + }, + { + "epoch": 2.592101880598786, + "grad_norm": 1.478309609680217, + "learning_rate": 5.525236656559912e-07, + "loss": 0.6412, + "step": 65540 + }, + { + "epoch": 2.592497379817675, + "grad_norm": 1.5376367312659802, + "learning_rate": 5.51472710515259e-07, + "loss": 0.6475, + "step": 65550 + }, + { + "epoch": 2.592892879036564, + "grad_norm": 1.5763290316113068, + "learning_rate": 5.504226974963945e-07, + "loss": 0.6109, + "step": 65560 + }, + { + "epoch": 2.593288378255453, + "grad_norm": 1.746398331000172, + "learning_rate": 5.493736268217703e-07, + "loss": 0.6444, + "step": 65570 + }, + { + "epoch": 2.593683877474342, + "grad_norm": 1.341431239770582, + "learning_rate": 5.483254987135644e-07, + "loss": 0.6368, + "step": 65580 + }, + { + "epoch": 2.594079376693231, + "grad_norm": 1.403624033436707, + "learning_rate": 5.472783133937509e-07, + "loss": 0.6444, + "step": 65590 + }, + { + "epoch": 2.59447487591212, + "grad_norm": 1.1751479390279747, + "learning_rate": 5.462320710841046e-07, + "loss": 0.6682, + "step": 65600 + }, + { + "epoch": 2.594870375131009, + "grad_norm": 1.4945311682834728, + "learning_rate": 5.451867720062043e-07, + "loss": 0.6538, + "step": 65610 + }, + { + "epoch": 2.595265874349898, + "grad_norm": 1.6062316057132215, + "learning_rate": 5.441424163814235e-07, + "loss": 0.6459, + "step": 65620 + }, + { + "epoch": 2.5956613735687872, + "grad_norm": 1.4393111753530963, + "learning_rate": 5.430990044309415e-07, + "loss": 0.6503, + "step": 65630 + }, + { + "epoch": 2.5960568727876763, + "grad_norm": 1.8482574754564973, + "learning_rate": 5.42056536375733e-07, + "loss": 0.6418, + "step": 65640 + }, + { + "epoch": 2.5964523720065653, + "grad_norm": 1.2775102832349554, + "learning_rate": 5.410150124365765e-07, + "loss": 0.6368, + "step": 65650 + }, + { + "epoch": 2.5968478712254544, + "grad_norm": 1.354447918318132, + "learning_rate": 5.399744328340484e-07, + "loss": 0.6397, + "step": 65660 + }, + { + "epoch": 2.5972433704443434, + "grad_norm": 1.7722332729393773, + "learning_rate": 5.389347977885257e-07, + "loss": 0.6413, + "step": 65670 + }, + { + "epoch": 2.5976388696632324, + "grad_norm": 1.6760909974915852, + "learning_rate": 5.378961075201833e-07, + "loss": 0.6503, + "step": 65680 + }, + { + "epoch": 2.5980343688821215, + "grad_norm": 1.5416730261143272, + "learning_rate": 5.368583622490009e-07, + "loss": 0.6261, + "step": 65690 + }, + { + "epoch": 2.5984298681010105, + "grad_norm": 1.4664663984519084, + "learning_rate": 5.358215621947532e-07, + "loss": 0.6777, + "step": 65700 + }, + { + "epoch": 2.5988253673198995, + "grad_norm": 1.8338741320098175, + "learning_rate": 5.347857075770185e-07, + "loss": 0.6085, + "step": 65710 + }, + { + "epoch": 2.5992208665387886, + "grad_norm": 1.2743189577164808, + "learning_rate": 5.337507986151713e-07, + "loss": 0.6309, + "step": 65720 + }, + { + "epoch": 2.5996163657576776, + "grad_norm": 1.4289964181188688, + "learning_rate": 5.327168355283891e-07, + "loss": 0.6552, + "step": 65730 + }, + { + "epoch": 2.6000118649765667, + "grad_norm": 1.2660059470743217, + "learning_rate": 5.316838185356454e-07, + "loss": 0.6132, + "step": 65740 + }, + { + "epoch": 
2.6004073641954557, + "grad_norm": 1.3859608823350247, + "learning_rate": 5.306517478557183e-07, + "loss": 0.6443, + "step": 65750 + }, + { + "epoch": 2.6008028634143447, + "grad_norm": 1.4016103657751104, + "learning_rate": 5.296206237071799e-07, + "loss": 0.6381, + "step": 65760 + }, + { + "epoch": 2.601198362633234, + "grad_norm": 1.482216362906679, + "learning_rate": 5.285904463084074e-07, + "loss": 0.6265, + "step": 65770 + }, + { + "epoch": 2.601593861852123, + "grad_norm": 1.400283819206724, + "learning_rate": 5.275612158775734e-07, + "loss": 0.6555, + "step": 65780 + }, + { + "epoch": 2.601989361071012, + "grad_norm": 1.5144468067868355, + "learning_rate": 5.265329326326512e-07, + "loss": 0.6247, + "step": 65790 + }, + { + "epoch": 2.602384860289901, + "grad_norm": 1.6692677941281382, + "learning_rate": 5.255055967914125e-07, + "loss": 0.6729, + "step": 65800 + }, + { + "epoch": 2.60278035950879, + "grad_norm": 1.5714983036438803, + "learning_rate": 5.244792085714312e-07, + "loss": 0.6247, + "step": 65810 + }, + { + "epoch": 2.603175858727679, + "grad_norm": 1.4483053168443896, + "learning_rate": 5.23453768190077e-07, + "loss": 0.6426, + "step": 65820 + }, + { + "epoch": 2.603571357946568, + "grad_norm": 1.398570202727538, + "learning_rate": 5.224292758645233e-07, + "loss": 0.6789, + "step": 65830 + }, + { + "epoch": 2.603966857165457, + "grad_norm": 1.3141073894710729, + "learning_rate": 5.214057318117378e-07, + "loss": 0.6701, + "step": 65840 + }, + { + "epoch": 2.604362356384346, + "grad_norm": 1.2438240004294592, + "learning_rate": 5.203831362484896e-07, + "loss": 0.6722, + "step": 65850 + }, + { + "epoch": 2.604757855603235, + "grad_norm": 1.4024661360095372, + "learning_rate": 5.193614893913484e-07, + "loss": 0.6816, + "step": 65860 + }, + { + "epoch": 2.605153354822124, + "grad_norm": 1.3013663084535005, + "learning_rate": 5.183407914566796e-07, + "loss": 0.6537, + "step": 65870 + }, + { + "epoch": 2.605548854041013, + "grad_norm": 1.461307100114189, + "learning_rate": 5.173210426606512e-07, + "loss": 0.677, + "step": 65880 + }, + { + "epoch": 2.6059443532599023, + "grad_norm": 1.5037038967480247, + "learning_rate": 5.163022432192272e-07, + "loss": 0.6461, + "step": 65890 + }, + { + "epoch": 2.6063398524787913, + "grad_norm": 1.6788525185134127, + "learning_rate": 5.152843933481744e-07, + "loss": 0.657, + "step": 65900 + }, + { + "epoch": 2.6067353516976803, + "grad_norm": 1.531500435506248, + "learning_rate": 5.14267493263052e-07, + "loss": 0.6538, + "step": 65910 + }, + { + "epoch": 2.6071308509165694, + "grad_norm": 1.4066417904709896, + "learning_rate": 5.132515431792251e-07, + "loss": 0.613, + "step": 65920 + }, + { + "epoch": 2.6075263501354584, + "grad_norm": 1.2227901604776632, + "learning_rate": 5.122365433118526e-07, + "loss": 0.6637, + "step": 65930 + }, + { + "epoch": 2.6079218493543475, + "grad_norm": 1.4510534327965838, + "learning_rate": 5.112224938758964e-07, + "loss": 0.6328, + "step": 65940 + }, + { + "epoch": 2.6083173485732365, + "grad_norm": 1.4457845330829098, + "learning_rate": 5.10209395086112e-07, + "loss": 0.6725, + "step": 65950 + }, + { + "epoch": 2.6087128477921255, + "grad_norm": 1.69543710852855, + "learning_rate": 5.091972471570589e-07, + "loss": 0.6189, + "step": 65960 + }, + { + "epoch": 2.6091083470110146, + "grad_norm": 1.3225714447403205, + "learning_rate": 5.081860503030906e-07, + "loss": 0.6507, + "step": 65970 + }, + { + "epoch": 2.6095038462299036, + "grad_norm": 1.5105313988423044, + "learning_rate": 5.07175804738363e-07, + "loss": 
0.6666, + "step": 65980 + }, + { + "epoch": 2.6098993454487927, + "grad_norm": 1.62226883544106, + "learning_rate": 5.061665106768271e-07, + "loss": 0.6518, + "step": 65990 + }, + { + "epoch": 2.6102948446676817, + "grad_norm": 1.9588686832610245, + "learning_rate": 5.051581683322365e-07, + "loss": 0.6314, + "step": 66000 + }, + { + "epoch": 2.6106903438865707, + "grad_norm": 1.7156386895046971, + "learning_rate": 5.041507779181388e-07, + "loss": 0.6274, + "step": 66010 + }, + { + "epoch": 2.6110858431054598, + "grad_norm": 1.6515180010712787, + "learning_rate": 5.03144339647883e-07, + "loss": 0.6576, + "step": 66020 + }, + { + "epoch": 2.611481342324349, + "grad_norm": 1.1889228329078885, + "learning_rate": 5.021388537346139e-07, + "loss": 0.6502, + "step": 66030 + }, + { + "epoch": 2.611876841543238, + "grad_norm": 1.4509807141328215, + "learning_rate": 5.011343203912783e-07, + "loss": 0.63, + "step": 66040 + }, + { + "epoch": 2.612272340762127, + "grad_norm": 1.2892090941749295, + "learning_rate": 5.001307398306171e-07, + "loss": 0.6491, + "step": 66050 + }, + { + "epoch": 2.612667839981016, + "grad_norm": 1.592465753293887, + "learning_rate": 4.991281122651736e-07, + "loss": 0.6252, + "step": 66060 + }, + { + "epoch": 2.613063339199905, + "grad_norm": 1.3656516042900058, + "learning_rate": 4.981264379072864e-07, + "loss": 0.6587, + "step": 66070 + }, + { + "epoch": 2.613458838418794, + "grad_norm": 1.6250125310512444, + "learning_rate": 4.971257169690919e-07, + "loss": 0.6481, + "step": 66080 + }, + { + "epoch": 2.613854337637683, + "grad_norm": 1.9221780773238208, + "learning_rate": 4.96125949662527e-07, + "loss": 0.6392, + "step": 66090 + }, + { + "epoch": 2.614249836856572, + "grad_norm": 1.228444389820104, + "learning_rate": 4.951271361993237e-07, + "loss": 0.6456, + "step": 66100 + }, + { + "epoch": 2.614645336075461, + "grad_norm": 1.2933064230881441, + "learning_rate": 4.941292767910161e-07, + "loss": 0.6625, + "step": 66110 + }, + { + "epoch": 2.61504083529435, + "grad_norm": 1.9606588973178931, + "learning_rate": 4.931323716489306e-07, + "loss": 0.6522, + "step": 66120 + }, + { + "epoch": 2.615436334513239, + "grad_norm": 1.586729380817973, + "learning_rate": 4.921364209841978e-07, + "loss": 0.6488, + "step": 66130 + }, + { + "epoch": 2.6158318337321282, + "grad_norm": 1.4016925681263692, + "learning_rate": 4.911414250077412e-07, + "loss": 0.6429, + "step": 66140 + }, + { + "epoch": 2.6162273329510173, + "grad_norm": 1.448621941728291, + "learning_rate": 4.901473839302839e-07, + "loss": 0.6207, + "step": 66150 + }, + { + "epoch": 2.6166228321699063, + "grad_norm": 1.495869245144115, + "learning_rate": 4.891542979623465e-07, + "loss": 0.617, + "step": 66160 + }, + { + "epoch": 2.6170183313887954, + "grad_norm": 1.186091347328476, + "learning_rate": 4.881621673142489e-07, + "loss": 0.6554, + "step": 66170 + }, + { + "epoch": 2.6174138306076844, + "grad_norm": 1.4066447255639665, + "learning_rate": 4.87170992196106e-07, + "loss": 0.654, + "step": 66180 + }, + { + "epoch": 2.6178093298265734, + "grad_norm": 1.3191814178400343, + "learning_rate": 4.861807728178331e-07, + "loss": 0.6691, + "step": 66190 + }, + { + "epoch": 2.6182048290454625, + "grad_norm": 1.4579616457467213, + "learning_rate": 4.851915093891396e-07, + "loss": 0.6561, + "step": 66200 + }, + { + "epoch": 2.6186003282643515, + "grad_norm": 1.6474097434634207, + "learning_rate": 4.842032021195376e-07, + "loss": 0.6462, + "step": 66210 + }, + { + "epoch": 2.6189958274832406, + "grad_norm": 1.4782575772413122, + 
"learning_rate": 4.832158512183299e-07, + "loss": 0.6019, + "step": 66220 + }, + { + "epoch": 2.6193913267021296, + "grad_norm": 1.7916294109319313, + "learning_rate": 4.82229456894624e-07, + "loss": 0.6443, + "step": 66230 + }, + { + "epoch": 2.619786825921019, + "grad_norm": 2.0669091433373183, + "learning_rate": 4.812440193573197e-07, + "loss": 0.65, + "step": 66240 + }, + { + "epoch": 2.6201823251399077, + "grad_norm": 1.7944506849343103, + "learning_rate": 4.802595388151154e-07, + "loss": 0.6307, + "step": 66250 + }, + { + "epoch": 2.620577824358797, + "grad_norm": 1.521611640318423, + "learning_rate": 4.792760154765086e-07, + "loss": 0.6724, + "step": 66260 + }, + { + "epoch": 2.6209733235776858, + "grad_norm": 1.3096520307171977, + "learning_rate": 4.782934495497915e-07, + "loss": 0.6257, + "step": 66270 + }, + { + "epoch": 2.6213688227965752, + "grad_norm": 1.5461989241147014, + "learning_rate": 4.773118412430538e-07, + "loss": 0.6651, + "step": 66280 + }, + { + "epoch": 2.621764322015464, + "grad_norm": 1.3400524696883678, + "learning_rate": 4.76331190764186e-07, + "loss": 0.6291, + "step": 66290 + }, + { + "epoch": 2.6221598212343533, + "grad_norm": 1.4785937689506297, + "learning_rate": 4.753514983208718e-07, + "loss": 0.6352, + "step": 66300 + }, + { + "epoch": 2.622555320453242, + "grad_norm": 1.180513996359601, + "learning_rate": 4.74372764120592e-07, + "loss": 0.6276, + "step": 66310 + }, + { + "epoch": 2.6229508196721314, + "grad_norm": 1.3848742090224064, + "learning_rate": 4.733949883706274e-07, + "loss": 0.6542, + "step": 66320 + }, + { + "epoch": 2.62334631889102, + "grad_norm": 1.2880722612427167, + "learning_rate": 4.724181712780529e-07, + "loss": 0.6459, + "step": 66330 + }, + { + "epoch": 2.6237418181099095, + "grad_norm": 1.6418835102149247, + "learning_rate": 4.714423130497436e-07, + "loss": 0.6335, + "step": 66340 + }, + { + "epoch": 2.624137317328798, + "grad_norm": 1.1854161039246214, + "learning_rate": 4.7046741389236704e-07, + "loss": 0.6794, + "step": 66350 + }, + { + "epoch": 2.6245328165476876, + "grad_norm": 1.4350707385400354, + "learning_rate": 4.694934740123924e-07, + "loss": 0.6673, + "step": 66360 + }, + { + "epoch": 2.624928315766576, + "grad_norm": 1.2049319144411197, + "learning_rate": 4.685204936160814e-07, + "loss": 0.6581, + "step": 66370 + }, + { + "epoch": 2.6253238149854656, + "grad_norm": 1.4943565124130345, + "learning_rate": 4.67548472909497e-07, + "loss": 0.6418, + "step": 66380 + }, + { + "epoch": 2.6257193142043542, + "grad_norm": 1.6376707669612973, + "learning_rate": 4.665774120984951e-07, + "loss": 0.6431, + "step": 66390 + }, + { + "epoch": 2.6261148134232437, + "grad_norm": 1.804583110026279, + "learning_rate": 4.6560731138872993e-07, + "loss": 0.6203, + "step": 66400 + }, + { + "epoch": 2.6265103126421323, + "grad_norm": 1.6513561468535503, + "learning_rate": 4.6463817098565067e-07, + "loss": 0.6537, + "step": 66410 + }, + { + "epoch": 2.626905811861022, + "grad_norm": 1.288928900264162, + "learning_rate": 4.636699910945075e-07, + "loss": 0.6435, + "step": 66420 + }, + { + "epoch": 2.6273013110799104, + "grad_norm": 1.7691904867964667, + "learning_rate": 4.62702771920342e-07, + "loss": 0.6214, + "step": 66430 + }, + { + "epoch": 2.6276968102988, + "grad_norm": 1.487699400478626, + "learning_rate": 4.6173651366799634e-07, + "loss": 0.6408, + "step": 66440 + }, + { + "epoch": 2.6280923095176885, + "grad_norm": 1.3436567914096835, + "learning_rate": 4.607712165421058e-07, + "loss": 0.6614, + "step": 66450 + }, + { + "epoch": 
2.628487808736578, + "grad_norm": 1.3544132189882172, + "learning_rate": 4.5980688074710566e-07, + "loss": 0.6343, + "step": 66460 + }, + { + "epoch": 2.6288833079554665, + "grad_norm": 1.5119038218419327, + "learning_rate": 4.5884350648722374e-07, + "loss": 0.6615, + "step": 66470 + }, + { + "epoch": 2.629278807174356, + "grad_norm": 1.3001043451843204, + "learning_rate": 4.578810939664885e-07, + "loss": 0.687, + "step": 66480 + }, + { + "epoch": 2.6296743063932446, + "grad_norm": 1.6595015704129095, + "learning_rate": 4.5691964338872075e-07, + "loss": 0.6675, + "step": 66490 + }, + { + "epoch": 2.630069805612134, + "grad_norm": 1.5078063069290257, + "learning_rate": 4.5595915495753873e-07, + "loss": 0.6433, + "step": 66500 + }, + { + "epoch": 2.6304653048310227, + "grad_norm": 1.435052566826052, + "learning_rate": 4.549996288763592e-07, + "loss": 0.6355, + "step": 66510 + }, + { + "epoch": 2.630860804049912, + "grad_norm": 1.3746795233069786, + "learning_rate": 4.5404106534839233e-07, + "loss": 0.6673, + "step": 66520 + }, + { + "epoch": 2.631256303268801, + "grad_norm": 1.2458315954306327, + "learning_rate": 4.5308346457664573e-07, + "loss": 0.6544, + "step": 66530 + }, + { + "epoch": 2.6316518024876903, + "grad_norm": 1.2212836465971777, + "learning_rate": 4.5212682676392174e-07, + "loss": 0.6423, + "step": 66540 + }, + { + "epoch": 2.632047301706579, + "grad_norm": 1.5625978161652871, + "learning_rate": 4.51171152112822e-07, + "loss": 0.627, + "step": 66550 + }, + { + "epoch": 2.6324428009254683, + "grad_norm": 1.6740385092598458, + "learning_rate": 4.502164408257398e-07, + "loss": 0.6406, + "step": 66560 + }, + { + "epoch": 2.6328383001443574, + "grad_norm": 1.390000813214016, + "learning_rate": 4.492626931048688e-07, + "loss": 0.6463, + "step": 66570 + }, + { + "epoch": 2.6332337993632464, + "grad_norm": 1.2996125579848212, + "learning_rate": 4.483099091521936e-07, + "loss": 0.6256, + "step": 66580 + }, + { + "epoch": 2.6336292985821355, + "grad_norm": 1.4238411976058616, + "learning_rate": 4.473580891695012e-07, + "loss": 0.6468, + "step": 66590 + }, + { + "epoch": 2.6340247978010245, + "grad_norm": 1.5538501448890076, + "learning_rate": 4.464072333583669e-07, + "loss": 0.6266, + "step": 66600 + }, + { + "epoch": 2.6344202970199135, + "grad_norm": 1.336802486191245, + "learning_rate": 4.454573419201691e-07, + "loss": 0.6551, + "step": 66610 + }, + { + "epoch": 2.6348157962388026, + "grad_norm": 1.4376934215654078, + "learning_rate": 4.445084150560758e-07, + "loss": 0.6352, + "step": 66620 + }, + { + "epoch": 2.6352112954576916, + "grad_norm": 1.6694939470526333, + "learning_rate": 4.4356045296705676e-07, + "loss": 0.6499, + "step": 66630 + }, + { + "epoch": 2.6356067946765807, + "grad_norm": 1.7201483962088393, + "learning_rate": 4.426134558538697e-07, + "loss": 0.652, + "step": 66640 + }, + { + "epoch": 2.6360022938954697, + "grad_norm": 1.3728509963743392, + "learning_rate": 4.4166742391707593e-07, + "loss": 0.6375, + "step": 66650 + }, + { + "epoch": 2.6363977931143587, + "grad_norm": 1.472184419224856, + "learning_rate": 4.4072235735702684e-07, + "loss": 0.6308, + "step": 66660 + }, + { + "epoch": 2.6367932923332478, + "grad_norm": 1.6987875377420738, + "learning_rate": 4.3977825637387284e-07, + "loss": 0.6244, + "step": 66670 + }, + { + "epoch": 2.637188791552137, + "grad_norm": 1.7906711340462902, + "learning_rate": 4.388351211675562e-07, + "loss": 0.6316, + "step": 66680 + }, + { + "epoch": 2.637584290771026, + "grad_norm": 1.5727263411124601, + "learning_rate": 
4.3789295193781944e-07, + "loss": 0.6458, + "step": 66690 + }, + { + "epoch": 2.637979789989915, + "grad_norm": 1.753860939783427, + "learning_rate": 4.3695174888419566e-07, + "loss": 0.6201, + "step": 66700 + }, + { + "epoch": 2.638375289208804, + "grad_norm": 1.5859584062220233, + "learning_rate": 4.3601151220601713e-07, + "loss": 0.5974, + "step": 66710 + }, + { + "epoch": 2.638770788427693, + "grad_norm": 1.5678573027066647, + "learning_rate": 4.350722421024095e-07, + "loss": 0.6461, + "step": 66720 + }, + { + "epoch": 2.639166287646582, + "grad_norm": 1.4199341610106007, + "learning_rate": 4.341339387722926e-07, + "loss": 0.6716, + "step": 66730 + }, + { + "epoch": 2.639561786865471, + "grad_norm": 1.2020944948214642, + "learning_rate": 4.3319660241438577e-07, + "loss": 0.6666, + "step": 66740 + }, + { + "epoch": 2.63995728608436, + "grad_norm": 1.3431146074968838, + "learning_rate": 4.3226023322719746e-07, + "loss": 0.658, + "step": 66750 + }, + { + "epoch": 2.640352785303249, + "grad_norm": 1.602487495302267, + "learning_rate": 4.313248314090379e-07, + "loss": 0.6427, + "step": 66760 + }, + { + "epoch": 2.640748284522138, + "grad_norm": 1.4788348998753265, + "learning_rate": 4.30390397158007e-07, + "loss": 0.678, + "step": 66770 + }, + { + "epoch": 2.641143783741027, + "grad_norm": 1.6903540426726307, + "learning_rate": 4.2945693067200313e-07, + "loss": 0.6275, + "step": 66780 + }, + { + "epoch": 2.6415392829599162, + "grad_norm": 1.4297571612996394, + "learning_rate": 4.28524432148717e-07, + "loss": 0.6514, + "step": 66790 + }, + { + "epoch": 2.6419347821788053, + "grad_norm": 1.5165658895594936, + "learning_rate": 4.2759290178563727e-07, + "loss": 0.6416, + "step": 66800 + }, + { + "epoch": 2.6423302813976943, + "grad_norm": 1.3195290811336806, + "learning_rate": 4.2666233978004513e-07, + "loss": 0.6493, + "step": 66810 + }, + { + "epoch": 2.6427257806165834, + "grad_norm": 1.348959330811117, + "learning_rate": 4.257327463290184e-07, + "loss": 0.6614, + "step": 66820 + }, + { + "epoch": 2.6431212798354724, + "grad_norm": 1.5874236226395562, + "learning_rate": 4.248041216294285e-07, + "loss": 0.6408, + "step": 66830 + }, + { + "epoch": 2.6435167790543614, + "grad_norm": 1.8629482742181673, + "learning_rate": 4.238764658779432e-07, + "loss": 0.6554, + "step": 66840 + }, + { + "epoch": 2.6439122782732505, + "grad_norm": 1.6249435092101672, + "learning_rate": 4.2294977927102244e-07, + "loss": 0.6373, + "step": 66850 + }, + { + "epoch": 2.6443077774921395, + "grad_norm": 1.4046606342939152, + "learning_rate": 4.220240620049243e-07, + "loss": 0.6299, + "step": 66860 + }, + { + "epoch": 2.6447032767110286, + "grad_norm": 1.5306101148762647, + "learning_rate": 4.210993142756986e-07, + "loss": 0.6246, + "step": 66870 + }, + { + "epoch": 2.6450987759299176, + "grad_norm": 1.529327978440233, + "learning_rate": 4.201755362791932e-07, + "loss": 0.6279, + "step": 66880 + }, + { + "epoch": 2.6454942751488066, + "grad_norm": 1.2823331238658422, + "learning_rate": 4.1925272821104477e-07, + "loss": 0.651, + "step": 66890 + }, + { + "epoch": 2.6458897743676957, + "grad_norm": 1.2822840583923572, + "learning_rate": 4.183308902666916e-07, + "loss": 0.6798, + "step": 66900 + }, + { + "epoch": 2.6462852735865847, + "grad_norm": 1.4604251989445376, + "learning_rate": 4.1741002264136075e-07, + "loss": 0.6264, + "step": 66910 + }, + { + "epoch": 2.6466807728054738, + "grad_norm": 1.495190644041529, + "learning_rate": 4.1649012553007795e-07, + "loss": 0.6066, + "step": 66920 + }, + { + "epoch": 
2.647076272024363, + "grad_norm": 1.4840463689796157, + "learning_rate": 4.155711991276601e-07, + "loss": 0.6796, + "step": 66930 + }, + { + "epoch": 2.647471771243252, + "grad_norm": 1.3556142115990775, + "learning_rate": 4.1465324362872205e-07, + "loss": 0.6319, + "step": 66940 + }, + { + "epoch": 2.647867270462141, + "grad_norm": 1.3947919236139419, + "learning_rate": 4.1373625922767005e-07, + "loss": 0.6654, + "step": 66950 + }, + { + "epoch": 2.64826276968103, + "grad_norm": 1.3691337766863305, + "learning_rate": 4.1282024611870474e-07, + "loss": 0.606, + "step": 66960 + }, + { + "epoch": 2.648658268899919, + "grad_norm": 1.7176406567866715, + "learning_rate": 4.119052044958238e-07, + "loss": 0.6339, + "step": 66970 + }, + { + "epoch": 2.649053768118808, + "grad_norm": 1.2847804424439877, + "learning_rate": 4.1099113455281556e-07, + "loss": 0.6513, + "step": 66980 + }, + { + "epoch": 2.649449267337697, + "grad_norm": 1.4261966979975993, + "learning_rate": 4.100780364832657e-07, + "loss": 0.6392, + "step": 66990 + }, + { + "epoch": 2.649844766556586, + "grad_norm": 1.3296865532160875, + "learning_rate": 4.0916591048055286e-07, + "loss": 0.6221, + "step": 67000 + }, + { + "epoch": 2.650240265775475, + "grad_norm": 1.2429985713452463, + "learning_rate": 4.0825475673784867e-07, + "loss": 0.6598, + "step": 67010 + }, + { + "epoch": 2.650635764994364, + "grad_norm": 1.3711051294072798, + "learning_rate": 4.073445754481198e-07, + "loss": 0.6152, + "step": 67020 + }, + { + "epoch": 2.651031264213253, + "grad_norm": 1.3829629827143872, + "learning_rate": 4.064353668041282e-07, + "loss": 0.6329, + "step": 67030 + }, + { + "epoch": 2.6514267634321422, + "grad_norm": 1.425930233077325, + "learning_rate": 4.0552713099842767e-07, + "loss": 0.6248, + "step": 67040 + }, + { + "epoch": 2.6518222626510313, + "grad_norm": 1.603481017678548, + "learning_rate": 4.0461986822336765e-07, + "loss": 0.6601, + "step": 67050 + }, + { + "epoch": 2.6522177618699203, + "grad_norm": 1.4839827953440277, + "learning_rate": 4.0371357867109e-07, + "loss": 0.6424, + "step": 67060 + }, + { + "epoch": 2.6526132610888093, + "grad_norm": 1.7249467590446503, + "learning_rate": 4.0280826253353334e-07, + "loss": 0.6432, + "step": 67070 + }, + { + "epoch": 2.6530087603076984, + "grad_norm": 1.7496658102068572, + "learning_rate": 4.0190392000242495e-07, + "loss": 0.6269, + "step": 67080 + }, + { + "epoch": 2.6534042595265874, + "grad_norm": 1.484891972499491, + "learning_rate": 4.0100055126929215e-07, + "loss": 0.6415, + "step": 67090 + }, + { + "epoch": 2.6537997587454765, + "grad_norm": 1.3361017697498834, + "learning_rate": 4.0009815652545135e-07, + "loss": 0.6332, + "step": 67100 + }, + { + "epoch": 2.6541952579643655, + "grad_norm": 1.5597945169185352, + "learning_rate": 3.991967359620147e-07, + "loss": 0.625, + "step": 67110 + }, + { + "epoch": 2.6545907571832545, + "grad_norm": 1.494366562315302, + "learning_rate": 3.982962897698878e-07, + "loss": 0.6288, + "step": 67120 + }, + { + "epoch": 2.6549862564021436, + "grad_norm": 1.4337863908407422, + "learning_rate": 3.973968181397697e-07, + "loss": 0.6566, + "step": 67130 + }, + { + "epoch": 2.6553817556210326, + "grad_norm": 1.447224989329216, + "learning_rate": 3.964983212621515e-07, + "loss": 0.637, + "step": 67140 + }, + { + "epoch": 2.6557772548399217, + "grad_norm": 1.4654719225633515, + "learning_rate": 3.9560079932732197e-07, + "loss": 0.6435, + "step": 67150 + }, + { + "epoch": 2.6561727540588107, + "grad_norm": 1.253563886149293, + "learning_rate": 
3.947042525253586e-07, + "loss": 0.6382, + "step": 67160 + }, + { + "epoch": 2.6565682532776997, + "grad_norm": 1.6720554023752612, + "learning_rate": 3.9380868104613665e-07, + "loss": 0.6539, + "step": 67170 + }, + { + "epoch": 2.656963752496589, + "grad_norm": 1.4577868046866587, + "learning_rate": 3.929140850793223e-07, + "loss": 0.6201, + "step": 67180 + }, + { + "epoch": 2.657359251715478, + "grad_norm": 1.3094226264865712, + "learning_rate": 3.920204648143738e-07, + "loss": 0.6847, + "step": 67190 + }, + { + "epoch": 2.657754750934367, + "grad_norm": 1.4000435018670074, + "learning_rate": 3.911278204405478e-07, + "loss": 0.6414, + "step": 67200 + }, + { + "epoch": 2.658150250153256, + "grad_norm": 1.5063931391024656, + "learning_rate": 3.902361521468878e-07, + "loss": 0.6388, + "step": 67210 + }, + { + "epoch": 2.658545749372145, + "grad_norm": 1.609485005896645, + "learning_rate": 3.893454601222363e-07, + "loss": 0.6314, + "step": 67220 + }, + { + "epoch": 2.658941248591034, + "grad_norm": 1.5424299020077414, + "learning_rate": 3.8845574455522506e-07, + "loss": 0.6425, + "step": 67230 + }, + { + "epoch": 2.659336747809923, + "grad_norm": 1.5112816966688762, + "learning_rate": 3.875670056342823e-07, + "loss": 0.6339, + "step": 67240 + }, + { + "epoch": 2.659732247028812, + "grad_norm": 1.8633998106247787, + "learning_rate": 3.866792435476263e-07, + "loss": 0.657, + "step": 67250 + }, + { + "epoch": 2.660127746247701, + "grad_norm": 1.6998981111492322, + "learning_rate": 3.857924584832706e-07, + "loss": 0.6516, + "step": 67260 + }, + { + "epoch": 2.66052324546659, + "grad_norm": 1.761161973631195, + "learning_rate": 3.849066506290194e-07, + "loss": 0.6435, + "step": 67270 + }, + { + "epoch": 2.660918744685479, + "grad_norm": 1.1564538653904168, + "learning_rate": 3.8402182017247434e-07, + "loss": 0.6386, + "step": 67280 + }, + { + "epoch": 2.661314243904368, + "grad_norm": 1.455135653124351, + "learning_rate": 3.8313796730102493e-07, + "loss": 0.636, + "step": 67290 + }, + { + "epoch": 2.6617097431232573, + "grad_norm": 1.4325183598229272, + "learning_rate": 3.822550922018575e-07, + "loss": 0.6897, + "step": 67300 + }, + { + "epoch": 2.6621052423421463, + "grad_norm": 1.194626336687413, + "learning_rate": 3.8137319506194927e-07, + "loss": 0.6419, + "step": 67310 + }, + { + "epoch": 2.6625007415610353, + "grad_norm": 1.3449387684709344, + "learning_rate": 3.8049227606807126e-07, + "loss": 0.6562, + "step": 67320 + }, + { + "epoch": 2.6628962407799244, + "grad_norm": 1.4410529700490062, + "learning_rate": 3.7961233540678656e-07, + "loss": 0.6348, + "step": 67330 + }, + { + "epoch": 2.6632917399988134, + "grad_norm": 1.4851409735795837, + "learning_rate": 3.787333732644527e-07, + "loss": 0.6228, + "step": 67340 + }, + { + "epoch": 2.6636872392177025, + "grad_norm": 1.2700039102753866, + "learning_rate": 3.778553898272169e-07, + "loss": 0.6139, + "step": 67350 + }, + { + "epoch": 2.6640827384365915, + "grad_norm": 1.7911519665895288, + "learning_rate": 3.7697838528102324e-07, + "loss": 0.6239, + "step": 67360 + }, + { + "epoch": 2.6644782376554805, + "grad_norm": 1.3071701079955973, + "learning_rate": 3.761023598116048e-07, + "loss": 0.615, + "step": 67370 + }, + { + "epoch": 2.6648737368743696, + "grad_norm": 1.5354925568102975, + "learning_rate": 3.752273136044893e-07, + "loss": 0.6623, + "step": 67380 + }, + { + "epoch": 2.6652692360932586, + "grad_norm": 1.5452305780191486, + "learning_rate": 3.743532468449951e-07, + "loss": 0.6218, + "step": 67390 + }, + { + "epoch": 
2.6656647353121476, + "grad_norm": 1.2995892283492798, + "learning_rate": 3.734801597182369e-07, + "loss": 0.6602, + "step": 67400 + }, + { + "epoch": 2.6660602345310367, + "grad_norm": 1.2425583351380654, + "learning_rate": 3.7260805240911747e-07, + "loss": 0.6482, + "step": 67410 + }, + { + "epoch": 2.6664557337499257, + "grad_norm": 1.3466121328477731, + "learning_rate": 3.7173692510233617e-07, + "loss": 0.6565, + "step": 67420 + }, + { + "epoch": 2.6668512329688148, + "grad_norm": 1.5716410366626758, + "learning_rate": 3.7086677798238214e-07, + "loss": 0.6406, + "step": 67430 + }, + { + "epoch": 2.667246732187704, + "grad_norm": 1.295476496645719, + "learning_rate": 3.6999761123353574e-07, + "loss": 0.6533, + "step": 67440 + }, + { + "epoch": 2.667642231406593, + "grad_norm": 1.4453105704642009, + "learning_rate": 3.691294250398747e-07, + "loss": 0.6463, + "step": 67450 + }, + { + "epoch": 2.668037730625482, + "grad_norm": 1.4420361191847626, + "learning_rate": 3.6826221958526307e-07, + "loss": 0.6328, + "step": 67460 + }, + { + "epoch": 2.668433229844371, + "grad_norm": 1.4687586905766155, + "learning_rate": 3.6739599505336286e-07, + "loss": 0.662, + "step": 67470 + }, + { + "epoch": 2.66882872906326, + "grad_norm": 1.4689470491007226, + "learning_rate": 3.665307516276234e-07, + "loss": 0.6495, + "step": 67480 + }, + { + "epoch": 2.669224228282149, + "grad_norm": 1.1769819109822068, + "learning_rate": 3.656664894912909e-07, + "loss": 0.6014, + "step": 67490 + }, + { + "epoch": 2.669619727501038, + "grad_norm": 1.2838348423550436, + "learning_rate": 3.6480320882739785e-07, + "loss": 0.6706, + "step": 67500 + }, + { + "epoch": 2.670015226719927, + "grad_norm": 1.4384230088080945, + "learning_rate": 3.6394090981877463e-07, + "loss": 0.6385, + "step": 67510 + }, + { + "epoch": 2.670410725938816, + "grad_norm": 1.408892298790722, + "learning_rate": 3.630795926480407e-07, + "loss": 0.6576, + "step": 67520 + }, + { + "epoch": 2.670806225157705, + "grad_norm": 1.364572772958427, + "learning_rate": 3.6221925749760854e-07, + "loss": 0.6057, + "step": 67530 + }, + { + "epoch": 2.671201724376594, + "grad_norm": 1.3792356111715454, + "learning_rate": 3.6135990454968184e-07, + "loss": 0.6377, + "step": 67540 + }, + { + "epoch": 2.6715972235954832, + "grad_norm": 1.4487893114290935, + "learning_rate": 3.605015339862583e-07, + "loss": 0.6133, + "step": 67550 + }, + { + "epoch": 2.6719927228143723, + "grad_norm": 1.5843185378033344, + "learning_rate": 3.596441459891242e-07, + "loss": 0.6327, + "step": 67560 + }, + { + "epoch": 2.6723882220332613, + "grad_norm": 1.3243027806347243, + "learning_rate": 3.58787740739861e-07, + "loss": 0.6783, + "step": 67570 + }, + { + "epoch": 2.6727837212521504, + "grad_norm": 1.440987779065691, + "learning_rate": 3.579323184198397e-07, + "loss": 0.6576, + "step": 67580 + }, + { + "epoch": 2.67317922047104, + "grad_norm": 1.6064033549193635, + "learning_rate": 3.570778792102253e-07, + "loss": 0.6474, + "step": 67590 + }, + { + "epoch": 2.6735747196899284, + "grad_norm": 1.362771487103353, + "learning_rate": 3.562244232919726e-07, + "loss": 0.6504, + "step": 67600 + }, + { + "epoch": 2.673970218908818, + "grad_norm": 1.6466558859849665, + "learning_rate": 3.553719508458292e-07, + "loss": 0.6744, + "step": 67610 + }, + { + "epoch": 2.6743657181277065, + "grad_norm": 1.692159902937415, + "learning_rate": 3.5452046205233293e-07, + "loss": 0.6317, + "step": 67620 + }, + { + "epoch": 2.674761217346596, + "grad_norm": 1.704803372118754, + "learning_rate": 
3.5366995709181675e-07, + "loss": 0.6421, + "step": 67630 + }, + { + "epoch": 2.6751567165654846, + "grad_norm": 1.5623441738852302, + "learning_rate": 3.5282043614440043e-07, + "loss": 0.6059, + "step": 67640 + }, + { + "epoch": 2.675552215784374, + "grad_norm": 1.5529221015322243, + "learning_rate": 3.5197189939000065e-07, + "loss": 0.662, + "step": 67650 + }, + { + "epoch": 2.6759477150032627, + "grad_norm": 1.499488156716238, + "learning_rate": 3.5112434700832144e-07, + "loss": 0.6466, + "step": 67660 + }, + { + "epoch": 2.676343214222152, + "grad_norm": 1.3826978451876017, + "learning_rate": 3.5027777917885977e-07, + "loss": 0.6642, + "step": 67670 + }, + { + "epoch": 2.6767387134410408, + "grad_norm": 1.5903123990018386, + "learning_rate": 3.4943219608090493e-07, + "loss": 0.6398, + "step": 67680 + }, + { + "epoch": 2.6771342126599302, + "grad_norm": 1.5798941281349288, + "learning_rate": 3.48587597893536e-07, + "loss": 0.6516, + "step": 67690 + }, + { + "epoch": 2.677529711878819, + "grad_norm": 1.3080170689834487, + "learning_rate": 3.477439847956254e-07, + "loss": 0.6427, + "step": 67700 + }, + { + "epoch": 2.6779252110977083, + "grad_norm": 1.3071788865346678, + "learning_rate": 3.4690135696583473e-07, + "loss": 0.635, + "step": 67710 + }, + { + "epoch": 2.678320710316597, + "grad_norm": 1.5345252688578457, + "learning_rate": 3.4605971458262e-07, + "loss": 0.5767, + "step": 67720 + }, + { + "epoch": 2.6787162095354864, + "grad_norm": 1.6636315024068176, + "learning_rate": 3.452190578242243e-07, + "loss": 0.651, + "step": 67730 + }, + { + "epoch": 2.679111708754375, + "grad_norm": 1.6950699020623816, + "learning_rate": 3.443793868686879e-07, + "loss": 0.643, + "step": 67740 + }, + { + "epoch": 2.6795072079732645, + "grad_norm": 1.4733830197549675, + "learning_rate": 3.4354070189383413e-07, + "loss": 0.643, + "step": 67750 + }, + { + "epoch": 2.679902707192153, + "grad_norm": 1.2748151594976393, + "learning_rate": 3.427030030772854e-07, + "loss": 0.6711, + "step": 67760 + }, + { + "epoch": 2.6802982064110425, + "grad_norm": 1.3254388623939386, + "learning_rate": 3.418662905964498e-07, + "loss": 0.6648, + "step": 67770 + }, + { + "epoch": 2.680693705629931, + "grad_norm": 1.4290339665773355, + "learning_rate": 3.410305646285311e-07, + "loss": 0.6499, + "step": 67780 + }, + { + "epoch": 2.6810892048488206, + "grad_norm": 1.4335538686772098, + "learning_rate": 3.401958253505194e-07, + "loss": 0.635, + "step": 67790 + }, + { + "epoch": 2.6814847040677092, + "grad_norm": 1.5844709371594365, + "learning_rate": 3.3936207293919997e-07, + "loss": 0.6507, + "step": 67800 + }, + { + "epoch": 2.6818802032865987, + "grad_norm": 1.350994567483789, + "learning_rate": 3.38529307571146e-07, + "loss": 0.6449, + "step": 67810 + }, + { + "epoch": 2.6822757025054873, + "grad_norm": 1.6806705175000394, + "learning_rate": 3.376975294227242e-07, + "loss": 0.6298, + "step": 67820 + }, + { + "epoch": 2.682671201724377, + "grad_norm": 1.6975300418314174, + "learning_rate": 3.3686673867009025e-07, + "loss": 0.6431, + "step": 67830 + }, + { + "epoch": 2.6830667009432654, + "grad_norm": 1.2908359146813606, + "learning_rate": 3.360369354891907e-07, + "loss": 0.6517, + "step": 67840 + }, + { + "epoch": 2.683462200162155, + "grad_norm": 1.4959510332695647, + "learning_rate": 3.352081200557644e-07, + "loss": 0.6325, + "step": 67850 + }, + { + "epoch": 2.6838576993810435, + "grad_norm": 1.5398910327911224, + "learning_rate": 3.34380292545341e-07, + "loss": 0.6073, + "step": 67860 + }, + { + "epoch": 
2.684253198599933, + "grad_norm": 1.434697804562703, + "learning_rate": 3.3355345313323796e-07, + "loss": 0.6551, + "step": 67870 + }, + { + "epoch": 2.6846486978188215, + "grad_norm": 1.6316207395651667, + "learning_rate": 3.3272760199456853e-07, + "loss": 0.6412, + "step": 67880 + }, + { + "epoch": 2.685044197037711, + "grad_norm": 1.6515451705670228, + "learning_rate": 3.3190273930423177e-07, + "loss": 0.6689, + "step": 67890 + }, + { + "epoch": 2.6854396962565996, + "grad_norm": 1.7636864563050267, + "learning_rate": 3.3107886523691955e-07, + "loss": 0.6487, + "step": 67900 + }, + { + "epoch": 2.685835195475489, + "grad_norm": 1.2407556192004556, + "learning_rate": 3.3025597996711566e-07, + "loss": 0.6645, + "step": 67910 + }, + { + "epoch": 2.686230694694378, + "grad_norm": 1.799509130953986, + "learning_rate": 3.294340836690918e-07, + "loss": 0.5929, + "step": 67920 + }, + { + "epoch": 2.686626193913267, + "grad_norm": 1.4364499340508392, + "learning_rate": 3.286131765169126e-07, + "loss": 0.6433, + "step": 67930 + }, + { + "epoch": 2.687021693132156, + "grad_norm": 1.733801769054392, + "learning_rate": 3.277932586844307e-07, + "loss": 0.6469, + "step": 67940 + }, + { + "epoch": 2.6874171923510453, + "grad_norm": 1.8505853992871186, + "learning_rate": 3.2697433034529214e-07, + "loss": 0.6078, + "step": 67950 + }, + { + "epoch": 2.6878126915699343, + "grad_norm": 1.3680040478374165, + "learning_rate": 3.2615639167293045e-07, + "loss": 0.6361, + "step": 67960 + }, + { + "epoch": 2.6882081907888233, + "grad_norm": 1.5000926896746636, + "learning_rate": 3.2533944284057264e-07, + "loss": 0.6602, + "step": 67970 + }, + { + "epoch": 2.6886036900077124, + "grad_norm": 1.3052371375795877, + "learning_rate": 3.2452348402123356e-07, + "loss": 0.6727, + "step": 67980 + }, + { + "epoch": 2.6889991892266014, + "grad_norm": 1.396241337167498, + "learning_rate": 3.2370851538771953e-07, + "loss": 0.6489, + "step": 67990 + }, + { + "epoch": 2.6893946884454905, + "grad_norm": 1.1532291475843564, + "learning_rate": 3.228945371126263e-07, + "loss": 0.6394, + "step": 68000 + }, + { + "epoch": 2.6897901876643795, + "grad_norm": 1.24235711859018, + "learning_rate": 3.220815493683416e-07, + "loss": 0.6314, + "step": 68010 + }, + { + "epoch": 2.6901856868832685, + "grad_norm": 1.220705732795657, + "learning_rate": 3.2126955232704094e-07, + "loss": 0.6246, + "step": 68020 + }, + { + "epoch": 2.6905811861021576, + "grad_norm": 1.3511688229780527, + "learning_rate": 3.204585461606929e-07, + "loss": 0.6368, + "step": 68030 + }, + { + "epoch": 2.6909766853210466, + "grad_norm": 1.4741450640018745, + "learning_rate": 3.196485310410535e-07, + "loss": 0.6589, + "step": 68040 + }, + { + "epoch": 2.6913721845399357, + "grad_norm": 1.7334416177762504, + "learning_rate": 3.1883950713967105e-07, + "loss": 0.6422, + "step": 68050 + }, + { + "epoch": 2.6917676837588247, + "grad_norm": 1.6395381189868599, + "learning_rate": 3.1803147462788175e-07, + "loss": 0.6258, + "step": 68060 + }, + { + "epoch": 2.6921631829777137, + "grad_norm": 1.639894593249071, + "learning_rate": 3.1722443367681487e-07, + "loss": 0.6135, + "step": 68070 + }, + { + "epoch": 2.6925586821966028, + "grad_norm": 1.2390951299359396, + "learning_rate": 3.164183844573865e-07, + "loss": 0.6693, + "step": 68080 + }, + { + "epoch": 2.692954181415492, + "grad_norm": 1.7592401704859844, + "learning_rate": 3.1561332714030334e-07, + "loss": 0.6232, + "step": 68090 + }, + { + "epoch": 2.693349680634381, + "grad_norm": 1.657449540500462, + "learning_rate": 
3.1480926189606463e-07, + "loss": 0.6461, + "step": 68100 + }, + { + "epoch": 2.69374517985327, + "grad_norm": 1.3336205402744792, + "learning_rate": 3.1400618889495636e-07, + "loss": 0.6838, + "step": 68110 + }, + { + "epoch": 2.694140679072159, + "grad_norm": 1.6292808262751366, + "learning_rate": 3.132041083070564e-07, + "loss": 0.6565, + "step": 68120 + }, + { + "epoch": 2.694536178291048, + "grad_norm": 1.5217678688211929, + "learning_rate": 3.124030203022305e-07, + "loss": 0.6353, + "step": 68130 + }, + { + "epoch": 2.694931677509937, + "grad_norm": 1.5030488987102606, + "learning_rate": 3.1160292505013676e-07, + "loss": 0.6764, + "step": 68140 + }, + { + "epoch": 2.695327176728826, + "grad_norm": 1.5561741890697343, + "learning_rate": 3.1080382272021983e-07, + "loss": 0.6535, + "step": 68150 + }, + { + "epoch": 2.695722675947715, + "grad_norm": 1.368334135572594, + "learning_rate": 3.1000571348171803e-07, + "loss": 0.6443, + "step": 68160 + }, + { + "epoch": 2.696118175166604, + "grad_norm": 1.5852085599217816, + "learning_rate": 3.0920859750365574e-07, + "loss": 0.6165, + "step": 68170 + }, + { + "epoch": 2.696513674385493, + "grad_norm": 1.3695727666639599, + "learning_rate": 3.084124749548495e-07, + "loss": 0.6504, + "step": 68180 + }, + { + "epoch": 2.696909173604382, + "grad_norm": 1.5767428456848072, + "learning_rate": 3.0761734600390334e-07, + "loss": 0.6136, + "step": 68190 + }, + { + "epoch": 2.6973046728232712, + "grad_norm": 1.5278696792448265, + "learning_rate": 3.068232108192132e-07, + "loss": 0.6491, + "step": 68200 + }, + { + "epoch": 2.6977001720421603, + "grad_norm": 1.7384324531816469, + "learning_rate": 3.0603006956896165e-07, + "loss": 0.643, + "step": 68210 + }, + { + "epoch": 2.6980956712610493, + "grad_norm": 1.4578527958217349, + "learning_rate": 3.052379224211244e-07, + "loss": 0.6221, + "step": 68220 + }, + { + "epoch": 2.6984911704799384, + "grad_norm": 1.3911086240015524, + "learning_rate": 3.044467695434633e-07, + "loss": 0.6375, + "step": 68230 + }, + { + "epoch": 2.6988866696988274, + "grad_norm": 1.6910829018839328, + "learning_rate": 3.036566111035316e-07, + "loss": 0.6199, + "step": 68240 + }, + { + "epoch": 2.6992821689177164, + "grad_norm": 1.4168179772947889, + "learning_rate": 3.028674472686699e-07, + "loss": 0.6546, + "step": 68250 + }, + { + "epoch": 2.6996776681366055, + "grad_norm": 1.3986303649049687, + "learning_rate": 3.020792782060117e-07, + "loss": 0.6753, + "step": 68260 + }, + { + "epoch": 2.7000731673554945, + "grad_norm": 1.6422893016554367, + "learning_rate": 3.012921040824762e-07, + "loss": 0.659, + "step": 68270 + }, + { + "epoch": 2.7004686665743836, + "grad_norm": 1.5002977705015865, + "learning_rate": 3.005059250647746e-07, + "loss": 0.6491, + "step": 68280 + }, + { + "epoch": 2.7008641657932726, + "grad_norm": 1.240302239507857, + "learning_rate": 2.997207413194042e-07, + "loss": 0.6411, + "step": 68290 + }, + { + "epoch": 2.7012596650121616, + "grad_norm": 1.4569336444242, + "learning_rate": 2.989365530126559e-07, + "loss": 0.6622, + "step": 68300 + }, + { + "epoch": 2.7016551642310507, + "grad_norm": 1.3511359636396427, + "learning_rate": 2.9815336031060626e-07, + "loss": 0.6453, + "step": 68310 + }, + { + "epoch": 2.7020506634499397, + "grad_norm": 1.3450343464159829, + "learning_rate": 2.97371163379121e-07, + "loss": 0.6217, + "step": 68320 + }, + { + "epoch": 2.7024461626688288, + "grad_norm": 1.3192119078784772, + "learning_rate": 2.965899623838575e-07, + "loss": 0.6398, + "step": 68330 + }, + { + "epoch": 
2.702841661887718, + "grad_norm": 1.4843361017815262, + "learning_rate": 2.958097574902602e-07, + "loss": 0.6165, + "step": 68340 + }, + { + "epoch": 2.703237161106607, + "grad_norm": 1.5065879981967603, + "learning_rate": 2.9503054886356353e-07, + "loss": 0.6268, + "step": 68350 + }, + { + "epoch": 2.703632660325496, + "grad_norm": 1.6528747274797109, + "learning_rate": 2.9425233666878993e-07, + "loss": 0.6368, + "step": 68360 + }, + { + "epoch": 2.704028159544385, + "grad_norm": 1.3504742246608263, + "learning_rate": 2.9347512107075207e-07, + "loss": 0.6662, + "step": 68370 + }, + { + "epoch": 2.704423658763274, + "grad_norm": 1.5273343418523304, + "learning_rate": 2.926989022340493e-07, + "loss": 0.6162, + "step": 68380 + }, + { + "epoch": 2.704819157982163, + "grad_norm": 1.5790203009416488, + "learning_rate": 2.919236803230741e-07, + "loss": 0.6369, + "step": 68390 + }, + { + "epoch": 2.705214657201052, + "grad_norm": 1.533063079252469, + "learning_rate": 2.911494555020022e-07, + "loss": 0.6572, + "step": 68400 + }, + { + "epoch": 2.705610156419941, + "grad_norm": 1.5514001641638855, + "learning_rate": 2.903762279348038e-07, + "loss": 0.64, + "step": 68410 + }, + { + "epoch": 2.70600565563883, + "grad_norm": 1.9201184076745088, + "learning_rate": 2.8960399778523384e-07, + "loss": 0.6539, + "step": 68420 + }, + { + "epoch": 2.706401154857719, + "grad_norm": 1.538000017692867, + "learning_rate": 2.888327652168377e-07, + "loss": 0.6214, + "step": 68430 + }, + { + "epoch": 2.706796654076608, + "grad_norm": 1.5139147604366952, + "learning_rate": 2.880625303929491e-07, + "loss": 0.6437, + "step": 68440 + }, + { + "epoch": 2.7071921532954972, + "grad_norm": 1.360455992413914, + "learning_rate": 2.8729329347669146e-07, + "loss": 0.6461, + "step": 68450 + }, + { + "epoch": 2.7075876525143863, + "grad_norm": 1.5686283832820274, + "learning_rate": 2.8652505463097445e-07, + "loss": 0.6596, + "step": 68460 + }, + { + "epoch": 2.7079831517332753, + "grad_norm": 1.6018929023218305, + "learning_rate": 2.857578140185002e-07, + "loss": 0.6243, + "step": 68470 + }, + { + "epoch": 2.7083786509521643, + "grad_norm": 1.5118950744260757, + "learning_rate": 2.849915718017543e-07, + "loss": 0.6324, + "step": 68480 + }, + { + "epoch": 2.7087741501710534, + "grad_norm": 1.770497101415643, + "learning_rate": 2.8422632814301523e-07, + "loss": 0.6066, + "step": 68490 + }, + { + "epoch": 2.7091696493899424, + "grad_norm": 1.4172496119037643, + "learning_rate": 2.834620832043483e-07, + "loss": 0.6406, + "step": 68500 + }, + { + "epoch": 2.7095651486088315, + "grad_norm": 1.2949555961801198, + "learning_rate": 2.8269883714760806e-07, + "loss": 0.6453, + "step": 68510 + }, + { + "epoch": 2.7099606478277205, + "grad_norm": 1.8227660460144495, + "learning_rate": 2.8193659013443563e-07, + "loss": 0.6418, + "step": 68520 + }, + { + "epoch": 2.7103561470466095, + "grad_norm": 1.3587220887292486, + "learning_rate": 2.811753423262631e-07, + "loss": 0.6483, + "step": 68530 + }, + { + "epoch": 2.7107516462654986, + "grad_norm": 1.4992946389392103, + "learning_rate": 2.8041509388430976e-07, + "loss": 0.6376, + "step": 68540 + }, + { + "epoch": 2.7111471454843876, + "grad_norm": 1.3204886574934729, + "learning_rate": 2.7965584496958185e-07, + "loss": 0.6423, + "step": 68550 + }, + { + "epoch": 2.7115426447032767, + "grad_norm": 1.517801064217121, + "learning_rate": 2.788975957428769e-07, + "loss": 0.6546, + "step": 68560 + }, + { + "epoch": 2.7119381439221657, + "grad_norm": 1.3694690323043008, + "learning_rate": 
2.781403463647775e-07, + "loss": 0.6573, + "step": 68570 + }, + { + "epoch": 2.7123336431410547, + "grad_norm": 1.423737782498182, + "learning_rate": 2.773840969956576e-07, + "loss": 0.6738, + "step": 68580 + }, + { + "epoch": 2.712729142359944, + "grad_norm": 1.540953605545579, + "learning_rate": 2.7662884779567687e-07, + "loss": 0.6368, + "step": 68590 + }, + { + "epoch": 2.713124641578833, + "grad_norm": 1.5640336462122308, + "learning_rate": 2.7587459892478517e-07, + "loss": 0.6536, + "step": 68600 + }, + { + "epoch": 2.713520140797722, + "grad_norm": 1.609568168975661, + "learning_rate": 2.7512135054271806e-07, + "loss": 0.6283, + "step": 68610 + }, + { + "epoch": 2.713915640016611, + "grad_norm": 1.3666465221971882, + "learning_rate": 2.7436910280900176e-07, + "loss": 0.6338, + "step": 68620 + }, + { + "epoch": 2.7143111392355, + "grad_norm": 1.300546333214171, + "learning_rate": 2.736178558829483e-07, + "loss": 0.6426, + "step": 68630 + }, + { + "epoch": 2.714706638454389, + "grad_norm": 1.6373888804716752, + "learning_rate": 2.7286760992366046e-07, + "loss": 0.6519, + "step": 68640 + }, + { + "epoch": 2.715102137673278, + "grad_norm": 1.6376397086747032, + "learning_rate": 2.721183650900261e-07, + "loss": 0.6432, + "step": 68650 + }, + { + "epoch": 2.715497636892167, + "grad_norm": 1.4602682879598028, + "learning_rate": 2.7137012154072385e-07, + "loss": 0.6475, + "step": 68660 + }, + { + "epoch": 2.715893136111056, + "grad_norm": 1.336128118705436, + "learning_rate": 2.706228794342175e-07, + "loss": 0.6221, + "step": 68670 + }, + { + "epoch": 2.716288635329945, + "grad_norm": 1.4060509655271807, + "learning_rate": 2.6987663892876105e-07, + "loss": 0.6508, + "step": 68680 + }, + { + "epoch": 2.716684134548834, + "grad_norm": 1.6104819501962804, + "learning_rate": 2.691314001823947e-07, + "loss": 0.6367, + "step": 68690 + }, + { + "epoch": 2.717079633767723, + "grad_norm": 1.7001876219415388, + "learning_rate": 2.683871633529483e-07, + "loss": 0.6375, + "step": 68700 + }, + { + "epoch": 2.7174751329866123, + "grad_norm": 1.5032203096451775, + "learning_rate": 2.67643928598037e-07, + "loss": 0.6373, + "step": 68710 + }, + { + "epoch": 2.7178706322055013, + "grad_norm": 1.2857610675448505, + "learning_rate": 2.6690169607506697e-07, + "loss": 0.6443, + "step": 68720 + }, + { + "epoch": 2.7182661314243903, + "grad_norm": 1.296749796922664, + "learning_rate": 2.6616046594122866e-07, + "loss": 0.6052, + "step": 68730 + }, + { + "epoch": 2.7186616306432794, + "grad_norm": 1.2190414086705312, + "learning_rate": 2.654202383535032e-07, + "loss": 0.6728, + "step": 68740 + }, + { + "epoch": 2.7190571298621684, + "grad_norm": 1.6385610649159084, + "learning_rate": 2.646810134686567e-07, + "loss": 0.6159, + "step": 68750 + }, + { + "epoch": 2.7194526290810574, + "grad_norm": 1.400802721941971, + "learning_rate": 2.6394279144324576e-07, + "loss": 0.6418, + "step": 68760 + }, + { + "epoch": 2.7198481282999465, + "grad_norm": 1.5629708272782843, + "learning_rate": 2.63205572433613e-07, + "loss": 0.6358, + "step": 68770 + }, + { + "epoch": 2.7202436275188355, + "grad_norm": 1.387853163638029, + "learning_rate": 2.624693565958869e-07, + "loss": 0.6522, + "step": 68780 + }, + { + "epoch": 2.7206391267377246, + "grad_norm": 1.5030026062662623, + "learning_rate": 2.617341440859883e-07, + "loss": 0.6392, + "step": 68790 + }, + { + "epoch": 2.7210346259566136, + "grad_norm": 1.597726876397117, + "learning_rate": 2.6099993505961984e-07, + "loss": 0.6216, + "step": 68800 + }, + { + "epoch": 
2.7214301251755026, + "grad_norm": 1.4943509482838175, + "learning_rate": 2.6026672967227664e-07, + "loss": 0.6235, + "step": 68810 + }, + { + "epoch": 2.7218256243943917, + "grad_norm": 1.5425165452770624, + "learning_rate": 2.595345280792372e-07, + "loss": 0.6481, + "step": 68820 + }, + { + "epoch": 2.7222211236132807, + "grad_norm": 1.3585751304652756, + "learning_rate": 2.5880333043557136e-07, + "loss": 0.638, + "step": 68830 + }, + { + "epoch": 2.7226166228321698, + "grad_norm": 1.3237370151135492, + "learning_rate": 2.5807313689613256e-07, + "loss": 0.6181, + "step": 68840 + }, + { + "epoch": 2.723012122051059, + "grad_norm": 1.5878473618651103, + "learning_rate": 2.573439476155637e-07, + "loss": 0.6451, + "step": 68850 + }, + { + "epoch": 2.723407621269948, + "grad_norm": 1.5731462815375221, + "learning_rate": 2.566157627482946e-07, + "loss": 0.6169, + "step": 68860 + }, + { + "epoch": 2.723803120488837, + "grad_norm": 1.3739114566964035, + "learning_rate": 2.558885824485424e-07, + "loss": 0.6559, + "step": 68870 + }, + { + "epoch": 2.724198619707726, + "grad_norm": 1.5122209680259404, + "learning_rate": 2.551624068703112e-07, + "loss": 0.6519, + "step": 68880 + }, + { + "epoch": 2.724594118926615, + "grad_norm": 1.3420880454933402, + "learning_rate": 2.54437236167393e-07, + "loss": 0.6336, + "step": 68890 + }, + { + "epoch": 2.724989618145504, + "grad_norm": 1.3009560719115958, + "learning_rate": 2.53713070493366e-07, + "loss": 0.6268, + "step": 68900 + }, + { + "epoch": 2.725385117364393, + "grad_norm": 1.6500623502081264, + "learning_rate": 2.52989910001597e-07, + "loss": 0.639, + "step": 68910 + }, + { + "epoch": 2.7257806165832825, + "grad_norm": 1.2388565463344678, + "learning_rate": 2.5226775484523737e-07, + "loss": 0.6363, + "step": 68920 + }, + { + "epoch": 2.726176115802171, + "grad_norm": 1.5017041054242064, + "learning_rate": 2.5154660517722917e-07, + "loss": 0.6725, + "step": 68930 + }, + { + "epoch": 2.7265716150210606, + "grad_norm": 1.3055033887653618, + "learning_rate": 2.5082646115029806e-07, + "loss": 0.6933, + "step": 68940 + }, + { + "epoch": 2.726967114239949, + "grad_norm": 1.4122468539706097, + "learning_rate": 2.5010732291695926e-07, + "loss": 0.6422, + "step": 68950 + }, + { + "epoch": 2.7273626134588387, + "grad_norm": 1.571471474460686, + "learning_rate": 2.4938919062951373e-07, + "loss": 0.6467, + "step": 68960 + }, + { + "epoch": 2.7277581126777273, + "grad_norm": 1.2842866422276005, + "learning_rate": 2.4867206444004864e-07, + "loss": 0.6517, + "step": 68970 + }, + { + "epoch": 2.7281536118966168, + "grad_norm": 1.4313994440518403, + "learning_rate": 2.4795594450043925e-07, + "loss": 0.6475, + "step": 68980 + }, + { + "epoch": 2.7285491111155054, + "grad_norm": 1.29947851444614, + "learning_rate": 2.472408309623486e-07, + "loss": 0.6547, + "step": 68990 + }, + { + "epoch": 2.728944610334395, + "grad_norm": 1.5189394205067743, + "learning_rate": 2.4652672397722397e-07, + "loss": 0.6514, + "step": 69000 + }, + { + "epoch": 2.7293401095532834, + "grad_norm": 1.8697739254086567, + "learning_rate": 2.458136236963027e-07, + "loss": 0.6283, + "step": 69010 + }, + { + "epoch": 2.729735608772173, + "grad_norm": 1.2917153407817616, + "learning_rate": 2.4510153027060615e-07, + "loss": 0.6458, + "step": 69020 + }, + { + "epoch": 2.7301311079910615, + "grad_norm": 1.4988388176962628, + "learning_rate": 2.443904438509431e-07, + "loss": 0.6587, + "step": 69030 + }, + { + "epoch": 2.730526607209951, + "grad_norm": 1.517101504264004, + "learning_rate": 
2.43680364587911e-07, + "loss": 0.6693, + "step": 69040 + }, + { + "epoch": 2.7309221064288396, + "grad_norm": 1.5672061335077485, + "learning_rate": 2.4297129263189e-07, + "loss": 0.6381, + "step": 69050 + }, + { + "epoch": 2.731317605647729, + "grad_norm": 1.4367360336621544, + "learning_rate": 2.4226322813305226e-07, + "loss": 0.6712, + "step": 69060 + }, + { + "epoch": 2.7317131048666177, + "grad_norm": 1.5619493447589667, + "learning_rate": 2.4155617124135164e-07, + "loss": 0.601, + "step": 69070 + }, + { + "epoch": 2.732108604085507, + "grad_norm": 1.4542552867826393, + "learning_rate": 2.4085012210653235e-07, + "loss": 0.7147, + "step": 69080 + }, + { + "epoch": 2.7325041033043957, + "grad_norm": 1.4631889254838182, + "learning_rate": 2.4014508087812137e-07, + "loss": 0.6391, + "step": 69090 + }, + { + "epoch": 2.7328996025232852, + "grad_norm": 1.3041780732902712, + "learning_rate": 2.3944104770543653e-07, + "loss": 0.684, + "step": 69100 + }, + { + "epoch": 2.733295101742174, + "grad_norm": 1.4138354713939887, + "learning_rate": 2.38738022737578e-07, + "loss": 0.6619, + "step": 69110 + }, + { + "epoch": 2.7336906009610633, + "grad_norm": 1.4244810808186827, + "learning_rate": 2.3803600612343602e-07, + "loss": 0.6612, + "step": 69120 + }, + { + "epoch": 2.734086100179952, + "grad_norm": 1.5115910890001782, + "learning_rate": 2.3733499801168457e-07, + "loss": 0.6468, + "step": 69130 + }, + { + "epoch": 2.7344815993988414, + "grad_norm": 1.5494320243488706, + "learning_rate": 2.3663499855078653e-07, + "loss": 0.6827, + "step": 69140 + }, + { + "epoch": 2.73487709861773, + "grad_norm": 1.9854899886385176, + "learning_rate": 2.3593600788898773e-07, + "loss": 0.605, + "step": 69150 + }, + { + "epoch": 2.7352725978366195, + "grad_norm": 1.6898890540047589, + "learning_rate": 2.3523802617432477e-07, + "loss": 0.6226, + "step": 69160 + }, + { + "epoch": 2.735668097055508, + "grad_norm": 1.321907731715437, + "learning_rate": 2.345410535546161e-07, + "loss": 0.6621, + "step": 69170 + }, + { + "epoch": 2.7360635962743975, + "grad_norm": 1.5706416558424043, + "learning_rate": 2.3384509017747026e-07, + "loss": 0.6241, + "step": 69180 + }, + { + "epoch": 2.736459095493286, + "grad_norm": 1.3650093850130947, + "learning_rate": 2.3315013619027993e-07, + "loss": 0.637, + "step": 69190 + }, + { + "epoch": 2.7368545947121756, + "grad_norm": 1.4406319757428092, + "learning_rate": 2.3245619174022294e-07, + "loss": 0.6385, + "step": 69200 + }, + { + "epoch": 2.737250093931064, + "grad_norm": 1.7934054487672793, + "learning_rate": 2.3176325697426726e-07, + "loss": 0.6331, + "step": 69210 + }, + { + "epoch": 2.7376455931499537, + "grad_norm": 1.5060467002685176, + "learning_rate": 2.3107133203916331e-07, + "loss": 0.628, + "step": 69220 + }, + { + "epoch": 2.7380410923688423, + "grad_norm": 1.621182495945924, + "learning_rate": 2.303804170814483e-07, + "loss": 0.6788, + "step": 69230 + }, + { + "epoch": 2.738436591587732, + "grad_norm": 1.5765280174694711, + "learning_rate": 2.2969051224744743e-07, + "loss": 0.6423, + "step": 69240 + }, + { + "epoch": 2.738832090806621, + "grad_norm": 1.1734909918378862, + "learning_rate": 2.2900161768327044e-07, + "loss": 0.6623, + "step": 69250 + }, + { + "epoch": 2.73922759002551, + "grad_norm": 1.5728635020318578, + "learning_rate": 2.2831373353481234e-07, + "loss": 0.6134, + "step": 69260 + }, + { + "epoch": 2.739623089244399, + "grad_norm": 1.3922302774790831, + "learning_rate": 2.2762685994775657e-07, + "loss": 0.6709, + "step": 69270 + }, + { + "epoch": 
2.740018588463288, + "grad_norm": 1.3963543510513603, + "learning_rate": 2.2694099706757066e-07, + "loss": 0.6558, + "step": 69280 + }, + { + "epoch": 2.740414087682177, + "grad_norm": 1.6029162929281209, + "learning_rate": 2.2625614503950843e-07, + "loss": 0.6523, + "step": 69290 + }, + { + "epoch": 2.740809586901066, + "grad_norm": 1.337793949884056, + "learning_rate": 2.2557230400860998e-07, + "loss": 0.6461, + "step": 69300 + }, + { + "epoch": 2.741205086119955, + "grad_norm": 1.4868880009013101, + "learning_rate": 2.2488947411970163e-07, + "loss": 0.6439, + "step": 69310 + }, + { + "epoch": 2.741600585338844, + "grad_norm": 1.25075291955305, + "learning_rate": 2.242076555173939e-07, + "loss": 0.6573, + "step": 69320 + }, + { + "epoch": 2.741996084557733, + "grad_norm": 1.3198802985255884, + "learning_rate": 2.2352684834608617e-07, + "loss": 0.6251, + "step": 69330 + }, + { + "epoch": 2.742391583776622, + "grad_norm": 1.4158703326538418, + "learning_rate": 2.2284705274995933e-07, + "loss": 0.6169, + "step": 69340 + }, + { + "epoch": 2.742787082995511, + "grad_norm": 1.4411382836941886, + "learning_rate": 2.2216826887298426e-07, + "loss": 0.6675, + "step": 69350 + }, + { + "epoch": 2.7431825822144003, + "grad_norm": 1.3163533262387754, + "learning_rate": 2.2149049685891434e-07, + "loss": 0.6401, + "step": 69360 + }, + { + "epoch": 2.7435780814332893, + "grad_norm": 1.316766547224836, + "learning_rate": 2.2081373685129194e-07, + "loss": 0.6273, + "step": 69370 + }, + { + "epoch": 2.7439735806521783, + "grad_norm": 1.2823164663220994, + "learning_rate": 2.2013798899344073e-07, + "loss": 0.6741, + "step": 69380 + }, + { + "epoch": 2.7443690798710674, + "grad_norm": 1.3709664460718767, + "learning_rate": 2.194632534284752e-07, + "loss": 0.6275, + "step": 69390 + }, + { + "epoch": 2.7447645790899564, + "grad_norm": 1.2116639866310164, + "learning_rate": 2.1878953029929094e-07, + "loss": 0.6598, + "step": 69400 + }, + { + "epoch": 2.7451600783088455, + "grad_norm": 1.5408849494847299, + "learning_rate": 2.181168197485717e-07, + "loss": 0.6452, + "step": 69410 + }, + { + "epoch": 2.7455555775277345, + "grad_norm": 1.5225328335156032, + "learning_rate": 2.1744512191878565e-07, + "loss": 0.651, + "step": 69420 + }, + { + "epoch": 2.7459510767466235, + "grad_norm": 1.3874667113624823, + "learning_rate": 2.1677443695218735e-07, + "loss": 0.6411, + "step": 69430 + }, + { + "epoch": 2.7463465759655126, + "grad_norm": 1.691442001775994, + "learning_rate": 2.1610476499081656e-07, + "loss": 0.6464, + "step": 69440 + }, + { + "epoch": 2.7467420751844016, + "grad_norm": 1.6784529503396555, + "learning_rate": 2.1543610617649812e-07, + "loss": 0.6622, + "step": 69450 + }, + { + "epoch": 2.7471375744032906, + "grad_norm": 1.503828137536102, + "learning_rate": 2.1476846065084157e-07, + "loss": 0.6581, + "step": 69460 + }, + { + "epoch": 2.7475330736221797, + "grad_norm": 1.4900429380242568, + "learning_rate": 2.1410182855524376e-07, + "loss": 0.6534, + "step": 69470 + }, + { + "epoch": 2.7479285728410687, + "grad_norm": 1.7257924432736504, + "learning_rate": 2.134362100308862e-07, + "loss": 0.5996, + "step": 69480 + }, + { + "epoch": 2.7483240720599578, + "grad_norm": 1.38035700512195, + "learning_rate": 2.1277160521873452e-07, + "loss": 0.6223, + "step": 69490 + }, + { + "epoch": 2.748719571278847, + "grad_norm": 1.5150352066029071, + "learning_rate": 2.1210801425954165e-07, + "loss": 0.647, + "step": 69500 + }, + { + "epoch": 2.749115070497736, + "grad_norm": 1.2806491208746578, + "learning_rate": 
2.1144543729384348e-07, + "loss": 0.6299, + "step": 69510 + }, + { + "epoch": 2.749510569716625, + "grad_norm": 1.931191023765883, + "learning_rate": 2.1078387446196392e-07, + "loss": 0.65, + "step": 69520 + }, + { + "epoch": 2.749906068935514, + "grad_norm": 1.4297792979739687, + "learning_rate": 2.101233259040092e-07, + "loss": 0.6547, + "step": 69530 + }, + { + "epoch": 2.750301568154403, + "grad_norm": 1.8976901558890449, + "learning_rate": 2.094637917598741e-07, + "loss": 0.6255, + "step": 69540 + }, + { + "epoch": 2.750697067373292, + "grad_norm": 1.454416603524465, + "learning_rate": 2.0880527216923462e-07, + "loss": 0.6141, + "step": 69550 + }, + { + "epoch": 2.751092566592181, + "grad_norm": 1.6237589818678877, + "learning_rate": 2.081477672715554e-07, + "loss": 0.6735, + "step": 69560 + }, + { + "epoch": 2.75148806581107, + "grad_norm": 1.5912237571555568, + "learning_rate": 2.0749127720608443e-07, + "loss": 0.6454, + "step": 69570 + }, + { + "epoch": 2.751883565029959, + "grad_norm": 1.5235691770166913, + "learning_rate": 2.068358021118544e-07, + "loss": 0.6384, + "step": 69580 + }, + { + "epoch": 2.752279064248848, + "grad_norm": 1.4196356588930572, + "learning_rate": 2.0618134212768371e-07, + "loss": 0.6439, + "step": 69590 + }, + { + "epoch": 2.752674563467737, + "grad_norm": 1.396346920875002, + "learning_rate": 2.055278973921765e-07, + "loss": 0.5926, + "step": 69600 + }, + { + "epoch": 2.7530700626866262, + "grad_norm": 1.3020287675495448, + "learning_rate": 2.048754680437204e-07, + "loss": 0.6271, + "step": 69610 + }, + { + "epoch": 2.7534655619055153, + "grad_norm": 1.644638699820278, + "learning_rate": 2.0422405422048985e-07, + "loss": 0.6654, + "step": 69620 + }, + { + "epoch": 2.7538610611244043, + "grad_norm": 1.453576215780983, + "learning_rate": 2.0357365606044177e-07, + "loss": 0.6833, + "step": 69630 + }, + { + "epoch": 2.7542565603432934, + "grad_norm": 1.3471039447623419, + "learning_rate": 2.0292427370132095e-07, + "loss": 0.6528, + "step": 69640 + }, + { + "epoch": 2.7546520595621824, + "grad_norm": 1.3397498760496365, + "learning_rate": 2.022759072806535e-07, + "loss": 0.6443, + "step": 69650 + }, + { + "epoch": 2.7550475587810714, + "grad_norm": 1.389448340434731, + "learning_rate": 2.01628556935754e-07, + "loss": 0.6575, + "step": 69660 + }, + { + "epoch": 2.7554430579999605, + "grad_norm": 1.3721106493078639, + "learning_rate": 2.0098222280372003e-07, + "loss": 0.6655, + "step": 69670 + }, + { + "epoch": 2.7558385572188495, + "grad_norm": 1.530698837259324, + "learning_rate": 2.0033690502143266e-07, + "loss": 0.643, + "step": 69680 + }, + { + "epoch": 2.7562340564377386, + "grad_norm": 1.3278282492733717, + "learning_rate": 1.9969260372556033e-07, + "loss": 0.6307, + "step": 69690 + }, + { + "epoch": 2.7566295556566276, + "grad_norm": 1.501316534193504, + "learning_rate": 1.9904931905255553e-07, + "loss": 0.6191, + "step": 69700 + }, + { + "epoch": 2.7570250548755166, + "grad_norm": 1.367879676115424, + "learning_rate": 1.9840705113865322e-07, + "loss": 0.627, + "step": 69710 + }, + { + "epoch": 2.7574205540944057, + "grad_norm": 1.3133081204376638, + "learning_rate": 1.9776580011987566e-07, + "loss": 0.6197, + "step": 69720 + }, + { + "epoch": 2.7578160533132947, + "grad_norm": 1.4628042349087949, + "learning_rate": 1.9712556613202928e-07, + "loss": 0.6433, + "step": 69730 + }, + { + "epoch": 2.7582115525321838, + "grad_norm": 1.5779612026261522, + "learning_rate": 1.9648634931070386e-07, + "loss": 0.6288, + "step": 69740 + }, + { + "epoch": 
2.758607051751073, + "grad_norm": 1.556417617607726, + "learning_rate": 1.9584814979127563e-07, + "loss": 0.6898, + "step": 69750 + }, + { + "epoch": 2.759002550969962, + "grad_norm": 1.5612831206343956, + "learning_rate": 1.9521096770890257e-07, + "loss": 0.6688, + "step": 69760 + }, + { + "epoch": 2.759398050188851, + "grad_norm": 1.3764924788728663, + "learning_rate": 1.9457480319853116e-07, + "loss": 0.6233, + "step": 69770 + }, + { + "epoch": 2.75979354940774, + "grad_norm": 1.351673060665852, + "learning_rate": 1.939396563948881e-07, + "loss": 0.6579, + "step": 69780 + }, + { + "epoch": 2.760189048626629, + "grad_norm": 1.4287970695025112, + "learning_rate": 1.9330552743248798e-07, + "loss": 0.6762, + "step": 69790 + }, + { + "epoch": 2.760584547845518, + "grad_norm": 1.5326256345661453, + "learning_rate": 1.9267241644562728e-07, + "loss": 0.6562, + "step": 69800 + }, + { + "epoch": 2.760980047064407, + "grad_norm": 1.2716691150426762, + "learning_rate": 1.920403235683893e-07, + "loss": 0.6495, + "step": 69810 + }, + { + "epoch": 2.761375546283296, + "grad_norm": 1.629671043633906, + "learning_rate": 1.914092489346403e-07, + "loss": 0.6476, + "step": 69820 + }, + { + "epoch": 2.761771045502185, + "grad_norm": 1.5945873954823444, + "learning_rate": 1.9077919267803058e-07, + "loss": 0.6204, + "step": 69830 + }, + { + "epoch": 2.762166544721074, + "grad_norm": 1.627265891732816, + "learning_rate": 1.901501549319945e-07, + "loss": 0.6542, + "step": 69840 + }, + { + "epoch": 2.762562043939963, + "grad_norm": 1.4076824150040739, + "learning_rate": 1.8952213582975332e-07, + "loss": 0.6555, + "step": 69850 + }, + { + "epoch": 2.7629575431588522, + "grad_norm": 1.6614197374612025, + "learning_rate": 1.8889513550430892e-07, + "loss": 0.6668, + "step": 69860 + }, + { + "epoch": 2.7633530423777413, + "grad_norm": 1.348660418248994, + "learning_rate": 1.882691540884507e-07, + "loss": 0.6452, + "step": 69870 + }, + { + "epoch": 2.7637485415966303, + "grad_norm": 1.2861102602800623, + "learning_rate": 1.8764419171474923e-07, + "loss": 0.6251, + "step": 69880 + }, + { + "epoch": 2.7641440408155193, + "grad_norm": 1.2752208612507339, + "learning_rate": 1.8702024851556255e-07, + "loss": 0.6337, + "step": 69890 + }, + { + "epoch": 2.7645395400344084, + "grad_norm": 1.3812320918730367, + "learning_rate": 1.8639732462303051e-07, + "loss": 0.6414, + "step": 69900 + }, + { + "epoch": 2.7649350392532974, + "grad_norm": 1.5308342141209106, + "learning_rate": 1.85775420169077e-07, + "loss": 0.6614, + "step": 69910 + }, + { + "epoch": 2.7653305384721865, + "grad_norm": 1.2806709986852391, + "learning_rate": 1.8515453528541172e-07, + "loss": 0.6506, + "step": 69920 + }, + { + "epoch": 2.7657260376910755, + "grad_norm": 1.3895428090263962, + "learning_rate": 1.8453467010352667e-07, + "loss": 0.6299, + "step": 69930 + }, + { + "epoch": 2.7661215369099645, + "grad_norm": 1.42698733467093, + "learning_rate": 1.839158247546996e-07, + "loss": 0.6463, + "step": 69940 + }, + { + "epoch": 2.7665170361288536, + "grad_norm": 1.3367842849416025, + "learning_rate": 1.8329799936999072e-07, + "loss": 0.6443, + "step": 69950 + }, + { + "epoch": 2.7669125353477426, + "grad_norm": 1.7551605377211708, + "learning_rate": 1.8268119408024476e-07, + "loss": 0.6077, + "step": 69960 + }, + { + "epoch": 2.7673080345666317, + "grad_norm": 1.5393491753605335, + "learning_rate": 1.8206540901609004e-07, + "loss": 0.6313, + "step": 69970 + }, + { + "epoch": 2.7677035337855207, + "grad_norm": 1.2870384692928958, + "learning_rate": 
1.8145064430794058e-07, + "loss": 0.6516, + "step": 69980 + }, + { + "epoch": 2.7680990330044097, + "grad_norm": 1.5348966340685912, + "learning_rate": 1.8083690008599163e-07, + "loss": 0.6725, + "step": 69990 + }, + { + "epoch": 2.7684945322232988, + "grad_norm": 1.2976751426033901, + "learning_rate": 1.802241764802254e-07, + "loss": 0.6599, + "step": 70000 + }, + { + "epoch": 2.768890031442188, + "grad_norm": 1.2988854684535658, + "learning_rate": 1.7961247362040468e-07, + "loss": 0.6512, + "step": 70010 + }, + { + "epoch": 2.769285530661077, + "grad_norm": 1.32835481484248, + "learning_rate": 1.7900179163607866e-07, + "loss": 0.6453, + "step": 70020 + }, + { + "epoch": 2.769681029879966, + "grad_norm": 1.6783409781445278, + "learning_rate": 1.7839213065657835e-07, + "loss": 0.6353, + "step": 70030 + }, + { + "epoch": 2.770076529098855, + "grad_norm": 1.3247836198190301, + "learning_rate": 1.7778349081102042e-07, + "loss": 0.6365, + "step": 70040 + }, + { + "epoch": 2.770472028317744, + "grad_norm": 1.4445303567851318, + "learning_rate": 1.77175872228304e-07, + "loss": 0.6327, + "step": 70050 + }, + { + "epoch": 2.770867527536633, + "grad_norm": 1.2509364936379517, + "learning_rate": 1.7656927503711284e-07, + "loss": 0.6235, + "step": 70060 + }, + { + "epoch": 2.771263026755522, + "grad_norm": 1.4464360294031313, + "learning_rate": 1.759636993659125e-07, + "loss": 0.6625, + "step": 70070 + }, + { + "epoch": 2.771658525974411, + "grad_norm": 1.3427423811400483, + "learning_rate": 1.7535914534295485e-07, + "loss": 0.6301, + "step": 70080 + }, + { + "epoch": 2.7720540251933, + "grad_norm": 1.639592752745467, + "learning_rate": 1.7475561309627298e-07, + "loss": 0.6448, + "step": 70090 + }, + { + "epoch": 2.772449524412189, + "grad_norm": 1.359407536333506, + "learning_rate": 1.741531027536858e-07, + "loss": 0.6528, + "step": 70100 + }, + { + "epoch": 2.772845023631078, + "grad_norm": 1.316898559916085, + "learning_rate": 1.7355161444279346e-07, + "loss": 0.6831, + "step": 70110 + }, + { + "epoch": 2.7732405228499672, + "grad_norm": 1.5151070387663423, + "learning_rate": 1.7295114829098237e-07, + "loss": 0.6108, + "step": 70120 + }, + { + "epoch": 2.7736360220688563, + "grad_norm": 1.6430217099381406, + "learning_rate": 1.723517044254197e-07, + "loss": 0.6521, + "step": 70130 + }, + { + "epoch": 2.7740315212877453, + "grad_norm": 1.2411080704034883, + "learning_rate": 1.7175328297305782e-07, + "loss": 0.6345, + "step": 70140 + }, + { + "epoch": 2.7744270205066344, + "grad_norm": 1.5426078940055101, + "learning_rate": 1.7115588406063255e-07, + "loss": 0.6668, + "step": 70150 + }, + { + "epoch": 2.7748225197255234, + "grad_norm": 1.627580513848178, + "learning_rate": 1.7055950781466157e-07, + "loss": 0.6261, + "step": 70160 + }, + { + "epoch": 2.7752180189444124, + "grad_norm": 1.7985545682125468, + "learning_rate": 1.6996415436144887e-07, + "loss": 0.6172, + "step": 70170 + }, + { + "epoch": 2.7756135181633015, + "grad_norm": 1.6044468180595712, + "learning_rate": 1.6936982382707855e-07, + "loss": 0.6525, + "step": 70180 + }, + { + "epoch": 2.7760090173821905, + "grad_norm": 1.5822653965454174, + "learning_rate": 1.687765163374211e-07, + "loss": 0.6367, + "step": 70190 + }, + { + "epoch": 2.7764045166010796, + "grad_norm": 1.285159732438064, + "learning_rate": 1.6818423201812705e-07, + "loss": 0.6565, + "step": 70200 + }, + { + "epoch": 2.7768000158199686, + "grad_norm": 1.3308012964646518, + "learning_rate": 1.675929709946339e-07, + "loss": 0.6206, + "step": 70210 + }, + { + "epoch": 
2.7771955150388576, + "grad_norm": 1.5697358656515874, + "learning_rate": 1.6700273339215922e-07, + "loss": 0.6302, + "step": 70220 + }, + { + "epoch": 2.7775910142577467, + "grad_norm": 1.5741341975123648, + "learning_rate": 1.664135193357058e-07, + "loss": 0.6657, + "step": 70230 + }, + { + "epoch": 2.7779865134766357, + "grad_norm": 1.6300738170318227, + "learning_rate": 1.6582532895005888e-07, + "loss": 0.6337, + "step": 70240 + }, + { + "epoch": 2.7783820126955248, + "grad_norm": 1.386117346121053, + "learning_rate": 1.6523816235978817e-07, + "loss": 0.6603, + "step": 70250 + }, + { + "epoch": 2.778777511914414, + "grad_norm": 1.3896923700628134, + "learning_rate": 1.646520196892437e-07, + "loss": 0.6435, + "step": 70260 + }, + { + "epoch": 2.7791730111333033, + "grad_norm": 1.49885510815463, + "learning_rate": 1.6406690106256174e-07, + "loss": 0.6458, + "step": 70270 + }, + { + "epoch": 2.779568510352192, + "grad_norm": 1.4641701806077778, + "learning_rate": 1.6348280660365978e-07, + "loss": 0.6476, + "step": 70280 + }, + { + "epoch": 2.7799640095710814, + "grad_norm": 1.565453130641798, + "learning_rate": 1.6289973643623947e-07, + "loss": 0.618, + "step": 70290 + }, + { + "epoch": 2.78035950878997, + "grad_norm": 1.4376018413566471, + "learning_rate": 1.623176906837848e-07, + "loss": 0.6301, + "step": 70300 + }, + { + "epoch": 2.7807550080088594, + "grad_norm": 1.25605809644225, + "learning_rate": 1.6173666946956378e-07, + "loss": 0.6385, + "step": 70310 + }, + { + "epoch": 2.781150507227748, + "grad_norm": 1.3395555696135488, + "learning_rate": 1.611566729166253e-07, + "loss": 0.6392, + "step": 70320 + }, + { + "epoch": 2.7815460064466375, + "grad_norm": 1.4924151482989823, + "learning_rate": 1.605777011478038e-07, + "loss": 0.6438, + "step": 70330 + }, + { + "epoch": 2.781941505665526, + "grad_norm": 1.4345607053485638, + "learning_rate": 1.5999975428571513e-07, + "loss": 0.6113, + "step": 70340 + }, + { + "epoch": 2.7823370048844156, + "grad_norm": 1.676604636784896, + "learning_rate": 1.5942283245275913e-07, + "loss": 0.642, + "step": 70350 + }, + { + "epoch": 2.782732504103304, + "grad_norm": 1.5528653306400786, + "learning_rate": 1.5884693577111698e-07, + "loss": 0.6533, + "step": 70360 + }, + { + "epoch": 2.7831280033221937, + "grad_norm": 1.4631525480539687, + "learning_rate": 1.5827206436275443e-07, + "loss": 0.6514, + "step": 70370 + }, + { + "epoch": 2.7835235025410823, + "grad_norm": 1.5734008953544165, + "learning_rate": 1.5769821834941968e-07, + "loss": 0.6043, + "step": 70380 + }, + { + "epoch": 2.7839190017599718, + "grad_norm": 1.294508863775289, + "learning_rate": 1.571253978526427e-07, + "loss": 0.6511, + "step": 70390 + }, + { + "epoch": 2.7843145009788604, + "grad_norm": 1.57276638787535, + "learning_rate": 1.5655360299373757e-07, + "loss": 0.6454, + "step": 70400 + }, + { + "epoch": 2.78471000019775, + "grad_norm": 1.3818690470423782, + "learning_rate": 1.5598283389379965e-07, + "loss": 0.6531, + "step": 70410 + }, + { + "epoch": 2.7851054994166384, + "grad_norm": 1.1968357075455516, + "learning_rate": 1.5541309067371002e-07, + "loss": 0.6573, + "step": 70420 + }, + { + "epoch": 2.785500998635528, + "grad_norm": 1.4734324030449588, + "learning_rate": 1.548443734541294e-07, + "loss": 0.6251, + "step": 70430 + }, + { + "epoch": 2.7858964978544165, + "grad_norm": 1.6733311382296236, + "learning_rate": 1.5427668235550196e-07, + "loss": 0.6193, + "step": 70440 + }, + { + "epoch": 2.786291997073306, + "grad_norm": 1.3773379777397299, + "learning_rate": 
1.5371001749805492e-07, + "loss": 0.6307, + "step": 70450 + }, + { + "epoch": 2.7866874962921946, + "grad_norm": 1.4497533763907762, + "learning_rate": 1.5314437900179945e-07, + "loss": 0.6689, + "step": 70460 + }, + { + "epoch": 2.787082995511084, + "grad_norm": 1.74263901083047, + "learning_rate": 1.5257976698652644e-07, + "loss": 0.6346, + "step": 70470 + }, + { + "epoch": 2.7874784947299727, + "grad_norm": 1.4079424098713957, + "learning_rate": 1.520161815718124e-07, + "loss": 0.6447, + "step": 70480 + }, + { + "epoch": 2.787873993948862, + "grad_norm": 1.4718817217638045, + "learning_rate": 1.5145362287701416e-07, + "loss": 0.6744, + "step": 70490 + }, + { + "epoch": 2.7882694931677507, + "grad_norm": 1.579376219566021, + "learning_rate": 1.50892091021273e-07, + "loss": 0.6518, + "step": 70500 + }, + { + "epoch": 2.7886649923866402, + "grad_norm": 1.5148739392164345, + "learning_rate": 1.5033158612350996e-07, + "loss": 0.6819, + "step": 70510 + }, + { + "epoch": 2.789060491605529, + "grad_norm": 1.4138321887912344, + "learning_rate": 1.4977210830243282e-07, + "loss": 0.6234, + "step": 70520 + }, + { + "epoch": 2.7894559908244183, + "grad_norm": 1.3767362246811161, + "learning_rate": 1.4921365767652741e-07, + "loss": 0.6458, + "step": 70530 + }, + { + "epoch": 2.789851490043307, + "grad_norm": 1.2651323005352821, + "learning_rate": 1.4865623436406517e-07, + "loss": 0.6643, + "step": 70540 + }, + { + "epoch": 2.7902469892621964, + "grad_norm": 1.54661973112921, + "learning_rate": 1.4809983848309783e-07, + "loss": 0.6151, + "step": 70550 + }, + { + "epoch": 2.790642488481085, + "grad_norm": 1.5729891372496556, + "learning_rate": 1.4754447015146167e-07, + "loss": 0.6439, + "step": 70560 + }, + { + "epoch": 2.7910379876999745, + "grad_norm": 1.4062440147445077, + "learning_rate": 1.4699012948677317e-07, + "loss": 0.6208, + "step": 70570 + }, + { + "epoch": 2.791433486918863, + "grad_norm": 1.2542659186910414, + "learning_rate": 1.4643681660643228e-07, + "loss": 0.6617, + "step": 70580 + }, + { + "epoch": 2.7918289861377525, + "grad_norm": 1.63192997647672, + "learning_rate": 1.458845316276214e-07, + "loss": 0.6362, + "step": 70590 + }, + { + "epoch": 2.7922244853566416, + "grad_norm": 1.678368655790032, + "learning_rate": 1.4533327466730585e-07, + "loss": 0.6628, + "step": 70600 + }, + { + "epoch": 2.7926199845755306, + "grad_norm": 1.6681442093536465, + "learning_rate": 1.4478304584223168e-07, + "loss": 0.6522, + "step": 70610 + }, + { + "epoch": 2.7930154837944197, + "grad_norm": 1.5728678176256543, + "learning_rate": 1.442338452689268e-07, + "loss": 0.6477, + "step": 70620 + }, + { + "epoch": 2.7934109830133087, + "grad_norm": 1.4940185489575846, + "learning_rate": 1.4368567306370486e-07, + "loss": 0.6628, + "step": 70630 + }, + { + "epoch": 2.7938064822321977, + "grad_norm": 1.3653235148324767, + "learning_rate": 1.431385293426568e-07, + "loss": 0.6243, + "step": 70640 + }, + { + "epoch": 2.794201981451087, + "grad_norm": 1.2546474939810106, + "learning_rate": 1.4259241422166058e-07, + "loss": 0.6425, + "step": 70650 + }, + { + "epoch": 2.794597480669976, + "grad_norm": 1.4349327109309442, + "learning_rate": 1.4204732781637255e-07, + "loss": 0.6484, + "step": 70660 + }, + { + "epoch": 2.794992979888865, + "grad_norm": 1.3787818241105554, + "learning_rate": 1.415032702422331e-07, + "loss": 0.652, + "step": 70670 + }, + { + "epoch": 2.795388479107754, + "grad_norm": 1.633678026200489, + "learning_rate": 1.4096024161446453e-07, + "loss": 0.6367, + "step": 70680 + }, + { + "epoch": 
2.795783978326643, + "grad_norm": 1.548539279796999, + "learning_rate": 1.4041824204807098e-07, + "loss": 0.6514, + "step": 70690 + }, + { + "epoch": 2.796179477545532, + "grad_norm": 1.9867896044037945, + "learning_rate": 1.3987727165783782e-07, + "loss": 0.6117, + "step": 70700 + }, + { + "epoch": 2.796574976764421, + "grad_norm": 1.4428548669428978, + "learning_rate": 1.3933733055833453e-07, + "loss": 0.6561, + "step": 70710 + }, + { + "epoch": 2.79697047598331, + "grad_norm": 1.555012078990304, + "learning_rate": 1.3879841886391077e-07, + "loss": 0.6187, + "step": 70720 + }, + { + "epoch": 2.797365975202199, + "grad_norm": 1.4561341297622712, + "learning_rate": 1.3826053668869854e-07, + "loss": 0.6682, + "step": 70730 + }, + { + "epoch": 2.797761474421088, + "grad_norm": 1.3523860626914401, + "learning_rate": 1.377236841466123e-07, + "loss": 0.6529, + "step": 70740 + }, + { + "epoch": 2.798156973639977, + "grad_norm": 1.6296871689652246, + "learning_rate": 1.3718786135134887e-07, + "loss": 0.589, + "step": 70750 + }, + { + "epoch": 2.798552472858866, + "grad_norm": 1.3072960671335296, + "learning_rate": 1.3665306841638526e-07, + "loss": 0.6695, + "step": 70760 + }, + { + "epoch": 2.7989479720777553, + "grad_norm": 1.252329441611291, + "learning_rate": 1.3611930545498253e-07, + "loss": 0.6504, + "step": 70770 + }, + { + "epoch": 2.7993434712966443, + "grad_norm": 1.4252708063986979, + "learning_rate": 1.3558657258018193e-07, + "loss": 0.6652, + "step": 70780 + }, + { + "epoch": 2.7997389705155333, + "grad_norm": 1.395769794154177, + "learning_rate": 1.350548699048071e-07, + "loss": 0.6434, + "step": 70790 + }, + { + "epoch": 2.8001344697344224, + "grad_norm": 1.588739536643672, + "learning_rate": 1.345241975414635e-07, + "loss": 0.6528, + "step": 70800 + }, + { + "epoch": 2.8005299689533114, + "grad_norm": 1.2582128132655923, + "learning_rate": 1.3399455560253903e-07, + "loss": 0.663, + "step": 70810 + }, + { + "epoch": 2.8009254681722004, + "grad_norm": 1.2966416685606534, + "learning_rate": 1.3346594420020176e-07, + "loss": 0.6595, + "step": 70820 + }, + { + "epoch": 2.8013209673910895, + "grad_norm": 1.2699773005541533, + "learning_rate": 1.3293836344640321e-07, + "loss": 0.6417, + "step": 70830 + }, + { + "epoch": 2.8017164666099785, + "grad_norm": 1.283172380779107, + "learning_rate": 1.3241181345287624e-07, + "loss": 0.6369, + "step": 70840 + }, + { + "epoch": 2.8021119658288676, + "grad_norm": 1.72367921616284, + "learning_rate": 1.3188629433113443e-07, + "loss": 0.6225, + "step": 70850 + }, + { + "epoch": 2.8025074650477566, + "grad_norm": 1.2816956811493818, + "learning_rate": 1.313618061924743e-07, + "loss": 0.6617, + "step": 70860 + }, + { + "epoch": 2.8029029642666456, + "grad_norm": 1.338547532837257, + "learning_rate": 1.3083834914797255e-07, + "loss": 0.6667, + "step": 70870 + }, + { + "epoch": 2.8032984634855347, + "grad_norm": 1.7753813131278051, + "learning_rate": 1.3031592330848995e-07, + "loss": 0.6398, + "step": 70880 + }, + { + "epoch": 2.8036939627044237, + "grad_norm": 1.2871159392263742, + "learning_rate": 1.2979452878466582e-07, + "loss": 0.6153, + "step": 70890 + }, + { + "epoch": 2.8040894619233128, + "grad_norm": 1.3586814422551918, + "learning_rate": 1.292741656869234e-07, + "loss": 0.6187, + "step": 70900 + }, + { + "epoch": 2.804484961142202, + "grad_norm": 1.2812685003902786, + "learning_rate": 1.287548341254663e-07, + "loss": 0.6718, + "step": 70910 + }, + { + "epoch": 2.804880460361091, + "grad_norm": 1.4752136792868573, + "learning_rate": 
1.2823653421028092e-07, + "loss": 0.6014, + "step": 70920 + }, + { + "epoch": 2.80527595957998, + "grad_norm": 1.7100484199761878, + "learning_rate": 1.2771926605113283e-07, + "loss": 0.6408, + "step": 70930 + }, + { + "epoch": 2.805671458798869, + "grad_norm": 1.503382808450596, + "learning_rate": 1.2720302975757214e-07, + "loss": 0.6408, + "step": 70940 + }, + { + "epoch": 2.806066958017758, + "grad_norm": 1.6564643609656255, + "learning_rate": 1.26687825438927e-07, + "loss": 0.6475, + "step": 70950 + }, + { + "epoch": 2.806462457236647, + "grad_norm": 1.4523722470610614, + "learning_rate": 1.2617365320431063e-07, + "loss": 0.61, + "step": 70960 + }, + { + "epoch": 2.806857956455536, + "grad_norm": 1.8597013290015023, + "learning_rate": 1.2566051316261485e-07, + "loss": 0.6691, + "step": 70970 + }, + { + "epoch": 2.807253455674425, + "grad_norm": 1.560784573235987, + "learning_rate": 1.2514840542251495e-07, + "loss": 0.6178, + "step": 70980 + }, + { + "epoch": 2.807648954893314, + "grad_norm": 1.5515378282283863, + "learning_rate": 1.246373300924647e-07, + "loss": 0.6258, + "step": 70990 + }, + { + "epoch": 2.808044454112203, + "grad_norm": 1.499857883745644, + "learning_rate": 1.2412728728070367e-07, + "loss": 0.637, + "step": 71000 + }, + { + "epoch": 2.808439953331092, + "grad_norm": 1.729513914812002, + "learning_rate": 1.2361827709524765e-07, + "loss": 0.6154, + "step": 71010 + }, + { + "epoch": 2.8088354525499812, + "grad_norm": 1.4987849422891406, + "learning_rate": 1.231102996438982e-07, + "loss": 0.5912, + "step": 71020 + }, + { + "epoch": 2.8092309517688703, + "grad_norm": 1.3519845243946256, + "learning_rate": 1.2260335503423537e-07, + "loss": 0.6385, + "step": 71030 + }, + { + "epoch": 2.8096264509877593, + "grad_norm": 1.3482338424726288, + "learning_rate": 1.220974433736216e-07, + "loss": 0.6107, + "step": 71040 + }, + { + "epoch": 2.8100219502066484, + "grad_norm": 1.3356539631697795, + "learning_rate": 1.2159256476919957e-07, + "loss": 0.6527, + "step": 71050 + }, + { + "epoch": 2.8104174494255374, + "grad_norm": 1.379259505253153, + "learning_rate": 1.2108871932789534e-07, + "loss": 0.6691, + "step": 71060 + }, + { + "epoch": 2.8108129486444264, + "grad_norm": 1.7146532877745266, + "learning_rate": 1.205859071564136e-07, + "loss": 0.6463, + "step": 71070 + }, + { + "epoch": 2.8112084478633155, + "grad_norm": 1.533848926021102, + "learning_rate": 1.2008412836124139e-07, + "loss": 0.6026, + "step": 71080 + }, + { + "epoch": 2.8116039470822045, + "grad_norm": 1.3335394683551018, + "learning_rate": 1.1958338304864704e-07, + "loss": 0.651, + "step": 71090 + }, + { + "epoch": 2.8119994463010936, + "grad_norm": 1.797687976638747, + "learning_rate": 1.1908367132468012e-07, + "loss": 0.65, + "step": 71100 + }, + { + "epoch": 2.8123949455199826, + "grad_norm": 1.3262616638829634, + "learning_rate": 1.1858499329517104e-07, + "loss": 0.6716, + "step": 71110 + }, + { + "epoch": 2.8127904447388716, + "grad_norm": 1.7733830450989985, + "learning_rate": 1.1808734906573083e-07, + "loss": 0.6092, + "step": 71120 + }, + { + "epoch": 2.8131859439577607, + "grad_norm": 1.4960562923521619, + "learning_rate": 1.1759073874175242e-07, + "loss": 0.6072, + "step": 71130 + }, + { + "epoch": 2.8135814431766497, + "grad_norm": 1.400976180118364, + "learning_rate": 1.170951624284089e-07, + "loss": 0.679, + "step": 71140 + }, + { + "epoch": 2.8139769423955387, + "grad_norm": 1.455967690331034, + "learning_rate": 1.1660062023065521e-07, + "loss": 0.6223, + "step": 71150 + }, + { + "epoch": 
2.814372441614428, + "grad_norm": 1.4461566745830368, + "learning_rate": 1.1610711225322702e-07, + "loss": 0.6312, + "step": 71160 + }, + { + "epoch": 2.814767940833317, + "grad_norm": 1.3091359959437165, + "learning_rate": 1.1561463860064126e-07, + "loss": 0.6698, + "step": 71170 + }, + { + "epoch": 2.815163440052206, + "grad_norm": 1.7632193077363547, + "learning_rate": 1.151231993771934e-07, + "loss": 0.6276, + "step": 71180 + }, + { + "epoch": 2.815558939271095, + "grad_norm": 1.3683489118262935, + "learning_rate": 1.1463279468696464e-07, + "loss": 0.634, + "step": 71190 + }, + { + "epoch": 2.815954438489984, + "grad_norm": 1.4772338543995671, + "learning_rate": 1.1414342463381189e-07, + "loss": 0.665, + "step": 71200 + }, + { + "epoch": 2.816349937708873, + "grad_norm": 1.7091646355933343, + "learning_rate": 1.1365508932137726e-07, + "loss": 0.645, + "step": 71210 + }, + { + "epoch": 2.816745436927762, + "grad_norm": 1.3713118845500332, + "learning_rate": 1.1316778885308022e-07, + "loss": 0.6484, + "step": 71220 + }, + { + "epoch": 2.817140936146651, + "grad_norm": 1.6795543988991155, + "learning_rate": 1.1268152333212378e-07, + "loss": 0.656, + "step": 71230 + }, + { + "epoch": 2.81753643536554, + "grad_norm": 1.6196904160666268, + "learning_rate": 1.1219629286148948e-07, + "loss": 0.6343, + "step": 71240 + }, + { + "epoch": 2.817931934584429, + "grad_norm": 1.5980321479095312, + "learning_rate": 1.117120975439423e-07, + "loss": 0.6637, + "step": 71250 + }, + { + "epoch": 2.818327433803318, + "grad_norm": 1.2787969320191077, + "learning_rate": 1.1122893748202578e-07, + "loss": 0.6488, + "step": 71260 + }, + { + "epoch": 2.8187229330222072, + "grad_norm": 1.5814500169646448, + "learning_rate": 1.1074681277806476e-07, + "loss": 0.6441, + "step": 71270 + }, + { + "epoch": 2.8191184322410963, + "grad_norm": 1.6815995049544257, + "learning_rate": 1.1026572353416532e-07, + "loss": 0.6207, + "step": 71280 + }, + { + "epoch": 2.8195139314599853, + "grad_norm": 1.236125210479998, + "learning_rate": 1.0978566985221428e-07, + "loss": 0.6214, + "step": 71290 + }, + { + "epoch": 2.8199094306788743, + "grad_norm": 1.3884029202987864, + "learning_rate": 1.0930665183387701e-07, + "loss": 0.6369, + "step": 71300 + }, + { + "epoch": 2.8203049298977634, + "grad_norm": 1.6981650316123413, + "learning_rate": 1.08828669580604e-07, + "loss": 0.6346, + "step": 71310 + }, + { + "epoch": 2.8207004291166524, + "grad_norm": 1.3093396160459512, + "learning_rate": 1.0835172319362153e-07, + "loss": 0.6644, + "step": 71320 + }, + { + "epoch": 2.8210959283355415, + "grad_norm": 1.4272402044636827, + "learning_rate": 1.0787581277393932e-07, + "loss": 0.5882, + "step": 71330 + }, + { + "epoch": 2.8214914275544305, + "grad_norm": 1.5532684370975687, + "learning_rate": 1.0740093842234734e-07, + "loss": 0.6195, + "step": 71340 + }, + { + "epoch": 2.8218869267733195, + "grad_norm": 1.3198582362346123, + "learning_rate": 1.0692710023941566e-07, + "loss": 0.6564, + "step": 71350 + }, + { + "epoch": 2.8222824259922086, + "grad_norm": 1.7941719683925077, + "learning_rate": 1.0645429832549514e-07, + "loss": 0.6093, + "step": 71360 + }, + { + "epoch": 2.8226779252110976, + "grad_norm": 1.5344986401770568, + "learning_rate": 1.0598253278071679e-07, + "loss": 0.6497, + "step": 71370 + }, + { + "epoch": 2.8230734244299867, + "grad_norm": 1.3474874968039983, + "learning_rate": 1.0551180370499348e-07, + "loss": 0.6759, + "step": 71380 + }, + { + "epoch": 2.8234689236488757, + "grad_norm": 1.3443842174996126, + "learning_rate": 
1.0504211119801599e-07, + "loss": 0.6632, + "step": 71390 + }, + { + "epoch": 2.8238644228677647, + "grad_norm": 1.4571229259003577, + "learning_rate": 1.0457345535925866e-07, + "loss": 0.6473, + "step": 71400 + }, + { + "epoch": 2.8242599220866538, + "grad_norm": 1.4397939063178338, + "learning_rate": 1.0410583628797377e-07, + "loss": 0.6083, + "step": 71410 + }, + { + "epoch": 2.824655421305543, + "grad_norm": 1.4478803115383612, + "learning_rate": 1.0363925408319597e-07, + "loss": 0.6516, + "step": 71420 + }, + { + "epoch": 2.825050920524432, + "grad_norm": 1.6263864733819333, + "learning_rate": 1.0317370884373789e-07, + "loss": 0.6157, + "step": 71430 + }, + { + "epoch": 2.825446419743321, + "grad_norm": 1.648136273206422, + "learning_rate": 1.0270920066819567e-07, + "loss": 0.6413, + "step": 71440 + }, + { + "epoch": 2.82584191896221, + "grad_norm": 1.5424322560928423, + "learning_rate": 1.0224572965494284e-07, + "loss": 0.6658, + "step": 71450 + }, + { + "epoch": 2.826237418181099, + "grad_norm": 1.4329446773604395, + "learning_rate": 1.0178329590213532e-07, + "loss": 0.6333, + "step": 71460 + }, + { + "epoch": 2.826632917399988, + "grad_norm": 1.2667116444395536, + "learning_rate": 1.0132189950770865e-07, + "loss": 0.682, + "step": 71470 + }, + { + "epoch": 2.827028416618877, + "grad_norm": 1.3723226841049856, + "learning_rate": 1.0086154056937858e-07, + "loss": 0.6571, + "step": 71480 + }, + { + "epoch": 2.827423915837766, + "grad_norm": 1.4224864145469023, + "learning_rate": 1.0040221918464155e-07, + "loss": 0.6362, + "step": 71490 + }, + { + "epoch": 2.827819415056655, + "grad_norm": 1.5072740557916464, + "learning_rate": 9.994393545077308e-08, + "loss": 0.6698, + "step": 71500 + }, + { + "epoch": 2.828214914275544, + "grad_norm": 1.5421550523979828, + "learning_rate": 9.94866894648311e-08, + "loss": 0.6513, + "step": 71510 + }, + { + "epoch": 2.828610413494433, + "grad_norm": 1.6190451322504884, + "learning_rate": 9.90304813236509e-08, + "loss": 0.6508, + "step": 71520 + }, + { + "epoch": 2.8290059127133222, + "grad_norm": 1.4927239530699181, + "learning_rate": 9.857531112385133e-08, + "loss": 0.6173, + "step": 71530 + }, + { + "epoch": 2.8294014119322113, + "grad_norm": 1.5948860647555736, + "learning_rate": 9.81211789618286e-08, + "loss": 0.6582, + "step": 71540 + }, + { + "epoch": 2.8297969111511003, + "grad_norm": 1.42711083667429, + "learning_rate": 9.766808493375968e-08, + "loss": 0.6242, + "step": 71550 + }, + { + "epoch": 2.8301924103699894, + "grad_norm": 1.393300021615929, + "learning_rate": 9.72160291356028e-08, + "loss": 0.6473, + "step": 71560 + }, + { + "epoch": 2.8305879095888784, + "grad_norm": 1.3943864491392837, + "learning_rate": 9.676501166309582e-08, + "loss": 0.693, + "step": 71570 + }, + { + "epoch": 2.8309834088077674, + "grad_norm": 1.4348801207867234, + "learning_rate": 9.631503261175567e-08, + "loss": 0.6408, + "step": 71580 + }, + { + "epoch": 2.8313789080266565, + "grad_norm": 1.53546313887525, + "learning_rate": 9.58660920768817e-08, + "loss": 0.647, + "step": 71590 + }, + { + "epoch": 2.831774407245546, + "grad_norm": 1.5414501086832788, + "learning_rate": 9.541819015355003e-08, + "loss": 0.639, + "step": 71600 + }, + { + "epoch": 2.8321699064644346, + "grad_norm": 1.4870570239235834, + "learning_rate": 9.497132693661981e-08, + "loss": 0.6565, + "step": 71610 + }, + { + "epoch": 2.832565405683324, + "grad_norm": 1.4479950531786703, + "learning_rate": 9.452550252072867e-08, + "loss": 0.6463, + "step": 71620 + }, + { + "epoch": 2.8329609049022126, + 
"grad_norm": 1.5534376854482819, + "learning_rate": 9.408071700029442e-08, + "loss": 0.6548, + "step": 71630 + }, + { + "epoch": 2.833356404121102, + "grad_norm": 1.5704144184999533, + "learning_rate": 9.363697046951504e-08, + "loss": 0.6578, + "step": 71640 + }, + { + "epoch": 2.8337519033399907, + "grad_norm": 1.3940005353907636, + "learning_rate": 9.319426302236922e-08, + "loss": 0.6522, + "step": 71650 + }, + { + "epoch": 2.83414740255888, + "grad_norm": 1.3620695261247602, + "learning_rate": 9.275259475261366e-08, + "loss": 0.6503, + "step": 71660 + }, + { + "epoch": 2.834542901777769, + "grad_norm": 1.5659402360075487, + "learning_rate": 9.231196575378687e-08, + "loss": 0.6114, + "step": 71670 + }, + { + "epoch": 2.8349384009966583, + "grad_norm": 1.287793740585784, + "learning_rate": 9.187237611920585e-08, + "loss": 0.6371, + "step": 71680 + }, + { + "epoch": 2.835333900215547, + "grad_norm": 1.3874033632511464, + "learning_rate": 9.143382594196948e-08, + "loss": 0.6344, + "step": 71690 + }, + { + "epoch": 2.8357293994344364, + "grad_norm": 1.4629847629990715, + "learning_rate": 9.099631531495346e-08, + "loss": 0.636, + "step": 71700 + }, + { + "epoch": 2.836124898653325, + "grad_norm": 1.4942618092172686, + "learning_rate": 9.055984433081588e-08, + "loss": 0.6901, + "step": 71710 + }, + { + "epoch": 2.8365203978722144, + "grad_norm": 1.2801768958137962, + "learning_rate": 9.012441308199449e-08, + "loss": 0.6434, + "step": 71720 + }, + { + "epoch": 2.836915897091103, + "grad_norm": 1.6076141619874595, + "learning_rate": 8.969002166070496e-08, + "loss": 0.6337, + "step": 71730 + }, + { + "epoch": 2.8373113963099925, + "grad_norm": 1.2415270630600788, + "learning_rate": 8.925667015894479e-08, + "loss": 0.655, + "step": 71740 + }, + { + "epoch": 2.837706895528881, + "grad_norm": 1.409355282040025, + "learning_rate": 8.882435866848948e-08, + "loss": 0.6421, + "step": 71750 + }, + { + "epoch": 2.8381023947477706, + "grad_norm": 1.5267283857952438, + "learning_rate": 8.839308728089635e-08, + "loss": 0.6584, + "step": 71760 + }, + { + "epoch": 2.838497893966659, + "grad_norm": 1.56125606558126, + "learning_rate": 8.79628560875001e-08, + "loss": 0.6636, + "step": 71770 + }, + { + "epoch": 2.8388933931855487, + "grad_norm": 1.3706291895479226, + "learning_rate": 8.753366517941841e-08, + "loss": 0.6669, + "step": 71780 + }, + { + "epoch": 2.8392888924044373, + "grad_norm": 1.3035873133951499, + "learning_rate": 8.710551464754414e-08, + "loss": 0.6406, + "step": 71790 + }, + { + "epoch": 2.8396843916233268, + "grad_norm": 1.3825212769679238, + "learning_rate": 8.667840458255305e-08, + "loss": 0.6315, + "step": 71800 + }, + { + "epoch": 2.8400798908422153, + "grad_norm": 1.3982388632872584, + "learning_rate": 8.625233507490005e-08, + "loss": 0.632, + "step": 71810 + }, + { + "epoch": 2.840475390061105, + "grad_norm": 1.5781047096447292, + "learning_rate": 8.58273062148196e-08, + "loss": 0.6341, + "step": 71820 + }, + { + "epoch": 2.8408708892799934, + "grad_norm": 1.5441019233512676, + "learning_rate": 8.540331809232471e-08, + "loss": 0.6257, + "step": 71830 + }, + { + "epoch": 2.841266388498883, + "grad_norm": 1.5054411731881552, + "learning_rate": 8.498037079720966e-08, + "loss": 0.6513, + "step": 71840 + }, + { + "epoch": 2.8416618877177715, + "grad_norm": 1.4815673703969714, + "learning_rate": 8.455846441904669e-08, + "loss": 0.652, + "step": 71850 + }, + { + "epoch": 2.842057386936661, + "grad_norm": 1.503475398636474, + "learning_rate": 8.413759904718877e-08, + "loss": 0.6467, + "step": 
71860 + }, + { + "epoch": 2.8424528861555496, + "grad_norm": 2.067684370145825, + "learning_rate": 8.371777477076792e-08, + "loss": 0.608, + "step": 71870 + }, + { + "epoch": 2.842848385374439, + "grad_norm": 1.387306214458457, + "learning_rate": 8.329899167869582e-08, + "loss": 0.6557, + "step": 71880 + }, + { + "epoch": 2.8432438845933277, + "grad_norm": 1.3374542335648076, + "learning_rate": 8.288124985966262e-08, + "loss": 0.6602, + "step": 71890 + }, + { + "epoch": 2.843639383812217, + "grad_norm": 1.478595604111618, + "learning_rate": 8.246454940214143e-08, + "loss": 0.6679, + "step": 71900 + }, + { + "epoch": 2.8440348830311057, + "grad_norm": 1.437195358675303, + "learning_rate": 8.20488903943789e-08, + "loss": 0.6459, + "step": 71910 + }, + { + "epoch": 2.8444303822499952, + "grad_norm": 1.5705743870705775, + "learning_rate": 8.163427292440685e-08, + "loss": 0.6148, + "step": 71920 + }, + { + "epoch": 2.844825881468884, + "grad_norm": 1.2780124466918952, + "learning_rate": 8.122069708003333e-08, + "loss": 0.6674, + "step": 71930 + }, + { + "epoch": 2.8452213806877733, + "grad_norm": 1.4710905571876607, + "learning_rate": 8.080816294884664e-08, + "loss": 0.6189, + "step": 71940 + }, + { + "epoch": 2.8456168799066623, + "grad_norm": 1.3484872251320974, + "learning_rate": 8.03966706182141e-08, + "loss": 0.6511, + "step": 71950 + }, + { + "epoch": 2.8460123791255514, + "grad_norm": 1.5038879828895375, + "learning_rate": 7.99862201752838e-08, + "loss": 0.6491, + "step": 71960 + }, + { + "epoch": 2.8464078783444404, + "grad_norm": 1.2677884295346622, + "learning_rate": 7.957681170698117e-08, + "loss": 0.6405, + "step": 71970 + }, + { + "epoch": 2.8468033775633295, + "grad_norm": 1.8674788523434265, + "learning_rate": 7.916844530001244e-08, + "loss": 0.6139, + "step": 71980 + }, + { + "epoch": 2.8471988767822185, + "grad_norm": 1.2824188682306423, + "learning_rate": 7.876112104086231e-08, + "loss": 0.6638, + "step": 71990 + }, + { + "epoch": 2.8475943760011075, + "grad_norm": 1.6401352971899505, + "learning_rate": 7.835483901579454e-08, + "loss": 0.6653, + "step": 72000 + }, + { + "epoch": 2.8479898752199966, + "grad_norm": 1.3961740398017697, + "learning_rate": 7.794959931085422e-08, + "loss": 0.6649, + "step": 72010 + }, + { + "epoch": 2.8483853744388856, + "grad_norm": 1.4114497198115452, + "learning_rate": 7.754540201186267e-08, + "loss": 0.6724, + "step": 72020 + }, + { + "epoch": 2.8487808736577747, + "grad_norm": 1.4666726890426882, + "learning_rate": 7.714224720442309e-08, + "loss": 0.614, + "step": 72030 + }, + { + "epoch": 2.8491763728766637, + "grad_norm": 1.5083421564719712, + "learning_rate": 7.674013497391553e-08, + "loss": 0.6245, + "step": 72040 + }, + { + "epoch": 2.8495718720955527, + "grad_norm": 1.3893655027117535, + "learning_rate": 7.633906540550185e-08, + "loss": 0.6518, + "step": 72050 + }, + { + "epoch": 2.8499673713144418, + "grad_norm": 1.1982052669857983, + "learning_rate": 7.593903858412022e-08, + "loss": 0.6671, + "step": 72060 + }, + { + "epoch": 2.850362870533331, + "grad_norm": 1.5550177486475851, + "learning_rate": 7.554005459449065e-08, + "loss": 0.6359, + "step": 72070 + }, + { + "epoch": 2.85075836975222, + "grad_norm": 1.4845948876461585, + "learning_rate": 7.514211352111056e-08, + "loss": 0.6421, + "step": 72080 + }, + { + "epoch": 2.851153868971109, + "grad_norm": 1.6801664990383498, + "learning_rate": 7.474521544825752e-08, + "loss": 0.6371, + "step": 72090 + }, + { + "epoch": 2.851549368189998, + "grad_norm": 1.428534336493384, + "learning_rate": 
7.434936045998764e-08, + "loss": 0.6511, + "step": 72100 + }, + { + "epoch": 2.851944867408887, + "grad_norm": 1.5162142873336415, + "learning_rate": 7.395454864013552e-08, + "loss": 0.6463, + "step": 72110 + }, + { + "epoch": 2.852340366627776, + "grad_norm": 1.4202078216114058, + "learning_rate": 7.356078007231649e-08, + "loss": 0.6358, + "step": 72120 + }, + { + "epoch": 2.852735865846665, + "grad_norm": 1.6415014379473447, + "learning_rate": 7.316805483992329e-08, + "loss": 0.6299, + "step": 72130 + }, + { + "epoch": 2.853131365065554, + "grad_norm": 1.5678912756714454, + "learning_rate": 7.277637302612883e-08, + "loss": 0.6517, + "step": 72140 + }, + { + "epoch": 2.853526864284443, + "grad_norm": 1.3899347976952365, + "learning_rate": 7.238573471388455e-08, + "loss": 0.6358, + "step": 72150 + }, + { + "epoch": 2.853922363503332, + "grad_norm": 1.3111640823340012, + "learning_rate": 7.199613998592036e-08, + "loss": 0.6414, + "step": 72160 + }, + { + "epoch": 2.854317862722221, + "grad_norm": 1.229488001937905, + "learning_rate": 7.160758892474695e-08, + "loss": 0.6269, + "step": 72170 + }, + { + "epoch": 2.8547133619411103, + "grad_norm": 1.2757646388419683, + "learning_rate": 7.122008161265126e-08, + "loss": 0.6732, + "step": 72180 + }, + { + "epoch": 2.8551088611599993, + "grad_norm": 1.6348302350276103, + "learning_rate": 7.083361813170208e-08, + "loss": 0.6245, + "step": 72190 + }, + { + "epoch": 2.8555043603788883, + "grad_norm": 1.4705692079303059, + "learning_rate": 7.044819856374507e-08, + "loss": 0.6365, + "step": 72200 + }, + { + "epoch": 2.8558998595977774, + "grad_norm": 1.7227328874389856, + "learning_rate": 7.006382299040493e-08, + "loss": 0.6529, + "step": 72210 + }, + { + "epoch": 2.8562953588166664, + "grad_norm": 1.3573578734233607, + "learning_rate": 6.968049149308708e-08, + "loss": 0.6801, + "step": 72220 + }, + { + "epoch": 2.8566908580355554, + "grad_norm": 1.399463356219504, + "learning_rate": 6.929820415297383e-08, + "loss": 0.6639, + "step": 72230 + }, + { + "epoch": 2.8570863572544445, + "grad_norm": 1.658952308953823, + "learning_rate": 6.891696105102763e-08, + "loss": 0.6187, + "step": 72240 + }, + { + "epoch": 2.8574818564733335, + "grad_norm": 1.5435937784319809, + "learning_rate": 6.853676226798777e-08, + "loss": 0.6259, + "step": 72250 + }, + { + "epoch": 2.8578773556922226, + "grad_norm": 1.4117160225612395, + "learning_rate": 6.815760788437598e-08, + "loss": 0.6024, + "step": 72260 + }, + { + "epoch": 2.8582728549111116, + "grad_norm": 1.7455187494468822, + "learning_rate": 6.777949798048966e-08, + "loss": 0.6476, + "step": 72270 + }, + { + "epoch": 2.8586683541300006, + "grad_norm": 1.4072155571715825, + "learning_rate": 6.740243263640533e-08, + "loss": 0.6264, + "step": 72280 + }, + { + "epoch": 2.8590638533488897, + "grad_norm": 1.759758970873472, + "learning_rate": 6.702641193197967e-08, + "loss": 0.6415, + "step": 72290 + }, + { + "epoch": 2.8594593525677787, + "grad_norm": 1.700861968911209, + "learning_rate": 6.66514359468473e-08, + "loss": 0.6276, + "step": 72300 + }, + { + "epoch": 2.8598548517866678, + "grad_norm": 1.3422475633848485, + "learning_rate": 6.627750476042139e-08, + "loss": 0.6677, + "step": 72310 + }, + { + "epoch": 2.860250351005557, + "grad_norm": 1.3182407408161956, + "learning_rate": 6.590461845189522e-08, + "loss": 0.6515, + "step": 72320 + }, + { + "epoch": 2.860645850224446, + "grad_norm": 1.4806199732936745, + "learning_rate": 6.553277710023842e-08, + "loss": 0.651, + "step": 72330 + }, + { + "epoch": 2.861041349443335, + 
"grad_norm": 1.3502937366604248, + "learning_rate": 6.516198078420189e-08, + "loss": 0.6437, + "step": 72340 + }, + { + "epoch": 2.861436848662224, + "grad_norm": 1.8530057237432134, + "learning_rate": 6.47922295823128e-08, + "loss": 0.6428, + "step": 72350 + }, + { + "epoch": 2.861832347881113, + "grad_norm": 1.2879955958524398, + "learning_rate": 6.442352357287852e-08, + "loss": 0.6334, + "step": 72360 + }, + { + "epoch": 2.862227847100002, + "grad_norm": 1.4072641193957272, + "learning_rate": 6.405586283398491e-08, + "loss": 0.6896, + "step": 72370 + }, + { + "epoch": 2.862623346318891, + "grad_norm": 1.4983629491296078, + "learning_rate": 6.36892474434958e-08, + "loss": 0.646, + "step": 72380 + }, + { + "epoch": 2.86301884553778, + "grad_norm": 1.4326776250624267, + "learning_rate": 6.332367747905466e-08, + "loss": 0.6906, + "step": 72390 + }, + { + "epoch": 2.863414344756669, + "grad_norm": 1.3635475141215665, + "learning_rate": 6.295915301808231e-08, + "loss": 0.6876, + "step": 72400 + }, + { + "epoch": 2.863809843975558, + "grad_norm": 1.2200244161353813, + "learning_rate": 6.259567413777868e-08, + "loss": 0.6347, + "step": 72410 + }, + { + "epoch": 2.864205343194447, + "grad_norm": 1.3552047131982294, + "learning_rate": 6.223324091512329e-08, + "loss": 0.6497, + "step": 72420 + }, + { + "epoch": 2.8646008424133362, + "grad_norm": 1.5375161371950397, + "learning_rate": 6.187185342687252e-08, + "loss": 0.6285, + "step": 72430 + }, + { + "epoch": 2.8649963416322253, + "grad_norm": 1.249655970444738, + "learning_rate": 6.151151174956182e-08, + "loss": 0.6421, + "step": 72440 + }, + { + "epoch": 2.8653918408511143, + "grad_norm": 1.262237077238412, + "learning_rate": 6.115221595950682e-08, + "loss": 0.6164, + "step": 72450 + }, + { + "epoch": 2.8657873400700034, + "grad_norm": 1.203439467062273, + "learning_rate": 6.07939661327983e-08, + "loss": 0.643, + "step": 72460 + }, + { + "epoch": 2.8661828392888924, + "grad_norm": 1.417418223036004, + "learning_rate": 6.043676234530837e-08, + "loss": 0.6226, + "step": 72470 + }, + { + "epoch": 2.8665783385077814, + "grad_norm": 1.5740146543409002, + "learning_rate": 6.008060467268706e-08, + "loss": 0.6523, + "step": 72480 + }, + { + "epoch": 2.8669738377266705, + "grad_norm": 1.349582716418027, + "learning_rate": 5.97254931903618e-08, + "loss": 0.6938, + "step": 72490 + }, + { + "epoch": 2.8673693369455595, + "grad_norm": 1.634075965660607, + "learning_rate": 5.937142797353912e-08, + "loss": 0.6527, + "step": 72500 + }, + { + "epoch": 2.8677648361644485, + "grad_norm": 1.3807830694126326, + "learning_rate": 5.901840909720513e-08, + "loss": 0.6313, + "step": 72510 + }, + { + "epoch": 2.8681603353833376, + "grad_norm": 1.7106302140460086, + "learning_rate": 5.86664366361217e-08, + "loss": 0.625, + "step": 72520 + }, + { + "epoch": 2.8685558346022266, + "grad_norm": 1.5619605048497767, + "learning_rate": 5.831551066483088e-08, + "loss": 0.6364, + "step": 72530 + }, + { + "epoch": 2.8689513338211157, + "grad_norm": 1.421725802970187, + "learning_rate": 5.79656312576532e-08, + "loss": 0.619, + "step": 72540 + }, + { + "epoch": 2.8693468330400047, + "grad_norm": 1.2069242650206728, + "learning_rate": 5.7616798488687176e-08, + "loss": 0.6526, + "step": 72550 + }, + { + "epoch": 2.8697423322588937, + "grad_norm": 1.7169527039551473, + "learning_rate": 5.726901243180871e-08, + "loss": 0.6472, + "step": 72560 + }, + { + "epoch": 2.870137831477783, + "grad_norm": 1.480201705865561, + "learning_rate": 5.6922273160674444e-08, + "loss": 0.6177, + "step": 
72570 + }, + { + "epoch": 2.870533330696672, + "grad_norm": 1.4752908902225705, + "learning_rate": 5.6576580748716195e-08, + "loss": 0.665, + "step": 72580 + }, + { + "epoch": 2.870928829915561, + "grad_norm": 1.5083567174791324, + "learning_rate": 5.6231935269147054e-08, + "loss": 0.6305, + "step": 72590 + }, + { + "epoch": 2.87132432913445, + "grad_norm": 1.561256121729682, + "learning_rate": 5.588833679495642e-08, + "loss": 0.643, + "step": 72600 + }, + { + "epoch": 2.871719828353339, + "grad_norm": 1.610856633861929, + "learning_rate": 5.5545785398912755e-08, + "loss": 0.6547, + "step": 72610 + }, + { + "epoch": 2.872115327572228, + "grad_norm": 1.4777326951322076, + "learning_rate": 5.5204281153561914e-08, + "loss": 0.6531, + "step": 72620 + }, + { + "epoch": 2.872510826791117, + "grad_norm": 1.35165347190245, + "learning_rate": 5.486382413122937e-08, + "loss": 0.6747, + "step": 72630 + }, + { + "epoch": 2.872906326010006, + "grad_norm": 1.465090142971119, + "learning_rate": 5.452441440401801e-08, + "loss": 0.6729, + "step": 72640 + }, + { + "epoch": 2.873301825228895, + "grad_norm": 1.2597268928551326, + "learning_rate": 5.4186052043808666e-08, + "loss": 0.6575, + "step": 72650 + }, + { + "epoch": 2.873697324447784, + "grad_norm": 1.5184075743379273, + "learning_rate": 5.384873712226124e-08, + "loss": 0.6589, + "step": 72660 + }, + { + "epoch": 2.874092823666673, + "grad_norm": 1.6122070085517288, + "learning_rate": 5.351246971081248e-08, + "loss": 0.6642, + "step": 72670 + }, + { + "epoch": 2.874488322885562, + "grad_norm": 1.718819098683133, + "learning_rate": 5.317724988067874e-08, + "loss": 0.6195, + "step": 72680 + }, + { + "epoch": 2.8748838221044513, + "grad_norm": 1.4102826738237104, + "learning_rate": 5.284307770285324e-08, + "loss": 0.665, + "step": 72690 + }, + { + "epoch": 2.8752793213233403, + "grad_norm": 1.4836432098153078, + "learning_rate": 5.250995324810826e-08, + "loss": 0.6561, + "step": 72700 + }, + { + "epoch": 2.8756748205422293, + "grad_norm": 1.210518337484923, + "learning_rate": 5.2177876586994024e-08, + "loss": 0.6596, + "step": 72710 + }, + { + "epoch": 2.8760703197611184, + "grad_norm": 1.4309615217082419, + "learning_rate": 5.184684778983873e-08, + "loss": 0.6389, + "step": 72720 + }, + { + "epoch": 2.8764658189800074, + "grad_norm": 1.531355736173954, + "learning_rate": 5.1516866926747953e-08, + "loss": 0.6329, + "step": 72730 + }, + { + "epoch": 2.8768613181988965, + "grad_norm": 1.4103575427945292, + "learning_rate": 5.118793406760636e-08, + "loss": 0.6832, + "step": 72740 + }, + { + "epoch": 2.8772568174177855, + "grad_norm": 1.4535690283014577, + "learning_rate": 5.086004928207655e-08, + "loss": 0.6613, + "step": 72750 + }, + { + "epoch": 2.8776523166366745, + "grad_norm": 1.4005578818184357, + "learning_rate": 5.053321263959909e-08, + "loss": 0.6669, + "step": 72760 + }, + { + "epoch": 2.8780478158555636, + "grad_norm": 1.6556440501689134, + "learning_rate": 5.02074242093914e-08, + "loss": 0.6374, + "step": 72770 + }, + { + "epoch": 2.8784433150744526, + "grad_norm": 1.6805707688486475, + "learning_rate": 4.988268406045105e-08, + "loss": 0.6659, + "step": 72780 + }, + { + "epoch": 2.8788388142933417, + "grad_norm": 1.2452304115421555, + "learning_rate": 4.9558992261551364e-08, + "loss": 0.6601, + "step": 72790 + }, + { + "epoch": 2.8792343135122307, + "grad_norm": 1.4993082581900317, + "learning_rate": 4.9236348881245286e-08, + "loss": 0.6566, + "step": 72800 + }, + { + "epoch": 2.8796298127311197, + "grad_norm": 1.5226098101439518, + 
"learning_rate": 4.891475398786316e-08, + "loss": 0.6371, + "step": 72810 + }, + { + "epoch": 2.8800253119500088, + "grad_norm": 1.255480487223256, + "learning_rate": 4.859420764951328e-08, + "loss": 0.6698, + "step": 72820 + }, + { + "epoch": 2.880420811168898, + "grad_norm": 1.3476733314233706, + "learning_rate": 4.827470993408134e-08, + "loss": 0.6538, + "step": 72830 + }, + { + "epoch": 2.880816310387787, + "grad_norm": 1.4669120556339963, + "learning_rate": 4.795626090923267e-08, + "loss": 0.6531, + "step": 72840 + }, + { + "epoch": 2.881211809606676, + "grad_norm": 1.2327257736566601, + "learning_rate": 4.7638860642408305e-08, + "loss": 0.6445, + "step": 72850 + }, + { + "epoch": 2.881607308825565, + "grad_norm": 1.43284919010261, + "learning_rate": 4.732250920082837e-08, + "loss": 0.6496, + "step": 72860 + }, + { + "epoch": 2.882002808044454, + "grad_norm": 1.678470475015289, + "learning_rate": 4.700720665149094e-08, + "loss": 0.6274, + "step": 72870 + }, + { + "epoch": 2.882398307263343, + "grad_norm": 1.3212522110082225, + "learning_rate": 4.66929530611715e-08, + "loss": 0.6435, + "step": 72880 + }, + { + "epoch": 2.882793806482232, + "grad_norm": 1.5237024323125776, + "learning_rate": 4.637974849642346e-08, + "loss": 0.6203, + "step": 72890 + }, + { + "epoch": 2.883189305701121, + "grad_norm": 1.3369753703052774, + "learning_rate": 4.606759302357822e-08, + "loss": 0.6707, + "step": 72900 + }, + { + "epoch": 2.88358480492001, + "grad_norm": 1.487196996409093, + "learning_rate": 4.575648670874566e-08, + "loss": 0.6162, + "step": 72910 + }, + { + "epoch": 2.883980304138899, + "grad_norm": 1.652736144974241, + "learning_rate": 4.544642961781143e-08, + "loss": 0.6396, + "step": 72920 + }, + { + "epoch": 2.884375803357788, + "grad_norm": 1.3567343586098006, + "learning_rate": 4.513742181644187e-08, + "loss": 0.6671, + "step": 72930 + }, + { + "epoch": 2.8847713025766772, + "grad_norm": 1.4751191129610746, + "learning_rate": 4.482946337007799e-08, + "loss": 0.6282, + "step": 72940 + }, + { + "epoch": 2.8851668017955667, + "grad_norm": 1.450479351550353, + "learning_rate": 4.452255434394093e-08, + "loss": 0.6393, + "step": 72950 + }, + { + "epoch": 2.8855623010144553, + "grad_norm": 1.5609670866457233, + "learning_rate": 4.4216694803028705e-08, + "loss": 0.6555, + "step": 72960 + }, + { + "epoch": 2.885957800233345, + "grad_norm": 1.3885795081012786, + "learning_rate": 4.3911884812117276e-08, + "loss": 0.6385, + "step": 72970 + }, + { + "epoch": 2.8863532994522334, + "grad_norm": 1.4323153141002634, + "learning_rate": 4.360812443576001e-08, + "loss": 0.6705, + "step": 72980 + }, + { + "epoch": 2.886748798671123, + "grad_norm": 1.5802102158166254, + "learning_rate": 4.330541373828823e-08, + "loss": 0.6549, + "step": 72990 + }, + { + "epoch": 2.8871442978900115, + "grad_norm": 1.5173291056679128, + "learning_rate": 4.300375278381064e-08, + "loss": 0.6382, + "step": 73000 + }, + { + "epoch": 2.887539797108901, + "grad_norm": 1.4228779557593552, + "learning_rate": 4.270314163621447e-08, + "loss": 0.6388, + "step": 73010 + }, + { + "epoch": 2.8879352963277896, + "grad_norm": 1.2394541547630626, + "learning_rate": 4.24035803591627e-08, + "loss": 0.6147, + "step": 73020 + }, + { + "epoch": 2.888330795546679, + "grad_norm": 1.158935180851554, + "learning_rate": 4.210506901609901e-08, + "loss": 0.6614, + "step": 73030 + }, + { + "epoch": 2.8887262947655676, + "grad_norm": 1.3394673174843224, + "learning_rate": 4.180760767024117e-08, + "loss": 0.6339, + "step": 73040 + }, + { + "epoch": 
2.889121793984457, + "grad_norm": 1.4991891227655139, + "learning_rate": 4.1511196384588226e-08, + "loss": 0.6588, + "step": 73050 + }, + { + "epoch": 2.8895172932033457, + "grad_norm": 1.5894360741224016, + "learning_rate": 4.12158352219133e-08, + "loss": 0.6315, + "step": 73060 + }, + { + "epoch": 2.889912792422235, + "grad_norm": 1.2212752434595693, + "learning_rate": 4.092152424477025e-08, + "loss": 0.6372, + "step": 73070 + }, + { + "epoch": 2.890308291641124, + "grad_norm": 1.5621856230101503, + "learning_rate": 4.0628263515488654e-08, + "loss": 0.6373, + "step": 73080 + }, + { + "epoch": 2.8907037908600133, + "grad_norm": 1.6804074142, + "learning_rate": 4.0336053096174945e-08, + "loss": 0.6308, + "step": 73090 + }, + { + "epoch": 2.891099290078902, + "grad_norm": 1.4844377693483426, + "learning_rate": 4.004489304871628e-08, + "loss": 0.6456, + "step": 73100 + }, + { + "epoch": 2.8914947892977914, + "grad_norm": 1.292577091658977, + "learning_rate": 3.97547834347739e-08, + "loss": 0.6533, + "step": 73110 + }, + { + "epoch": 2.89189028851668, + "grad_norm": 1.2870549378420648, + "learning_rate": 3.9465724315788655e-08, + "loss": 0.6776, + "step": 73120 + }, + { + "epoch": 2.8922857877355694, + "grad_norm": 1.5082532434548177, + "learning_rate": 3.9177715752978244e-08, + "loss": 0.6253, + "step": 73130 + }, + { + "epoch": 2.892681286954458, + "grad_norm": 1.7460971315415368, + "learning_rate": 3.889075780733831e-08, + "loss": 0.5943, + "step": 73140 + }, + { + "epoch": 2.8930767861733475, + "grad_norm": 1.3485375973865619, + "learning_rate": 3.860485053964025e-08, + "loss": 0.6708, + "step": 73150 + }, + { + "epoch": 2.893472285392236, + "grad_norm": 1.4099137017458634, + "learning_rate": 3.831999401043618e-08, + "loss": 0.6338, + "step": 73160 + }, + { + "epoch": 2.8938677846111256, + "grad_norm": 1.4691497602730943, + "learning_rate": 3.803618828005229e-08, + "loss": 0.6788, + "step": 73170 + }, + { + "epoch": 2.894263283830014, + "grad_norm": 1.4353341115967935, + "learning_rate": 3.775343340859494e-08, + "loss": 0.6394, + "step": 73180 + }, + { + "epoch": 2.8946587830489037, + "grad_norm": 1.3132876924754062, + "learning_rate": 3.747172945594568e-08, + "loss": 0.6471, + "step": 73190 + }, + { + "epoch": 2.8950542822677923, + "grad_norm": 1.4791646400255996, + "learning_rate": 3.7191076481765675e-08, + "loss": 0.6392, + "step": 73200 + }, + { + "epoch": 2.8954497814866818, + "grad_norm": 1.4725139616173228, + "learning_rate": 3.691147454549127e-08, + "loss": 0.5941, + "step": 73210 + }, + { + "epoch": 2.8958452807055703, + "grad_norm": 1.491512605359283, + "learning_rate": 3.663292370633842e-08, + "loss": 0.6715, + "step": 73220 + }, + { + "epoch": 2.89624077992446, + "grad_norm": 1.4748372298308459, + "learning_rate": 3.635542402329883e-08, + "loss": 0.6519, + "step": 73230 + }, + { + "epoch": 2.8966362791433484, + "grad_norm": 1.6510368319464077, + "learning_rate": 3.6078975555142724e-08, + "loss": 0.6286, + "step": 73240 + }, + { + "epoch": 2.897031778362238, + "grad_norm": 1.3607040530014676, + "learning_rate": 3.5803578360416594e-08, + "loss": 0.6335, + "step": 73250 + }, + { + "epoch": 2.8974272775811265, + "grad_norm": 1.4017562980589908, + "learning_rate": 3.552923249744489e-08, + "loss": 0.6145, + "step": 73260 + }, + { + "epoch": 2.897822776800016, + "grad_norm": 1.3808757950575428, + "learning_rate": 3.5255938024329475e-08, + "loss": 0.6228, + "step": 73270 + }, + { + "epoch": 2.898218276018905, + "grad_norm": 1.1678762392477666, + "learning_rate": 3.498369499894905e-08, 
+ "loss": 0.634, + "step": 73280 + }, + { + "epoch": 2.898613775237794, + "grad_norm": 1.4261898522473708, + "learning_rate": 3.4712503478960827e-08, + "loss": 0.6502, + "step": 73290 + }, + { + "epoch": 2.899009274456683, + "grad_norm": 1.4858853493301873, + "learning_rate": 3.444236352179831e-08, + "loss": 0.6359, + "step": 73300 + }, + { + "epoch": 2.899404773675572, + "grad_norm": 1.3449930880817293, + "learning_rate": 3.4173275184672396e-08, + "loss": 0.6295, + "step": 73310 + }, + { + "epoch": 2.899800272894461, + "grad_norm": 1.6060015811468955, + "learning_rate": 3.390523852457084e-08, + "loss": 0.5993, + "step": 73320 + }, + { + "epoch": 2.9001957721133502, + "grad_norm": 1.2430674200860625, + "learning_rate": 3.36382535982599e-08, + "loss": 0.6261, + "step": 73330 + }, + { + "epoch": 2.9005912713322393, + "grad_norm": 1.4579801675869906, + "learning_rate": 3.337232046228211e-08, + "loss": 0.65, + "step": 73340 + }, + { + "epoch": 2.9009867705511283, + "grad_norm": 1.3857771060602695, + "learning_rate": 3.3107439172958e-08, + "loss": 0.6449, + "step": 73350 + }, + { + "epoch": 2.9013822697700173, + "grad_norm": 1.5374007586473863, + "learning_rate": 3.284360978638379e-08, + "loss": 0.6731, + "step": 73360 + }, + { + "epoch": 2.9017777689889064, + "grad_norm": 1.3020068730036634, + "learning_rate": 3.25808323584359e-08, + "loss": 0.6775, + "step": 73370 + }, + { + "epoch": 2.9021732682077954, + "grad_norm": 1.4269098238545106, + "learning_rate": 3.23191069447637e-08, + "loss": 0.6328, + "step": 73380 + }, + { + "epoch": 2.9025687674266845, + "grad_norm": 1.6622459834457164, + "learning_rate": 3.205843360079841e-08, + "loss": 0.6536, + "step": 73390 + }, + { + "epoch": 2.9029642666455735, + "grad_norm": 1.7766758127097282, + "learning_rate": 3.179881238174476e-08, + "loss": 0.644, + "step": 73400 + }, + { + "epoch": 2.9033597658644625, + "grad_norm": 1.5757928447311629, + "learning_rate": 3.154024334258654e-08, + "loss": 0.6316, + "step": 73410 + }, + { + "epoch": 2.9037552650833516, + "grad_norm": 1.3925146576999683, + "learning_rate": 3.128272653808384e-08, + "loss": 0.6447, + "step": 73420 + }, + { + "epoch": 2.9041507643022406, + "grad_norm": 1.7114550606358028, + "learning_rate": 3.10262620227747e-08, + "loss": 0.6464, + "step": 73430 + }, + { + "epoch": 2.9045462635211297, + "grad_norm": 1.3473978752412272, + "learning_rate": 3.0770849850974006e-08, + "loss": 0.6417, + "step": 73440 + }, + { + "epoch": 2.9049417627400187, + "grad_norm": 1.2790154087281085, + "learning_rate": 3.051649007677404e-08, + "loss": 0.6779, + "step": 73450 + }, + { + "epoch": 2.9053372619589077, + "grad_norm": 1.364313043353785, + "learning_rate": 3.026318275404283e-08, + "loss": 0.6609, + "step": 73460 + }, + { + "epoch": 2.9057327611777968, + "grad_norm": 1.9577798360631313, + "learning_rate": 3.00109279364269e-08, + "loss": 0.6426, + "step": 73470 + }, + { + "epoch": 2.906128260396686, + "grad_norm": 1.2865122416197172, + "learning_rate": 2.9759725677349638e-08, + "loss": 0.6513, + "step": 73480 + }, + { + "epoch": 2.906523759615575, + "grad_norm": 1.4803393798680207, + "learning_rate": 2.9509576030012367e-08, + "loss": 0.635, + "step": 73490 + }, + { + "epoch": 2.906919258834464, + "grad_norm": 1.5203096024610656, + "learning_rate": 2.926047904739049e-08, + "loss": 0.6062, + "step": 73500 + }, + { + "epoch": 2.907314758053353, + "grad_norm": 1.6943166581873972, + "learning_rate": 2.9012434782239584e-08, + "loss": 0.6594, + "step": 73510 + }, + { + "epoch": 2.907710257272242, + "grad_norm": 
1.2709935165090178, + "learning_rate": 2.8765443287091523e-08, + "loss": 0.663, + "step": 73520 + }, + { + "epoch": 2.908105756491131, + "grad_norm": 1.5260170312306132, + "learning_rate": 2.85195046142539e-08, + "loss": 0.6576, + "step": 73530 + }, + { + "epoch": 2.90850125571002, + "grad_norm": 1.5330157772928157, + "learning_rate": 2.827461881581339e-08, + "loss": 0.6364, + "step": 73540 + }, + { + "epoch": 2.908896754928909, + "grad_norm": 1.9527984201422075, + "learning_rate": 2.8030785943631843e-08, + "loss": 0.6421, + "step": 73550 + }, + { + "epoch": 2.909292254147798, + "grad_norm": 1.5179959299978178, + "learning_rate": 2.7788006049349058e-08, + "loss": 0.6559, + "step": 73560 + }, + { + "epoch": 2.909687753366687, + "grad_norm": 1.3658604850621558, + "learning_rate": 2.7546279184381686e-08, + "loss": 0.6378, + "step": 73570 + }, + { + "epoch": 2.910083252585576, + "grad_norm": 1.5388832562587558, + "learning_rate": 2.7305605399923773e-08, + "loss": 0.6458, + "step": 73580 + }, + { + "epoch": 2.9104787518044652, + "grad_norm": 1.6183400670321177, + "learning_rate": 2.706598474694455e-08, + "loss": 0.6413, + "step": 73590 + }, + { + "epoch": 2.9108742510233543, + "grad_norm": 1.5524208145611658, + "learning_rate": 2.6827417276193423e-08, + "loss": 0.6582, + "step": 73600 + }, + { + "epoch": 2.9112697502422433, + "grad_norm": 1.4780749074187067, + "learning_rate": 2.658990303819331e-08, + "loss": 0.6193, + "step": 73610 + }, + { + "epoch": 2.9116652494611324, + "grad_norm": 1.4384321617070752, + "learning_rate": 2.6353442083247304e-08, + "loss": 0.6314, + "step": 73620 + }, + { + "epoch": 2.9120607486800214, + "grad_norm": 1.3769430558478037, + "learning_rate": 2.6118034461432018e-08, + "loss": 0.6668, + "step": 73630 + }, + { + "epoch": 2.9124562478989104, + "grad_norm": 1.6739059508543381, + "learning_rate": 2.588368022260368e-08, + "loss": 0.6246, + "step": 73640 + }, + { + "epoch": 2.9128517471177995, + "grad_norm": 1.3058256254216958, + "learning_rate": 2.5650379416394256e-08, + "loss": 0.6747, + "step": 73650 + }, + { + "epoch": 2.9132472463366885, + "grad_norm": 1.495369401888642, + "learning_rate": 2.541813209221311e-08, + "loss": 0.6516, + "step": 73660 + }, + { + "epoch": 2.9136427455555776, + "grad_norm": 1.6181070932766162, + "learning_rate": 2.5186938299246456e-08, + "loss": 0.6313, + "step": 73670 + }, + { + "epoch": 2.9140382447744666, + "grad_norm": 1.376908189963363, + "learning_rate": 2.4956798086456234e-08, + "loss": 0.6679, + "step": 73680 + }, + { + "epoch": 2.9144337439933556, + "grad_norm": 1.5525881040933487, + "learning_rate": 2.4727711502582908e-08, + "loss": 0.6403, + "step": 73690 + }, + { + "epoch": 2.9148292432122447, + "grad_norm": 1.6413504376016281, + "learning_rate": 2.449967859614322e-08, + "loss": 0.6577, + "step": 73700 + }, + { + "epoch": 2.9152247424311337, + "grad_norm": 1.4669545664862746, + "learning_rate": 2.4272699415430202e-08, + "loss": 0.6331, + "step": 73710 + }, + { + "epoch": 2.9156202416500228, + "grad_norm": 1.4165551219656751, + "learning_rate": 2.404677400851485e-08, + "loss": 0.6641, + "step": 73720 + }, + { + "epoch": 2.916015740868912, + "grad_norm": 1.33591608303022, + "learning_rate": 2.3821902423243337e-08, + "loss": 0.617, + "step": 73730 + }, + { + "epoch": 2.916411240087801, + "grad_norm": 1.670073083005954, + "learning_rate": 2.3598084707240344e-08, + "loss": 0.6495, + "step": 73740 + }, + { + "epoch": 2.91680673930669, + "grad_norm": 1.336096388620189, + "learning_rate": 2.337532090790573e-08, + "loss": 0.6681, + 
"step": 73750 + }, + { + "epoch": 2.917202238525579, + "grad_norm": 1.3226340600280069, + "learning_rate": 2.3153611072418424e-08, + "loss": 0.6089, + "step": 73760 + }, + { + "epoch": 2.917597737744468, + "grad_norm": 1.4415691713027183, + "learning_rate": 2.2932955247731425e-08, + "loss": 0.6668, + "step": 73770 + }, + { + "epoch": 2.917993236963357, + "grad_norm": 1.393681057459904, + "learning_rate": 2.2713353480576795e-08, + "loss": 0.671, + "step": 73780 + }, + { + "epoch": 2.918388736182246, + "grad_norm": 1.3608468874762747, + "learning_rate": 2.2494805817461774e-08, + "loss": 0.6759, + "step": 73790 + }, + { + "epoch": 2.918784235401135, + "grad_norm": 1.5722304904607298, + "learning_rate": 2.2277312304671562e-08, + "loss": 0.6968, + "step": 73800 + }, + { + "epoch": 2.919179734620024, + "grad_norm": 1.2158107809001115, + "learning_rate": 2.206087298826709e-08, + "loss": 0.6801, + "step": 73810 + }, + { + "epoch": 2.919575233838913, + "grad_norm": 1.6602405277369507, + "learning_rate": 2.184548791408725e-08, + "loss": 0.6367, + "step": 73820 + }, + { + "epoch": 2.919970733057802, + "grad_norm": 1.5138865023208754, + "learning_rate": 2.1631157127746105e-08, + "loss": 0.6401, + "step": 73830 + }, + { + "epoch": 2.9203662322766912, + "grad_norm": 1.9377118630001589, + "learning_rate": 2.141788067463513e-08, + "loss": 0.6755, + "step": 73840 + }, + { + "epoch": 2.9207617314955803, + "grad_norm": 1.4682798791002638, + "learning_rate": 2.1205658599923183e-08, + "loss": 0.6593, + "step": 73850 + }, + { + "epoch": 2.9211572307144693, + "grad_norm": 1.35992177545572, + "learning_rate": 2.0994490948555434e-08, + "loss": 0.6743, + "step": 73860 + }, + { + "epoch": 2.9215527299333583, + "grad_norm": 1.505064744193168, + "learning_rate": 2.0784377765253326e-08, + "loss": 0.6452, + "step": 73870 + }, + { + "epoch": 2.9219482291522474, + "grad_norm": 1.5116412220022382, + "learning_rate": 2.05753190945146e-08, + "loss": 0.6532, + "step": 73880 + }, + { + "epoch": 2.9223437283711364, + "grad_norm": 1.7212297632257676, + "learning_rate": 2.0367314980615506e-08, + "loss": 0.6303, + "step": 73890 + }, + { + "epoch": 2.9227392275900255, + "grad_norm": 1.727146250896952, + "learning_rate": 2.016036546760636e-08, + "loss": 0.6479, + "step": 73900 + }, + { + "epoch": 2.9231347268089145, + "grad_norm": 1.3123577942848836, + "learning_rate": 1.9954470599316546e-08, + "loss": 0.6127, + "step": 73910 + }, + { + "epoch": 2.9235302260278035, + "grad_norm": 1.4770600814224353, + "learning_rate": 1.974963041935063e-08, + "loss": 0.6071, + "step": 73920 + }, + { + "epoch": 2.9239257252466926, + "grad_norm": 1.310967421526731, + "learning_rate": 1.954584497109058e-08, + "loss": 0.635, + "step": 73930 + }, + { + "epoch": 2.9243212244655816, + "grad_norm": 1.2443510062765006, + "learning_rate": 1.9343114297694643e-08, + "loss": 0.6548, + "step": 73940 + }, + { + "epoch": 2.9247167236844707, + "grad_norm": 1.387982223017377, + "learning_rate": 1.9141438442097372e-08, + "loss": 0.6238, + "step": 73950 + }, + { + "epoch": 2.9251122229033597, + "grad_norm": 1.6110149774713163, + "learning_rate": 1.8940817447010708e-08, + "loss": 0.6349, + "step": 73960 + }, + { + "epoch": 2.9255077221222487, + "grad_norm": 1.5807481061662034, + "learning_rate": 1.8741251354921773e-08, + "loss": 0.6284, + "step": 73970 + }, + { + "epoch": 2.925903221341138, + "grad_norm": 1.4516527096014262, + "learning_rate": 1.8542740208096764e-08, + "loss": 0.643, + "step": 73980 + }, + { + "epoch": 2.926298720560027, + "grad_norm": 
1.582427128361681, + "learning_rate": 1.8345284048575385e-08, + "loss": 0.6468, + "step": 73990 + }, + { + "epoch": 2.926694219778916, + "grad_norm": 1.5340072458361254, + "learning_rate": 1.8148882918176404e-08, + "loss": 0.5876, + "step": 74000 + }, + { + "epoch": 2.927089718997805, + "grad_norm": 1.2939608633572102, + "learning_rate": 1.7953536858494326e-08, + "loss": 0.6332, + "step": 74010 + }, + { + "epoch": 2.927485218216694, + "grad_norm": 1.213407587319952, + "learning_rate": 1.7759245910899393e-08, + "loss": 0.665, + "step": 74020 + }, + { + "epoch": 2.927880717435583, + "grad_norm": 1.2327507984166377, + "learning_rate": 1.75660101165398e-08, + "loss": 0.6047, + "step": 74030 + }, + { + "epoch": 2.928276216654472, + "grad_norm": 1.8149716481421807, + "learning_rate": 1.7373829516338926e-08, + "loss": 0.6508, + "step": 74040 + }, + { + "epoch": 2.928671715873361, + "grad_norm": 1.4705837957290206, + "learning_rate": 1.7182704150998096e-08, + "loss": 0.6238, + "step": 74050 + }, + { + "epoch": 2.92906721509225, + "grad_norm": 1.6270558558308765, + "learning_rate": 1.6992634060993828e-08, + "loss": 0.6436, + "step": 74060 + }, + { + "epoch": 2.929462714311139, + "grad_norm": 1.414701533571263, + "learning_rate": 1.6803619286579477e-08, + "loss": 0.657, + "step": 74070 + }, + { + "epoch": 2.929858213530028, + "grad_norm": 1.7333727238938672, + "learning_rate": 1.6615659867785792e-08, + "loss": 0.6392, + "step": 74080 + }, + { + "epoch": 2.930253712748917, + "grad_norm": 1.4597776683518446, + "learning_rate": 1.642875584441872e-08, + "loss": 0.6515, + "step": 74090 + }, + { + "epoch": 2.9306492119678063, + "grad_norm": 1.3049665753359068, + "learning_rate": 1.6242907256062145e-08, + "loss": 0.6263, + "step": 74100 + }, + { + "epoch": 2.9310447111866953, + "grad_norm": 1.4476140312771326, + "learning_rate": 1.6058114142075143e-08, + "loss": 0.6488, + "step": 74110 + }, + { + "epoch": 2.9314402104055843, + "grad_norm": 1.5985985226428492, + "learning_rate": 1.587437654159363e-08, + "loss": 0.6448, + "step": 74120 + }, + { + "epoch": 2.9318357096244734, + "grad_norm": 1.55991870069027, + "learning_rate": 1.569169449352981e-08, + "loss": 0.6202, + "step": 74130 + }, + { + "epoch": 2.9322312088433624, + "grad_norm": 1.4722672469390565, + "learning_rate": 1.5510068036573288e-08, + "loss": 0.6294, + "step": 74140 + }, + { + "epoch": 2.9326267080622515, + "grad_norm": 1.5009381866707456, + "learning_rate": 1.532949720918886e-08, + "loss": 0.664, + "step": 74150 + }, + { + "epoch": 2.9330222072811405, + "grad_norm": 1.4589707556727036, + "learning_rate": 1.514998204961926e-08, + "loss": 0.6611, + "step": 74160 + }, + { + "epoch": 2.9334177065000295, + "grad_norm": 1.7684706653463056, + "learning_rate": 1.4971522595881306e-08, + "loss": 0.6038, + "step": 74170 + }, + { + "epoch": 2.9338132057189186, + "grad_norm": 1.589191622086741, + "learning_rate": 1.4794118885770869e-08, + "loss": 0.6226, + "step": 74180 + }, + { + "epoch": 2.9342087049378076, + "grad_norm": 1.5341608678916385, + "learning_rate": 1.4617770956858457e-08, + "loss": 0.6049, + "step": 74190 + }, + { + "epoch": 2.9346042041566966, + "grad_norm": 1.4119029298173666, + "learning_rate": 1.4442478846491415e-08, + "loss": 0.6299, + "step": 74200 + }, + { + "epoch": 2.9349997033755857, + "grad_norm": 1.4271520548881262, + "learning_rate": 1.426824259179449e-08, + "loss": 0.6512, + "step": 74210 + }, + { + "epoch": 2.9353952025944747, + "grad_norm": 1.6523780942766524, + "learning_rate": 1.40950622296665e-08, + "loss": 0.6275, + 
"step": 74220 + }, + { + "epoch": 2.9357907018133638, + "grad_norm": 1.4628095509263386, + "learning_rate": 1.392293779678533e-08, + "loss": 0.6738, + "step": 74230 + }, + { + "epoch": 2.936186201032253, + "grad_norm": 1.2881537490943038, + "learning_rate": 1.3751869329603485e-08, + "loss": 0.6493, + "step": 74240 + }, + { + "epoch": 2.936581700251142, + "grad_norm": 1.7929240965417017, + "learning_rate": 1.3581856864350318e-08, + "loss": 0.6521, + "step": 74250 + }, + { + "epoch": 2.936977199470031, + "grad_norm": 1.2842700725791327, + "learning_rate": 1.3412900437031474e-08, + "loss": 0.6903, + "step": 74260 + }, + { + "epoch": 2.93737269868892, + "grad_norm": 1.6397111809128573, + "learning_rate": 1.3245000083429437e-08, + "loss": 0.5999, + "step": 74270 + }, + { + "epoch": 2.9377681979078094, + "grad_norm": 1.4045967808228152, + "learning_rate": 1.3078155839101881e-08, + "loss": 0.639, + "step": 74280 + }, + { + "epoch": 2.938163697126698, + "grad_norm": 1.7004426576513894, + "learning_rate": 1.2912367739384425e-08, + "loss": 0.6115, + "step": 74290 + }, + { + "epoch": 2.9385591963455875, + "grad_norm": 1.559438811440199, + "learning_rate": 1.2747635819387872e-08, + "loss": 0.6443, + "step": 74300 + }, + { + "epoch": 2.938954695564476, + "grad_norm": 1.312465664906808, + "learning_rate": 1.2583960113999316e-08, + "loss": 0.6887, + "step": 74310 + }, + { + "epoch": 2.9393501947833656, + "grad_norm": 1.2728121325403894, + "learning_rate": 1.2421340657882142e-08, + "loss": 0.6453, + "step": 74320 + }, + { + "epoch": 2.939745694002254, + "grad_norm": 1.7718362817399798, + "learning_rate": 1.2259777485477687e-08, + "loss": 0.6556, + "step": 74330 + }, + { + "epoch": 2.9401411932211436, + "grad_norm": 1.1950781798632801, + "learning_rate": 1.2099270631000804e-08, + "loss": 0.6631, + "step": 74340 + }, + { + "epoch": 2.9405366924400322, + "grad_norm": 1.4756367622417887, + "learning_rate": 1.1939820128445411e-08, + "loss": 0.6296, + "step": 74350 + }, + { + "epoch": 2.9409321916589217, + "grad_norm": 1.2909865140881334, + "learning_rate": 1.17814260115795e-08, + "loss": 0.6505, + "step": 74360 + }, + { + "epoch": 2.9413276908778103, + "grad_norm": 1.5841153344593966, + "learning_rate": 1.1624088313948456e-08, + "loss": 0.6488, + "step": 74370 + }, + { + "epoch": 2.9417231900967, + "grad_norm": 1.5552836635504748, + "learning_rate": 1.1467807068873404e-08, + "loss": 0.6152, + "step": 74380 + }, + { + "epoch": 2.9421186893155884, + "grad_norm": 1.4063102864209973, + "learning_rate": 1.1312582309452313e-08, + "loss": 0.6318, + "step": 74390 + }, + { + "epoch": 2.942514188534478, + "grad_norm": 1.795580376106693, + "learning_rate": 1.1158414068559442e-08, + "loss": 0.6172, + "step": 74400 + }, + { + "epoch": 2.9429096877533665, + "grad_norm": 1.3568480648088053, + "learning_rate": 1.100530237884423e-08, + "loss": 0.6983, + "step": 74410 + }, + { + "epoch": 2.943305186972256, + "grad_norm": 1.5291784230981096, + "learning_rate": 1.0853247272734068e-08, + "loss": 0.6517, + "step": 74420 + }, + { + "epoch": 2.9437006861911446, + "grad_norm": 1.2841925560594245, + "learning_rate": 1.070224878243098e-08, + "loss": 0.6613, + "step": 74430 + }, + { + "epoch": 2.944096185410034, + "grad_norm": 1.6613901676953224, + "learning_rate": 1.055230693991438e-08, + "loss": 0.6465, + "step": 74440 + }, + { + "epoch": 2.9444916846289226, + "grad_norm": 1.4687027127998962, + "learning_rate": 1.0403421776938316e-08, + "loss": 0.615, + "step": 74450 + }, + { + "epoch": 2.944887183847812, + "grad_norm": 
1.4900023781217484, + "learning_rate": 1.025559332503534e-08, + "loss": 0.6412, + "step": 74460 + }, + { + "epoch": 2.9452826830667007, + "grad_norm": 1.6153747907270886, + "learning_rate": 1.0108821615512077e-08, + "loss": 0.6605, + "step": 74470 + }, + { + "epoch": 2.94567818228559, + "grad_norm": 1.6008286218990273, + "learning_rate": 9.96310667945255e-09, + "loss": 0.6776, + "step": 74480 + }, + { + "epoch": 2.946073681504479, + "grad_norm": 1.9284491025303494, + "learning_rate": 9.818448547717075e-09, + "loss": 0.6389, + "step": 74490 + }, + { + "epoch": 2.9464691807233683, + "grad_norm": 1.576719591308475, + "learning_rate": 9.6748472509417e-09, + "loss": 0.6452, + "step": 74500 + }, + { + "epoch": 2.946864679942257, + "grad_norm": 1.441596405376397, + "learning_rate": 9.532302819538209e-09, + "loss": 0.6607, + "step": 74510 + }, + { + "epoch": 2.9472601791611464, + "grad_norm": 1.5486034709408758, + "learning_rate": 9.390815283695231e-09, + "loss": 0.6556, + "step": 74520 + }, + { + "epoch": 2.947655678380035, + "grad_norm": 1.5559701590231496, + "learning_rate": 9.250384673377689e-09, + "loss": 0.6193, + "step": 74530 + }, + { + "epoch": 2.9480511775989244, + "grad_norm": 1.4564111923399143, + "learning_rate": 9.111011018326233e-09, + "loss": 0.6347, + "step": 74540 + }, + { + "epoch": 2.948446676817813, + "grad_norm": 1.4769508606939148, + "learning_rate": 8.972694348057254e-09, + "loss": 0.6745, + "step": 74550 + }, + { + "epoch": 2.9488421760367025, + "grad_norm": 1.4511134355689703, + "learning_rate": 8.835434691865096e-09, + "loss": 0.6667, + "step": 74560 + }, + { + "epoch": 2.949237675255591, + "grad_norm": 1.3836006836493282, + "learning_rate": 8.69923207881762e-09, + "loss": 0.6416, + "step": 74570 + }, + { + "epoch": 2.9496331744744806, + "grad_norm": 1.5024419916298608, + "learning_rate": 8.56408653776175e-09, + "loss": 0.6266, + "step": 74580 + }, + { + "epoch": 2.950028673693369, + "grad_norm": 1.8031606331327024, + "learning_rate": 8.429998097317372e-09, + "loss": 0.6392, + "step": 74590 + }, + { + "epoch": 2.9504241729122587, + "grad_norm": 1.8460586093353146, + "learning_rate": 8.29696678588343e-09, + "loss": 0.6407, + "step": 74600 + }, + { + "epoch": 2.9508196721311473, + "grad_norm": 1.3365413639143147, + "learning_rate": 8.164992631632396e-09, + "loss": 0.6554, + "step": 74610 + }, + { + "epoch": 2.9512151713500367, + "grad_norm": 1.4961993912429425, + "learning_rate": 8.034075662515795e-09, + "loss": 0.6483, + "step": 74620 + }, + { + "epoch": 2.951610670568926, + "grad_norm": 1.5933648065697557, + "learning_rate": 7.904215906258116e-09, + "loss": 0.6552, + "step": 74630 + }, + { + "epoch": 2.952006169787815, + "grad_norm": 1.3783989761197568, + "learning_rate": 7.775413390361809e-09, + "loss": 0.6548, + "step": 74640 + }, + { + "epoch": 2.952401669006704, + "grad_norm": 1.3570911850653213, + "learning_rate": 7.647668142105603e-09, + "loss": 0.6562, + "step": 74650 + }, + { + "epoch": 2.952797168225593, + "grad_norm": 1.5168896381561028, + "learning_rate": 7.520980188542859e-09, + "loss": 0.6356, + "step": 74660 + }, + { + "epoch": 2.953192667444482, + "grad_norm": 1.2579265897777971, + "learning_rate": 7.395349556504894e-09, + "loss": 0.6326, + "step": 74670 + }, + { + "epoch": 2.953588166663371, + "grad_norm": 1.2619313307887843, + "learning_rate": 7.270776272597646e-09, + "loss": 0.6718, + "step": 74680 + }, + { + "epoch": 2.95398366588226, + "grad_norm": 1.3164945555597718, + "learning_rate": 7.147260363203346e-09, + "loss": 0.6137, + "step": 74690 + }, + { 
+ "epoch": 2.954379165101149, + "grad_norm": 1.6049882415247216, + "learning_rate": 7.024801854481067e-09, + "loss": 0.6212, + "step": 74700 + }, + { + "epoch": 2.954774664320038, + "grad_norm": 1.3623941746428814, + "learning_rate": 6.9034007723645105e-09, + "loss": 0.6644, + "step": 74710 + }, + { + "epoch": 2.955170163538927, + "grad_norm": 1.651786609129843, + "learning_rate": 6.783057142565885e-09, + "loss": 0.6268, + "step": 74720 + }, + { + "epoch": 2.955565662757816, + "grad_norm": 1.6124752174678787, + "learning_rate": 6.663770990570362e-09, + "loss": 0.6267, + "step": 74730 + }, + { + "epoch": 2.955961161976705, + "grad_norm": 1.6464893507930403, + "learning_rate": 6.5455423416416195e-09, + "loss": 0.6388, + "step": 74740 + }, + { + "epoch": 2.9563566611955943, + "grad_norm": 1.522469868820525, + "learning_rate": 6.428371220818519e-09, + "loss": 0.6152, + "step": 74750 + }, + { + "epoch": 2.9567521604144833, + "grad_norm": 1.478830126027556, + "learning_rate": 6.312257652915099e-09, + "loss": 0.6257, + "step": 74760 + }, + { + "epoch": 2.9571476596333723, + "grad_norm": 1.5430585828201524, + "learning_rate": 6.1972016625228e-09, + "loss": 0.6421, + "step": 74770 + }, + { + "epoch": 2.9575431588522614, + "grad_norm": 1.3594554689223473, + "learning_rate": 6.083203274008242e-09, + "loss": 0.634, + "step": 74780 + }, + { + "epoch": 2.9579386580711504, + "grad_norm": 1.7739712528195106, + "learning_rate": 5.97026251151489e-09, + "loss": 0.6465, + "step": 74790 + }, + { + "epoch": 2.9583341572900395, + "grad_norm": 1.4447911825052047, + "learning_rate": 5.858379398960834e-09, + "loss": 0.6489, + "step": 74800 + }, + { + "epoch": 2.9587296565089285, + "grad_norm": 1.655079209568356, + "learning_rate": 5.747553960042118e-09, + "loss": 0.6361, + "step": 74810 + }, + { + "epoch": 2.9591251557278175, + "grad_norm": 1.6122534214013735, + "learning_rate": 5.637786218228303e-09, + "loss": 0.642, + "step": 74820 + }, + { + "epoch": 2.9595206549467066, + "grad_norm": 1.391384922418022, + "learning_rate": 5.529076196767458e-09, + "loss": 0.6222, + "step": 74830 + }, + { + "epoch": 2.9599161541655956, + "grad_norm": 1.3691674914862355, + "learning_rate": 5.421423918681723e-09, + "loss": 0.6322, + "step": 74840 + }, + { + "epoch": 2.9603116533844847, + "grad_norm": 1.7584409136596035, + "learning_rate": 5.314829406770639e-09, + "loss": 0.6701, + "step": 74850 + }, + { + "epoch": 2.9607071526033737, + "grad_norm": 1.6209984492877785, + "learning_rate": 5.209292683608924e-09, + "loss": 0.6302, + "step": 74860 + }, + { + "epoch": 2.9611026518222627, + "grad_norm": 1.5431252051398947, + "learning_rate": 5.1048137715470345e-09, + "loss": 0.6344, + "step": 74870 + }, + { + "epoch": 2.9614981510411518, + "grad_norm": 1.6825723290395584, + "learning_rate": 5.001392692711715e-09, + "loss": 0.6385, + "step": 74880 + }, + { + "epoch": 2.961893650260041, + "grad_norm": 1.4147061765580498, + "learning_rate": 4.899029469006556e-09, + "loss": 0.6649, + "step": 74890 + }, + { + "epoch": 2.96228914947893, + "grad_norm": 1.2492783765009212, + "learning_rate": 4.797724122110325e-09, + "loss": 0.6459, + "step": 74900 + }, + { + "epoch": 2.962684648697819, + "grad_norm": 1.688553028521607, + "learning_rate": 4.697476673476975e-09, + "loss": 0.637, + "step": 74910 + }, + { + "epoch": 2.963080147916708, + "grad_norm": 1.431511383458665, + "learning_rate": 4.598287144337299e-09, + "loss": 0.6447, + "step": 74920 + }, + { + "epoch": 2.963475647135597, + "grad_norm": 1.4545510572725713, + "learning_rate": 
4.5001555556983824e-09, + "loss": 0.6129, + "step": 74930 + }, + { + "epoch": 2.963871146354486, + "grad_norm": 1.6186478053074254, + "learning_rate": 4.4030819283430445e-09, + "loss": 0.628, + "step": 74940 + }, + { + "epoch": 2.964266645573375, + "grad_norm": 1.6302149152057273, + "learning_rate": 4.307066282829286e-09, + "loss": 0.6194, + "step": 74950 + }, + { + "epoch": 2.964662144792264, + "grad_norm": 1.4780535461559339, + "learning_rate": 4.212108639491952e-09, + "loss": 0.6521, + "step": 74960 + }, + { + "epoch": 2.965057644011153, + "grad_norm": 1.2493366364575227, + "learning_rate": 4.118209018440511e-09, + "loss": 0.652, + "step": 74970 + }, + { + "epoch": 2.965453143230042, + "grad_norm": 1.2589958731347763, + "learning_rate": 4.025367439562944e-09, + "loss": 0.6955, + "step": 74980 + }, + { + "epoch": 2.965848642448931, + "grad_norm": 1.5853134337464636, + "learning_rate": 3.933583922519635e-09, + "loss": 0.6011, + "step": 74990 + }, + { + "epoch": 2.9662441416678202, + "grad_norm": 1.2605075781945128, + "learning_rate": 3.842858486750034e-09, + "loss": 0.6712, + "step": 75000 + }, + { + "epoch": 2.9666396408867093, + "grad_norm": 1.4068048415604375, + "learning_rate": 3.753191151468216e-09, + "loss": 0.6493, + "step": 75010 + }, + { + "epoch": 2.9670351401055983, + "grad_norm": 1.4678818460843905, + "learning_rate": 3.664581935663436e-09, + "loss": 0.628, + "step": 75020 + }, + { + "epoch": 2.9674306393244874, + "grad_norm": 1.4243209499110554, + "learning_rate": 3.577030858102348e-09, + "loss": 0.6281, + "step": 75030 + }, + { + "epoch": 2.9678261385433764, + "grad_norm": 1.4597063660944907, + "learning_rate": 3.4905379373262327e-09, + "loss": 0.6294, + "step": 75040 + }, + { + "epoch": 2.9682216377622654, + "grad_norm": 1.5420936459573134, + "learning_rate": 3.405103191653214e-09, + "loss": 0.6207, + "step": 75050 + }, + { + "epoch": 2.9686171369811545, + "grad_norm": 1.5678044545490732, + "learning_rate": 3.320726639176597e-09, + "loss": 0.6169, + "step": 75060 + }, + { + "epoch": 2.9690126362000435, + "grad_norm": 1.5837276688561837, + "learning_rate": 3.237408297766531e-09, + "loss": 0.6521, + "step": 75070 + }, + { + "epoch": 2.9694081354189326, + "grad_norm": 1.4567197823453915, + "learning_rate": 3.155148185067236e-09, + "loss": 0.6379, + "step": 75080 + }, + { + "epoch": 2.9698036346378216, + "grad_norm": 1.4121875262162478, + "learning_rate": 3.0739463185008868e-09, + "loss": 0.6278, + "step": 75090 + }, + { + "epoch": 2.9701991338567106, + "grad_norm": 1.2437593009784589, + "learning_rate": 2.9938027152642825e-09, + "loss": 0.6396, + "step": 75100 + }, + { + "epoch": 2.9705946330755997, + "grad_norm": 1.2660020573484911, + "learning_rate": 2.914717392331068e-09, + "loss": 0.6762, + "step": 75110 + }, + { + "epoch": 2.9709901322944887, + "grad_norm": 1.711535282925223, + "learning_rate": 2.8366903664495128e-09, + "loss": 0.6431, + "step": 75120 + }, + { + "epoch": 2.9713856315133778, + "grad_norm": 1.5909848871876575, + "learning_rate": 2.7597216541441763e-09, + "loss": 0.6292, + "step": 75130 + }, + { + "epoch": 2.971781130732267, + "grad_norm": 1.4385085756643556, + "learning_rate": 2.683811271716463e-09, + "loss": 0.6577, + "step": 75140 + }, + { + "epoch": 2.972176629951156, + "grad_norm": 1.32032383793393, + "learning_rate": 2.6089592352424033e-09, + "loss": 0.6148, + "step": 75150 + }, + { + "epoch": 2.972572129170045, + "grad_norm": 1.741420476121603, + "learning_rate": 2.5351655605748704e-09, + "loss": 0.6309, + "step": 75160 + }, + { + "epoch": 
2.972967628388934, + "grad_norm": 1.415553982775396, + "learning_rate": 2.4624302633413643e-09, + "loss": 0.6536, + "step": 75170 + }, + { + "epoch": 2.973363127607823, + "grad_norm": 1.3981544815873328, + "learning_rate": 2.3907533589467848e-09, + "loss": 0.6333, + "step": 75180 + }, + { + "epoch": 2.973758626826712, + "grad_norm": 1.5398290473941099, + "learning_rate": 2.3201348625701003e-09, + "loss": 0.5774, + "step": 75190 + }, + { + "epoch": 2.974154126045601, + "grad_norm": 1.4157250252287614, + "learning_rate": 2.250574789167681e-09, + "loss": 0.6457, + "step": 75200 + }, + { + "epoch": 2.97454962526449, + "grad_norm": 1.6423073403184665, + "learning_rate": 2.182073153471631e-09, + "loss": 0.6101, + "step": 75210 + }, + { + "epoch": 2.974945124483379, + "grad_norm": 1.396346183045812, + "learning_rate": 2.1146299699886795e-09, + "loss": 0.632, + "step": 75220 + }, + { + "epoch": 2.975340623702268, + "grad_norm": 1.5078811731332071, + "learning_rate": 2.048245253002401e-09, + "loss": 0.6378, + "step": 75230 + }, + { + "epoch": 2.975736122921157, + "grad_norm": 1.25381496806973, + "learning_rate": 1.9829190165721046e-09, + "loss": 0.6697, + "step": 75240 + }, + { + "epoch": 2.9761316221400462, + "grad_norm": 1.7940583923776858, + "learning_rate": 1.9186512745322796e-09, + "loss": 0.6053, + "step": 75250 + }, + { + "epoch": 2.9765271213589353, + "grad_norm": 1.6043019537142902, + "learning_rate": 1.8554420404942596e-09, + "loss": 0.6436, + "step": 75260 + }, + { + "epoch": 2.9769226205778243, + "grad_norm": 1.521770604423184, + "learning_rate": 1.7932913278440044e-09, + "loss": 0.6837, + "step": 75270 + }, + { + "epoch": 2.9773181197967133, + "grad_norm": 1.4864976927423421, + "learning_rate": 1.7321991497448731e-09, + "loss": 0.6705, + "step": 75280 + }, + { + "epoch": 2.9777136190156024, + "grad_norm": 1.4399478614918257, + "learning_rate": 1.6721655191348497e-09, + "loss": 0.6404, + "step": 75290 + }, + { + "epoch": 2.9781091182344914, + "grad_norm": 1.5446595925732718, + "learning_rate": 1.613190448727653e-09, + "loss": 0.6443, + "step": 75300 + }, + { + "epoch": 2.9785046174533805, + "grad_norm": 1.477823627203876, + "learning_rate": 1.5552739510132919e-09, + "loss": 0.631, + "step": 75310 + }, + { + "epoch": 2.9789001166722695, + "grad_norm": 1.308303481305066, + "learning_rate": 1.4984160382575108e-09, + "loss": 0.6473, + "step": 75320 + }, + { + "epoch": 2.9792956158911585, + "grad_norm": 1.3179339787561173, + "learning_rate": 1.4426167225023436e-09, + "loss": 0.6377, + "step": 75330 + }, + { + "epoch": 2.9796911151100476, + "grad_norm": 1.2635890914942947, + "learning_rate": 1.387876015564449e-09, + "loss": 0.6398, + "step": 75340 + }, + { + "epoch": 2.9800866143289366, + "grad_norm": 1.4692902098728338, + "learning_rate": 1.3341939290373308e-09, + "loss": 0.6674, + "step": 75350 + }, + { + "epoch": 2.9804821135478257, + "grad_norm": 1.5136782957687511, + "learning_rate": 1.2815704742896728e-09, + "loss": 0.6595, + "step": 75360 + }, + { + "epoch": 2.9808776127667147, + "grad_norm": 1.2599673842229004, + "learning_rate": 1.2300056624664492e-09, + "loss": 0.6487, + "step": 75370 + }, + { + "epoch": 2.9812731119856037, + "grad_norm": 1.6242487669552406, + "learning_rate": 1.1794995044883684e-09, + "loss": 0.6342, + "step": 75380 + }, + { + "epoch": 2.981668611204493, + "grad_norm": 1.3829382961041896, + "learning_rate": 1.1300520110513192e-09, + "loss": 0.6327, + "step": 75390 + }, + { + "epoch": 2.982064110423382, + "grad_norm": 1.485347987490532, + "learning_rate": 
1.0816631926274802e-09, + "loss": 0.6713, + "step": 75400 + }, + { + "epoch": 2.982459609642271, + "grad_norm": 1.4529475997219343, + "learning_rate": 1.03433305946532e-09, + "loss": 0.6404, + "step": 75410 + }, + { + "epoch": 2.98285510886116, + "grad_norm": 1.6253571361681403, + "learning_rate": 9.880616215879324e-10, + "loss": 0.6524, + "step": 75420 + }, + { + "epoch": 2.983250608080049, + "grad_norm": 1.4962361410135567, + "learning_rate": 9.428488887952558e-10, + "loss": 0.607, + "step": 75430 + }, + { + "epoch": 2.983646107298938, + "grad_norm": 1.3346341948544451, + "learning_rate": 8.986948706624088e-10, + "loss": 0.6686, + "step": 75440 + }, + { + "epoch": 2.984041606517827, + "grad_norm": 1.5262170638424148, + "learning_rate": 8.555995765407999e-10, + "loss": 0.6447, + "step": 75450 + }, + { + "epoch": 2.984437105736716, + "grad_norm": 1.619897785918023, + "learning_rate": 8.135630155564622e-10, + "loss": 0.6289, + "step": 75460 + }, + { + "epoch": 2.984832604955605, + "grad_norm": 1.2956858012182204, + "learning_rate": 7.725851966122744e-10, + "loss": 0.6503, + "step": 75470 + }, + { + "epoch": 2.985228104174494, + "grad_norm": 1.445731249320724, + "learning_rate": 7.326661283874048e-10, + "loss": 0.6682, + "step": 75480 + }, + { + "epoch": 2.985623603393383, + "grad_norm": 1.28110989703659, + "learning_rate": 6.938058193350916e-10, + "loss": 0.662, + "step": 75490 + }, + { + "epoch": 2.986019102612272, + "grad_norm": 1.714521019527429, + "learning_rate": 6.560042776854181e-10, + "loss": 0.6301, + "step": 75500 + }, + { + "epoch": 2.9864146018311613, + "grad_norm": 1.2517103403917649, + "learning_rate": 6.192615114447576e-10, + "loss": 0.6664, + "step": 75510 + }, + { + "epoch": 2.9868101010500503, + "grad_norm": 1.6458483808091464, + "learning_rate": 5.835775283941081e-10, + "loss": 0.6684, + "step": 75520 + }, + { + "epoch": 2.9872056002689393, + "grad_norm": 1.4486338943637045, + "learning_rate": 5.489523360902028e-10, + "loss": 0.6228, + "step": 75530 + }, + { + "epoch": 2.9876010994878284, + "grad_norm": 1.6992803194296027, + "learning_rate": 5.153859418671748e-10, + "loss": 0.651, + "step": 75540 + }, + { + "epoch": 2.9879965987067174, + "grad_norm": 1.5222250377392132, + "learning_rate": 4.828783528332271e-10, + "loss": 0.6176, + "step": 75550 + }, + { + "epoch": 2.9883920979256064, + "grad_norm": 1.4129325805060393, + "learning_rate": 4.5142957587340774e-10, + "loss": 0.6467, + "step": 75560 + }, + { + "epoch": 2.9887875971444955, + "grad_norm": 1.5953229460641893, + "learning_rate": 4.2103961764683457e-10, + "loss": 0.6225, + "step": 75570 + }, + { + "epoch": 2.9891830963633845, + "grad_norm": 1.5002173466125093, + "learning_rate": 3.9170848459058055e-10, + "loss": 0.6155, + "step": 75580 + }, + { + "epoch": 2.9895785955822736, + "grad_norm": 1.426615461288542, + "learning_rate": 3.634361829163435e-10, + "loss": 0.6544, + "step": 75590 + }, + { + "epoch": 2.9899740948011626, + "grad_norm": 1.6223286719494556, + "learning_rate": 3.362227186121114e-10, + "loss": 0.6263, + "step": 75600 + }, + { + "epoch": 2.9903695940200516, + "grad_norm": 1.3958423191500335, + "learning_rate": 3.100680974399417e-10, + "loss": 0.6247, + "step": 75610 + }, + { + "epoch": 2.9907650932389407, + "grad_norm": 1.3907307659766843, + "learning_rate": 2.849723249398473e-10, + "loss": 0.656, + "step": 75620 + }, + { + "epoch": 2.99116059245783, + "grad_norm": 1.5579688916749102, + "learning_rate": 2.6093540642702085e-10, + "loss": 0.6667, + "step": 75630 + }, + { + "epoch": 2.9915560916767188, + 
"grad_norm": 1.3853201734642822, + "learning_rate": 2.379573469907248e-10, + "loss": 0.6324, + "step": 75640 + }, + { + "epoch": 2.9919515908956082, + "grad_norm": 1.5627128118464633, + "learning_rate": 2.1603815149873196e-10, + "loss": 0.6162, + "step": 75650 + }, + { + "epoch": 2.992347090114497, + "grad_norm": 1.9609497278470716, + "learning_rate": 1.951778245928848e-10, + "loss": 0.6329, + "step": 75660 + }, + { + "epoch": 2.9927425893333863, + "grad_norm": 1.4399687485105364, + "learning_rate": 1.7537637069020563e-10, + "loss": 0.6439, + "step": 75670 + }, + { + "epoch": 2.993138088552275, + "grad_norm": 1.2859001723230108, + "learning_rate": 1.5663379398511702e-10, + "loss": 0.6727, + "step": 75680 + }, + { + "epoch": 2.9935335877711644, + "grad_norm": 1.540621106707812, + "learning_rate": 1.3895009844722142e-10, + "loss": 0.6601, + "step": 75690 + }, + { + "epoch": 2.993929086990053, + "grad_norm": 1.2216806932987352, + "learning_rate": 1.223252878201908e-10, + "loss": 0.6445, + "step": 75700 + }, + { + "epoch": 2.9943245862089425, + "grad_norm": 1.5059383365090029, + "learning_rate": 1.0675936562676292e-10, + "loss": 0.6551, + "step": 75710 + }, + { + "epoch": 2.994720085427831, + "grad_norm": 1.5638408016327519, + "learning_rate": 9.225233516207966e-11, + "loss": 0.655, + "step": 75720 + }, + { + "epoch": 2.9951155846467206, + "grad_norm": 1.5980857472480596, + "learning_rate": 7.880419949868323e-11, + "loss": 0.6517, + "step": 75730 + }, + { + "epoch": 2.995511083865609, + "grad_norm": 1.5165589909400723, + "learning_rate": 6.641496148540594e-11, + "loss": 0.6746, + "step": 75740 + }, + { + "epoch": 2.9959065830844986, + "grad_norm": 1.2567363456933929, + "learning_rate": 5.508462374570478e-11, + "loss": 0.626, + "step": 75750 + }, + { + "epoch": 2.9963020823033872, + "grad_norm": 1.7366048532457277, + "learning_rate": 4.4813188678771715e-11, + "loss": 0.6505, + "step": 75760 + }, + { + "epoch": 2.9966975815222767, + "grad_norm": 1.580052978547302, + "learning_rate": 3.5600658460088756e-11, + "loss": 0.6296, + "step": 75770 + }, + { + "epoch": 2.9970930807411653, + "grad_norm": 1.6119513334127769, + "learning_rate": 2.7447035041427984e-11, + "loss": 0.6422, + "step": 75780 + }, + { + "epoch": 2.997488579960055, + "grad_norm": 1.269290310427461, + "learning_rate": 2.035232014863109e-11, + "loss": 0.625, + "step": 75790 + }, + { + "epoch": 2.9978840791789434, + "grad_norm": 1.3904786274334968, + "learning_rate": 1.4316515284384935e-11, + "loss": 0.65, + "step": 75800 + }, + { + "epoch": 2.998279578397833, + "grad_norm": 1.452136459921812, + "learning_rate": 9.339621727666448e-12, + "loss": 0.6577, + "step": 75810 + }, + { + "epoch": 2.9986750776167215, + "grad_norm": 1.647329898728647, + "learning_rate": 5.421640532077277e-12, + "loss": 0.6398, + "step": 75820 + }, + { + "epoch": 2.999070576835611, + "grad_norm": 1.4807456076782137, + "learning_rate": 2.5625725269540214e-12, + "loss": 0.6591, + "step": 75830 + }, + { + "epoch": 2.9994660760544996, + "grad_norm": 1.4164661919438535, + "learning_rate": 7.624183179233414e-13, + "loss": 0.619, + "step": 75840 + }, + { + "epoch": 2.999861575273389, + "grad_norm": 1.218056581703874, + "learning_rate": 2.1178286901957224e-14, + "loss": 0.6283, + "step": 75850 + } + ], + "logging_steps": 10, + "max_steps": 75852, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 150000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + 
"should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 1.1003829288173568e+16, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}