diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,36450 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.999995191868488, + "eval_steps": 500, + "global_step": 51995, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 1.9232526048052467e-05, + "grad_norm": 340256.8990635632, + "learning_rate": 3.846153846153847e-08, + "loss": 25905.9922, + "step": 1 + }, + { + "epoch": 0.00019232526048052465, + "grad_norm": 329547.13392761885, + "learning_rate": 3.846153846153847e-07, + "loss": 25785.191, + "step": 10 + }, + { + "epoch": 0.0003846505209610493, + "grad_norm": 288839.0962993139, + "learning_rate": 7.692307692307694e-07, + "loss": 23700.6562, + "step": 20 + }, + { + "epoch": 0.000576975781441574, + "grad_norm": 125626.36864796947, + "learning_rate": 1.153846153846154e-06, + "loss": 20190.3344, + "step": 30 + }, + { + "epoch": 0.0007693010419220986, + "grad_norm": 47322.671994916804, + "learning_rate": 1.5384615384615387e-06, + "loss": 16867.6031, + "step": 40 + }, + { + "epoch": 0.0009616263024026233, + "grad_norm": 22843.44389963286, + "learning_rate": 1.9230769230769234e-06, + "loss": 14599.7812, + "step": 50 + }, + { + "epoch": 0.001153951562883148, + "grad_norm": 23203.932058026054, + "learning_rate": 2.307692307692308e-06, + "loss": 12818.4688, + "step": 60 + }, + { + "epoch": 0.0013462768233636726, + "grad_norm": 12005.578799387531, + "learning_rate": 2.6923076923076923e-06, + "loss": 11844.6297, + "step": 70 + }, + { + "epoch": 0.0015386020838441972, + "grad_norm": 32818.750754299195, + "learning_rate": 3.0769230769230774e-06, + "loss": 11162.5469, + "step": 80 + }, + { + "epoch": 0.0017309273443247219, + "grad_norm": 17435.49809016785, + "learning_rate": 3.4615384615384617e-06, + "loss": 10793.7641, + "step": 90 + }, + { + "epoch": 0.0019232526048052465, + "grad_norm": 12701.85980578269, + "learning_rate": 3.846153846153847e-06, + "loss": 10653.1812, + "step": 100 + }, + { + "epoch": 0.0021155778652857714, + "grad_norm": 23037.58616236622, + "learning_rate": 4.230769230769231e-06, + "loss": 10358.1984, + "step": 110 + }, + { + "epoch": 0.002307903125766296, + "grad_norm": 13372.193504961218, + "learning_rate": 4.615384615384616e-06, + "loss": 10103.9813, + "step": 120 + }, + { + "epoch": 0.0025002283862468207, + "grad_norm": 16968.767589773826, + "learning_rate": 5e-06, + "loss": 9850.3547, + "step": 130 + }, + { + "epoch": 0.002692553646727345, + "grad_norm": 17165.642730963893, + "learning_rate": 5.384615384615385e-06, + "loss": 9720.2469, + "step": 140 + }, + { + "epoch": 0.00288487890720787, + "grad_norm": 13265.190772082746, + "learning_rate": 5.769230769230769e-06, + "loss": 9546.9609, + "step": 150 + }, + { + "epoch": 0.0030772041676883945, + "grad_norm": 21469.975105247308, + "learning_rate": 6.153846153846155e-06, + "loss": 9415.2406, + "step": 160 + }, + { + "epoch": 0.0032695294281689193, + "grad_norm": 15056.4294636194, + "learning_rate": 6.538461538461539e-06, + "loss": 9271.8852, + "step": 170 + }, + { + "epoch": 0.0034618546886494438, + "grad_norm": 24981.413385330285, + "learning_rate": 6.923076923076923e-06, + "loss": 9084.2461, + "step": 180 + }, + { + "epoch": 0.0036541799491299687, + "grad_norm": 27927.895744899266, + "learning_rate": 7.307692307692308e-06, + "loss": 8983.4891, + "step": 190 + }, + { + "epoch": 0.003846505209610493, + "grad_norm": 10409.821548080958, + "learning_rate": 7.692307692307694e-06, + "loss": 8836.9023, + "step": 200 + }, + { + "epoch": 0.0040388304700910175, + "grad_norm": 11841.413899636149, + "learning_rate": 8.076923076923077e-06, + "loss": 8635.6063, + "step": 210 + }, + { + "epoch": 0.004231155730571543, + "grad_norm": 16907.549285896923, + "learning_rate": 8.461538461538462e-06, + "loss": 8474.7594, + "step": 220 + }, + { + "epoch": 0.004423480991052067, + "grad_norm": 27817.25135796424, + "learning_rate": 8.846153846153847e-06, + "loss": 8325.1938, + "step": 230 + }, + { + "epoch": 0.004615806251532592, + "grad_norm": 11686.494480442112, + "learning_rate": 9.230769230769232e-06, + "loss": 8167.4578, + "step": 240 + }, + { + "epoch": 0.004808131512013117, + "grad_norm": 14611.295962037319, + "learning_rate": 9.615384615384616e-06, + "loss": 8038.5328, + "step": 250 + }, + { + "epoch": 0.0050004567724936414, + "grad_norm": 17857.53203550568, + "learning_rate": 1e-05, + "loss": 7909.875, + "step": 260 + }, + { + "epoch": 0.005192782032974166, + "grad_norm": 15809.087648960021, + "learning_rate": 1.0384615384615386e-05, + "loss": 7727.9883, + "step": 270 + }, + { + "epoch": 0.00538510729345469, + "grad_norm": 22914.004068667255, + "learning_rate": 1.076923076923077e-05, + "loss": 7617.4031, + "step": 280 + }, + { + "epoch": 0.005577432553935216, + "grad_norm": 19977.21406332264, + "learning_rate": 1.1153846153846154e-05, + "loss": 7466.7016, + "step": 290 + }, + { + "epoch": 0.00576975781441574, + "grad_norm": 15106.688610570774, + "learning_rate": 1.1538461538461538e-05, + "loss": 7329.4734, + "step": 300 + }, + { + "epoch": 0.0059620830748962645, + "grad_norm": 23038.7334332166, + "learning_rate": 1.1923076923076925e-05, + "loss": 7184.3031, + "step": 310 + }, + { + "epoch": 0.006154408335376789, + "grad_norm": 24002.17033078466, + "learning_rate": 1.230769230769231e-05, + "loss": 7171.5922, + "step": 320 + }, + { + "epoch": 0.006346733595857314, + "grad_norm": 18958.9256998183, + "learning_rate": 1.2692307692307693e-05, + "loss": 7098.3914, + "step": 330 + }, + { + "epoch": 0.006539058856337839, + "grad_norm": 21815.718108192887, + "learning_rate": 1.3076923076923078e-05, + "loss": 6975.9375, + "step": 340 + }, + { + "epoch": 0.006731384116818363, + "grad_norm": 13891.644195177581, + "learning_rate": 1.3461538461538463e-05, + "loss": 6760.9484, + "step": 350 + }, + { + "epoch": 0.0069237093772988876, + "grad_norm": 15659.636466050824, + "learning_rate": 1.3846153846153847e-05, + "loss": 6681.1844, + "step": 360 + }, + { + "epoch": 0.007116034637779413, + "grad_norm": 15354.468375663539, + "learning_rate": 1.4230769230769232e-05, + "loss": 6656.9016, + "step": 370 + }, + { + "epoch": 0.007308359898259937, + "grad_norm": 16678.45446714103, + "learning_rate": 1.4615384615384615e-05, + "loss": 6528.625, + "step": 380 + }, + { + "epoch": 0.007500685158740462, + "grad_norm": 14644.542359131423, + "learning_rate": 1.5000000000000002e-05, + "loss": 6349.0469, + "step": 390 + }, + { + "epoch": 0.007693010419220986, + "grad_norm": 12701.420488118676, + "learning_rate": 1.5384615384615387e-05, + "loss": 6235.1816, + "step": 400 + }, + { + "epoch": 0.00788533567970151, + "grad_norm": 20701.149469365322, + "learning_rate": 1.576923076923077e-05, + "loss": 6173.8125, + "step": 410 + }, + { + "epoch": 0.008077660940182035, + "grad_norm": 20562.39806175966, + "learning_rate": 1.6153846153846154e-05, + "loss": 6166.375, + "step": 420 + }, + { + "epoch": 0.008269986200662561, + "grad_norm": 14584.105530767776, + "learning_rate": 1.653846153846154e-05, + "loss": 5970.6586, + "step": 430 + }, + { + "epoch": 0.008462311461143086, + "grad_norm": 16991.435993864558, + "learning_rate": 1.6923076923076924e-05, + "loss": 5935.7461, + "step": 440 + }, + { + "epoch": 0.00865463672162361, + "grad_norm": 14263.18538760599, + "learning_rate": 1.730769230769231e-05, + "loss": 5953.3781, + "step": 450 + }, + { + "epoch": 0.008846961982104135, + "grad_norm": 19145.49418149806, + "learning_rate": 1.7692307692307694e-05, + "loss": 5724.8188, + "step": 460 + }, + { + "epoch": 0.009039287242584659, + "grad_norm": 15563.56027844558, + "learning_rate": 1.807692307692308e-05, + "loss": 5690.4148, + "step": 470 + }, + { + "epoch": 0.009231612503065183, + "grad_norm": 13300.268934789272, + "learning_rate": 1.8461538461538465e-05, + "loss": 5494.1055, + "step": 480 + }, + { + "epoch": 0.009423937763545708, + "grad_norm": 12857.054925207442, + "learning_rate": 1.8846153846153846e-05, + "loss": 5543.3703, + "step": 490 + }, + { + "epoch": 0.009616263024026234, + "grad_norm": 13109.490361871232, + "learning_rate": 1.923076923076923e-05, + "loss": 5420.0109, + "step": 500 + }, + { + "epoch": 0.009808588284506758, + "grad_norm": 15974.71343804961, + "learning_rate": 1.9615384615384617e-05, + "loss": 5325.168, + "step": 510 + }, + { + "epoch": 0.010000913544987283, + "grad_norm": 12954.438121276595, + "learning_rate": 2e-05, + "loss": 5288.568, + "step": 520 + }, + { + "epoch": 0.010193238805467807, + "grad_norm": 11844.914072876223, + "learning_rate": 1.999999813758258e-05, + "loss": 5107.3727, + "step": 530 + }, + { + "epoch": 0.010385564065948332, + "grad_norm": 11648.785363103754, + "learning_rate": 1.9999992550331007e-05, + "loss": 5067.948, + "step": 540 + }, + { + "epoch": 0.010577889326428856, + "grad_norm": 11091.289174630127, + "learning_rate": 1.999998323824736e-05, + "loss": 4981.1875, + "step": 550 + }, + { + "epoch": 0.01077021458690938, + "grad_norm": 11335.166344142004, + "learning_rate": 1.999997020133512e-05, + "loss": 4876.9996, + "step": 560 + }, + { + "epoch": 0.010962539847389905, + "grad_norm": 11190.506875571928, + "learning_rate": 1.9999953439599132e-05, + "loss": 4795.6996, + "step": 570 + }, + { + "epoch": 0.011154865107870431, + "grad_norm": 12522.98405604213, + "learning_rate": 1.9999932953045638e-05, + "loss": 4721.6297, + "step": 580 + }, + { + "epoch": 0.011347190368350956, + "grad_norm": 10446.139123875913, + "learning_rate": 1.999990874168228e-05, + "loss": 4620.8328, + "step": 590 + }, + { + "epoch": 0.01153951562883148, + "grad_norm": 12858.000478299662, + "learning_rate": 1.9999880805518067e-05, + "loss": 4567.8957, + "step": 600 + }, + { + "epoch": 0.011731840889312005, + "grad_norm": 12490.711568372986, + "learning_rate": 1.9999849144563406e-05, + "loss": 4478.7648, + "step": 610 + }, + { + "epoch": 0.011924166149792529, + "grad_norm": 9637.057023518732, + "learning_rate": 1.9999813758830092e-05, + "loss": 4402.8023, + "step": 620 + }, + { + "epoch": 0.012116491410273053, + "grad_norm": 9604.54589302654, + "learning_rate": 1.9999774648331307e-05, + "loss": 4332.5977, + "step": 630 + }, + { + "epoch": 0.012308816670753578, + "grad_norm": 10154.603630411342, + "learning_rate": 1.9999731813081616e-05, + "loss": 4271.5375, + "step": 640 + }, + { + "epoch": 0.012501141931234102, + "grad_norm": 10881.758102529358, + "learning_rate": 1.9999685253096975e-05, + "loss": 4143.4367, + "step": 650 + }, + { + "epoch": 0.012693467191714628, + "grad_norm": 10802.768943722904, + "learning_rate": 1.999963496839473e-05, + "loss": 4074.3352, + "step": 660 + }, + { + "epoch": 0.012885792452195153, + "grad_norm": 10995.061114305476, + "learning_rate": 1.9999580958993606e-05, + "loss": 4057.2422, + "step": 670 + }, + { + "epoch": 0.013078117712675677, + "grad_norm": 10730.355493407538, + "learning_rate": 1.9999523224913722e-05, + "loss": 4027.3887, + "step": 680 + }, + { + "epoch": 0.013270442973156202, + "grad_norm": 10705.574888253108, + "learning_rate": 1.999946176617659e-05, + "loss": 3954.6359, + "step": 690 + }, + { + "epoch": 0.013462768233636726, + "grad_norm": 8168.077792098991, + "learning_rate": 1.999939658280509e-05, + "loss": 3871.0703, + "step": 700 + }, + { + "epoch": 0.01365509349411725, + "grad_norm": 10264.234778721111, + "learning_rate": 1.9999327674823513e-05, + "loss": 3839.8055, + "step": 710 + }, + { + "epoch": 0.013847418754597775, + "grad_norm": 8626.566883187743, + "learning_rate": 1.9999255042257522e-05, + "loss": 3799.2852, + "step": 720 + }, + { + "epoch": 0.0140397440150783, + "grad_norm": 8828.928266683495, + "learning_rate": 1.999917868513417e-05, + "loss": 3700.2406, + "step": 730 + }, + { + "epoch": 0.014232069275558826, + "grad_norm": 9660.58301557004, + "learning_rate": 1.9999098603481895e-05, + "loss": 3615.8523, + "step": 740 + }, + { + "epoch": 0.01442439453603935, + "grad_norm": 7058.506989793753, + "learning_rate": 1.9999014797330536e-05, + "loss": 3648.4711, + "step": 750 + }, + { + "epoch": 0.014616719796519875, + "grad_norm": 8612.382081255484, + "learning_rate": 1.99989272667113e-05, + "loss": 3604.9625, + "step": 760 + }, + { + "epoch": 0.014809045057000399, + "grad_norm": 8236.17495506975, + "learning_rate": 1.99988360116568e-05, + "loss": 3549.2973, + "step": 770 + }, + { + "epoch": 0.015001370317480923, + "grad_norm": 7530.487558770446, + "learning_rate": 1.999874103220102e-05, + "loss": 3491.4262, + "step": 780 + }, + { + "epoch": 0.015193695577961448, + "grad_norm": 9216.484832898406, + "learning_rate": 1.999864232837934e-05, + "loss": 3497.0539, + "step": 790 + }, + { + "epoch": 0.015386020838441972, + "grad_norm": 7153.93457818025, + "learning_rate": 1.9998539900228526e-05, + "loss": 3461.8457, + "step": 800 + }, + { + "epoch": 0.015578346098922499, + "grad_norm": 7611.245340816313, + "learning_rate": 1.999843374778673e-05, + "loss": 3439.5852, + "step": 810 + }, + { + "epoch": 0.01577067135940302, + "grad_norm": 8264.323535675565, + "learning_rate": 1.999832387109349e-05, + "loss": 3387.3648, + "step": 820 + }, + { + "epoch": 0.015962996619883547, + "grad_norm": 6642.2333530382075, + "learning_rate": 1.9998210270189736e-05, + "loss": 3305.0938, + "step": 830 + }, + { + "epoch": 0.01615532188036407, + "grad_norm": 8684.38186788829, + "learning_rate": 1.9998092945117786e-05, + "loss": 3290.0352, + "step": 840 + }, + { + "epoch": 0.016347647140844596, + "grad_norm": 6756.283326498615, + "learning_rate": 1.999797189592134e-05, + "loss": 3261.4875, + "step": 850 + }, + { + "epoch": 0.016539972401325122, + "grad_norm": 26631.43185342854, + "learning_rate": 1.999784712264548e-05, + "loss": 3285.9105, + "step": 860 + }, + { + "epoch": 0.016732297661805645, + "grad_norm": 7161.421118835917, + "learning_rate": 1.9997718625336686e-05, + "loss": 3235.0674, + "step": 870 + }, + { + "epoch": 0.01692462292228617, + "grad_norm": 6496.011346595953, + "learning_rate": 1.9997586404042825e-05, + "loss": 3199.4945, + "step": 880 + }, + { + "epoch": 0.017116948182766694, + "grad_norm": 6761.59679428012, + "learning_rate": 1.9997450458813142e-05, + "loss": 3123.632, + "step": 890 + }, + { + "epoch": 0.01730927344324722, + "grad_norm": 6648.806635648011, + "learning_rate": 1.9997310789698276e-05, + "loss": 3094.5512, + "step": 900 + }, + { + "epoch": 0.017501598703727743, + "grad_norm": 7004.756665221598, + "learning_rate": 1.999716739675025e-05, + "loss": 3095.1094, + "step": 910 + }, + { + "epoch": 0.01769392396420827, + "grad_norm": 6858.459203676452, + "learning_rate": 1.9997020280022482e-05, + "loss": 3063.0963, + "step": 920 + }, + { + "epoch": 0.017886249224688795, + "grad_norm": 6661.144629432038, + "learning_rate": 1.9996869439569767e-05, + "loss": 3016.9559, + "step": 930 + }, + { + "epoch": 0.018078574485169318, + "grad_norm": 7214.218460140278, + "learning_rate": 1.9996714875448286e-05, + "loss": 2984.1994, + "step": 940 + }, + { + "epoch": 0.018270899745649844, + "grad_norm": 6001.130120344852, + "learning_rate": 1.9996556587715617e-05, + "loss": 2966.515, + "step": 950 + }, + { + "epoch": 0.018463225006130367, + "grad_norm": 6732.214693785003, + "learning_rate": 1.9996394576430716e-05, + "loss": 2952.8459, + "step": 960 + }, + { + "epoch": 0.018655550266610893, + "grad_norm": 6264.564572575251, + "learning_rate": 1.9996228841653932e-05, + "loss": 2917.0217, + "step": 970 + }, + { + "epoch": 0.018847875527091416, + "grad_norm": 6697.862055110569, + "learning_rate": 1.9996059383447e-05, + "loss": 2868.5787, + "step": 980 + }, + { + "epoch": 0.019040200787571942, + "grad_norm": 8004.141853268536, + "learning_rate": 1.999588620187303e-05, + "loss": 2838.8414, + "step": 990 + }, + { + "epoch": 0.019232526048052468, + "grad_norm": 8137.132667056839, + "learning_rate": 1.9995709296996545e-05, + "loss": 2848.5451, + "step": 1000 + }, + { + "epoch": 0.01942485130853299, + "grad_norm": 5590.029919016462, + "learning_rate": 1.999552866888343e-05, + "loss": 2827.4316, + "step": 1010 + }, + { + "epoch": 0.019617176569013517, + "grad_norm": 7665.639769622731, + "learning_rate": 1.9995344317600965e-05, + "loss": 2824.509, + "step": 1020 + }, + { + "epoch": 0.01980950182949404, + "grad_norm": 14313.87942231357, + "learning_rate": 1.9995156243217824e-05, + "loss": 2779.2203, + "step": 1030 + }, + { + "epoch": 0.020001827089974566, + "grad_norm": 7240.42830736018, + "learning_rate": 1.9994964445804054e-05, + "loss": 2794.9771, + "step": 1040 + }, + { + "epoch": 0.02019415235045509, + "grad_norm": 6362.123548045883, + "learning_rate": 1.9994768925431104e-05, + "loss": 2734.7777, + "step": 1050 + }, + { + "epoch": 0.020386477610935615, + "grad_norm": 6605.1139500342815, + "learning_rate": 1.9994569682171796e-05, + "loss": 2697.7977, + "step": 1060 + }, + { + "epoch": 0.020578802871416137, + "grad_norm": 5802.405497477909, + "learning_rate": 1.9994366716100346e-05, + "loss": 2666.858, + "step": 1070 + }, + { + "epoch": 0.020771128131896664, + "grad_norm": 7937.578426959362, + "learning_rate": 1.999416002729236e-05, + "loss": 2643.2906, + "step": 1080 + }, + { + "epoch": 0.02096345339237719, + "grad_norm": 7943.748330913041, + "learning_rate": 1.999394961582482e-05, + "loss": 2671.0062, + "step": 1090 + }, + { + "epoch": 0.021155778652857712, + "grad_norm": 7739.035172683172, + "learning_rate": 1.9993735481776105e-05, + "loss": 2579.4945, + "step": 1100 + }, + { + "epoch": 0.02134810391333824, + "grad_norm": 6482.674900184367, + "learning_rate": 1.9993517625225976e-05, + "loss": 2550.8812, + "step": 1110 + }, + { + "epoch": 0.02154042917381876, + "grad_norm": 7039.248626270822, + "learning_rate": 1.9993296046255578e-05, + "loss": 2507.1734, + "step": 1120 + }, + { + "epoch": 0.021732754434299287, + "grad_norm": 8778.22134636785, + "learning_rate": 1.9993070744947447e-05, + "loss": 2565.0559, + "step": 1130 + }, + { + "epoch": 0.02192507969477981, + "grad_norm": 6279.14543835544, + "learning_rate": 1.9992841721385508e-05, + "loss": 2475.0553, + "step": 1140 + }, + { + "epoch": 0.022117404955260336, + "grad_norm": 11392.292370308016, + "learning_rate": 1.999260897565506e-05, + "loss": 2484.5826, + "step": 1150 + }, + { + "epoch": 0.022309730215740863, + "grad_norm": 7052.888046527636, + "learning_rate": 1.9992372507842807e-05, + "loss": 2504.975, + "step": 1160 + }, + { + "epoch": 0.022502055476221385, + "grad_norm": 8786.673756747148, + "learning_rate": 1.9992132318036825e-05, + "loss": 2463.2688, + "step": 1170 + }, + { + "epoch": 0.02269438073670191, + "grad_norm": 6774.941164287866, + "learning_rate": 1.9991888406326575e-05, + "loss": 2469.0289, + "step": 1180 + }, + { + "epoch": 0.022886705997182434, + "grad_norm": 6413.011054953301, + "learning_rate": 1.9991640772802916e-05, + "loss": 2389.6076, + "step": 1190 + }, + { + "epoch": 0.02307903125766296, + "grad_norm": 6644.7145028389905, + "learning_rate": 1.9991389417558088e-05, + "loss": 2368.9789, + "step": 1200 + }, + { + "epoch": 0.023271356518143483, + "grad_norm": 8362.347446753292, + "learning_rate": 1.9991134340685713e-05, + "loss": 2351.3848, + "step": 1210 + }, + { + "epoch": 0.02346368177862401, + "grad_norm": 6096.356291539516, + "learning_rate": 1.999087554228081e-05, + "loss": 2306.2426, + "step": 1220 + }, + { + "epoch": 0.023656007039104535, + "grad_norm": 6804.562440191596, + "learning_rate": 1.999061302243977e-05, + "loss": 2304.1328, + "step": 1230 + }, + { + "epoch": 0.023848332299585058, + "grad_norm": 6952.243204119128, + "learning_rate": 1.9990346781260378e-05, + "loss": 2294.5363, + "step": 1240 + }, + { + "epoch": 0.024040657560065584, + "grad_norm": 8363.314403806107, + "learning_rate": 1.9990076818841805e-05, + "loss": 2245.8201, + "step": 1250 + }, + { + "epoch": 0.024232982820546107, + "grad_norm": 5794.89742816201, + "learning_rate": 1.998980313528461e-05, + "loss": 2252.8531, + "step": 1260 + }, + { + "epoch": 0.024425308081026633, + "grad_norm": 7386.9934412876055, + "learning_rate": 1.9989525730690736e-05, + "loss": 2239.523, + "step": 1270 + }, + { + "epoch": 0.024617633341507156, + "grad_norm": 7705.58379475732, + "learning_rate": 1.998924460516351e-05, + "loss": 2246.0797, + "step": 1280 + }, + { + "epoch": 0.024809958601987682, + "grad_norm": 7680.672029723623, + "learning_rate": 1.9988959758807645e-05, + "loss": 2201.1941, + "step": 1290 + }, + { + "epoch": 0.025002283862468205, + "grad_norm": 6815.1772251894035, + "learning_rate": 1.9988671191729243e-05, + "loss": 2208.9932, + "step": 1300 + }, + { + "epoch": 0.02519460912294873, + "grad_norm": 7069.861677780535, + "learning_rate": 1.998837890403579e-05, + "loss": 2164.1402, + "step": 1310 + }, + { + "epoch": 0.025386934383429257, + "grad_norm": 6750.107580515478, + "learning_rate": 1.998808289583616e-05, + "loss": 2113.6455, + "step": 1320 + }, + { + "epoch": 0.02557925964390978, + "grad_norm": 6892.477862219737, + "learning_rate": 1.998778316724061e-05, + "loss": 2104.1031, + "step": 1330 + }, + { + "epoch": 0.025771584904390306, + "grad_norm": 7111.939884524118, + "learning_rate": 1.998747971836078e-05, + "loss": 2054.852, + "step": 1340 + }, + { + "epoch": 0.02596391016487083, + "grad_norm": 7266.3859092421535, + "learning_rate": 1.9987172549309707e-05, + "loss": 2080.4945, + "step": 1350 + }, + { + "epoch": 0.026156235425351355, + "grad_norm": 7013.185786113792, + "learning_rate": 1.9986861660201802e-05, + "loss": 2046.0879, + "step": 1360 + }, + { + "epoch": 0.026348560685831877, + "grad_norm": 7269.11236414404, + "learning_rate": 1.998654705115287e-05, + "loss": 2090.7902, + "step": 1370 + }, + { + "epoch": 0.026540885946312404, + "grad_norm": 7047.400297873044, + "learning_rate": 1.9986228722280093e-05, + "loss": 2039.8742, + "step": 1380 + }, + { + "epoch": 0.02673321120679293, + "grad_norm": 6332.782863580958, + "learning_rate": 1.998590667370204e-05, + "loss": 2013.5613, + "step": 1390 + }, + { + "epoch": 0.026925536467273452, + "grad_norm": 7449.685427230845, + "learning_rate": 1.998558090553868e-05, + "loss": 2008.0461, + "step": 1400 + }, + { + "epoch": 0.02711786172775398, + "grad_norm": 7413.579686598059, + "learning_rate": 1.9985251417911347e-05, + "loss": 1975.0203, + "step": 1410 + }, + { + "epoch": 0.0273101869882345, + "grad_norm": 7033.093276733983, + "learning_rate": 1.9984918210942776e-05, + "loss": 1940.2811, + "step": 1420 + }, + { + "epoch": 0.027502512248715028, + "grad_norm": 5859.880891146991, + "learning_rate": 1.9984581284757074e-05, + "loss": 1938.102, + "step": 1430 + }, + { + "epoch": 0.02769483750919555, + "grad_norm": 6208.344325728506, + "learning_rate": 1.9984240639479745e-05, + "loss": 1916.9785, + "step": 1440 + }, + { + "epoch": 0.027887162769676076, + "grad_norm": 6477.74340167659, + "learning_rate": 1.9983896275237677e-05, + "loss": 1895.2777, + "step": 1450 + }, + { + "epoch": 0.0280794880301566, + "grad_norm": 7475.619950653028, + "learning_rate": 1.9983548192159132e-05, + "loss": 1879.7949, + "step": 1460 + }, + { + "epoch": 0.028271813290637125, + "grad_norm": 6911.270172786403, + "learning_rate": 1.998319639037377e-05, + "loss": 1878.4859, + "step": 1470 + }, + { + "epoch": 0.02846413855111765, + "grad_norm": 7034.868758955199, + "learning_rate": 1.9982840870012626e-05, + "loss": 1882.0582, + "step": 1480 + }, + { + "epoch": 0.028656463811598174, + "grad_norm": 7342.8147967755285, + "learning_rate": 1.9982481631208138e-05, + "loss": 1831.302, + "step": 1490 + }, + { + "epoch": 0.0288487890720787, + "grad_norm": 5374.418078115465, + "learning_rate": 1.9982118674094104e-05, + "loss": 1820.3521, + "step": 1500 + }, + { + "epoch": 0.029041114332559223, + "grad_norm": 9918.535696965788, + "learning_rate": 1.9981751998805725e-05, + "loss": 1807.5156, + "step": 1510 + }, + { + "epoch": 0.02923343959303975, + "grad_norm": 6994.301939045192, + "learning_rate": 1.998138160547958e-05, + "loss": 1822.6168, + "step": 1520 + }, + { + "epoch": 0.029425764853520272, + "grad_norm": 6988.347234586641, + "learning_rate": 1.9981007494253638e-05, + "loss": 1794.4318, + "step": 1530 + }, + { + "epoch": 0.029618090114000798, + "grad_norm": 7182.174762468267, + "learning_rate": 1.998062966526724e-05, + "loss": 1803.4854, + "step": 1540 + }, + { + "epoch": 0.029810415374481324, + "grad_norm": 6032.49938713834, + "learning_rate": 1.998024811866113e-05, + "loss": 1776.6539, + "step": 1550 + }, + { + "epoch": 0.030002740634961847, + "grad_norm": 8451.537205034321, + "learning_rate": 1.9979862854577427e-05, + "loss": 1767.198, + "step": 1560 + }, + { + "epoch": 0.030195065895442373, + "grad_norm": 6324.559095250308, + "learning_rate": 1.9979473873159635e-05, + "loss": 1739.0559, + "step": 1570 + }, + { + "epoch": 0.030387391155922896, + "grad_norm": 6122.708535706743, + "learning_rate": 1.9979081174552638e-05, + "loss": 1736.2916, + "step": 1580 + }, + { + "epoch": 0.030579716416403422, + "grad_norm": 6163.254308596572, + "learning_rate": 1.997868475890271e-05, + "loss": 1710.7768, + "step": 1590 + }, + { + "epoch": 0.030772041676883945, + "grad_norm": 7482.85673113, + "learning_rate": 1.997828462635752e-05, + "loss": 1675.4146, + "step": 1600 + }, + { + "epoch": 0.03096436693736447, + "grad_norm": 7073.578921549694, + "learning_rate": 1.99778807770661e-05, + "loss": 1677.7027, + "step": 1610 + }, + { + "epoch": 0.031156692197844997, + "grad_norm": 6893.3497557177625, + "learning_rate": 1.9977473211178882e-05, + "loss": 1744.8992, + "step": 1620 + }, + { + "epoch": 0.03134901745832552, + "grad_norm": 6140.95848024039, + "learning_rate": 1.9977061928847676e-05, + "loss": 1692.118, + "step": 1630 + }, + { + "epoch": 0.03154134271880604, + "grad_norm": 6336.2767988987025, + "learning_rate": 1.9976646930225678e-05, + "loss": 1646.1111, + "step": 1640 + }, + { + "epoch": 0.03173366797928657, + "grad_norm": 7557.425027234783, + "learning_rate": 1.997622821546747e-05, + "loss": 1655.5611, + "step": 1650 + }, + { + "epoch": 0.031925993239767095, + "grad_norm": 7583.169303374387, + "learning_rate": 1.9975805784729008e-05, + "loss": 1628.3082, + "step": 1660 + }, + { + "epoch": 0.03211831850024762, + "grad_norm": 7173.653913504125, + "learning_rate": 1.9975379638167654e-05, + "loss": 1637.8015, + "step": 1670 + }, + { + "epoch": 0.03231064376072814, + "grad_norm": 5630.425784082243, + "learning_rate": 1.9974949775942134e-05, + "loss": 1587.5666, + "step": 1680 + }, + { + "epoch": 0.032502969021208666, + "grad_norm": 6960.428709379124, + "learning_rate": 1.997451619821256e-05, + "loss": 1622.5503, + "step": 1690 + }, + { + "epoch": 0.03269529428168919, + "grad_norm": 5666.063717228243, + "learning_rate": 1.997407890514044e-05, + "loss": 1593.3805, + "step": 1700 + }, + { + "epoch": 0.03288761954216972, + "grad_norm": 6681.8675048555015, + "learning_rate": 1.9973637896888652e-05, + "loss": 1558.9164, + "step": 1710 + }, + { + "epoch": 0.033079944802650245, + "grad_norm": 6584.118263819194, + "learning_rate": 1.997319317362147e-05, + "loss": 1554.373, + "step": 1720 + }, + { + "epoch": 0.033272270063130764, + "grad_norm": 5627.724155760312, + "learning_rate": 1.9972744735504542e-05, + "loss": 1531.2031, + "step": 1730 + }, + { + "epoch": 0.03346459532361129, + "grad_norm": 7141.653634460501, + "learning_rate": 1.9972292582704905e-05, + "loss": 1561.4683, + "step": 1740 + }, + { + "epoch": 0.033656920584091816, + "grad_norm": 6324.730794149539, + "learning_rate": 1.997183671539098e-05, + "loss": 1538.1083, + "step": 1750 + }, + { + "epoch": 0.03384924584457234, + "grad_norm": 6152.067748579414, + "learning_rate": 1.9971377133732567e-05, + "loss": 1537.7105, + "step": 1760 + }, + { + "epoch": 0.03404157110505287, + "grad_norm": 6727.948648162338, + "learning_rate": 1.997091383790086e-05, + "loss": 1563.193, + "step": 1770 + }, + { + "epoch": 0.03423389636553339, + "grad_norm": 6505.031008665096, + "learning_rate": 1.997044682806842e-05, + "loss": 1499.8426, + "step": 1780 + }, + { + "epoch": 0.034426221626013914, + "grad_norm": 5950.971205507093, + "learning_rate": 1.9969976104409202e-05, + "loss": 1514.9335, + "step": 1790 + }, + { + "epoch": 0.03461854688649444, + "grad_norm": 6142.555502411284, + "learning_rate": 1.9969501667098547e-05, + "loss": 1519.249, + "step": 1800 + }, + { + "epoch": 0.03481087214697497, + "grad_norm": 6715.306709245462, + "learning_rate": 1.996902351631317e-05, + "loss": 1469.8909, + "step": 1810 + }, + { + "epoch": 0.035003197407455486, + "grad_norm": 5243.438368734838, + "learning_rate": 1.996854165223118e-05, + "loss": 1453.6711, + "step": 1820 + }, + { + "epoch": 0.03519552266793601, + "grad_norm": 5982.293698199709, + "learning_rate": 1.9968056075032058e-05, + "loss": 1455.9143, + "step": 1830 + }, + { + "epoch": 0.03538784792841654, + "grad_norm": 5866.6628889997055, + "learning_rate": 1.9967566784896676e-05, + "loss": 1443.7178, + "step": 1840 + }, + { + "epoch": 0.035580173188897064, + "grad_norm": 6016.273160404695, + "learning_rate": 1.996707378200729e-05, + "loss": 1414.5693, + "step": 1850 + }, + { + "epoch": 0.03577249844937759, + "grad_norm": 6048.548916992061, + "learning_rate": 1.9966577066547526e-05, + "loss": 1422.5311, + "step": 1860 + }, + { + "epoch": 0.03596482370985811, + "grad_norm": 6847.995198951612, + "learning_rate": 1.9966076638702412e-05, + "loss": 1432.3163, + "step": 1870 + }, + { + "epoch": 0.036157148970338636, + "grad_norm": 6846.3147885279, + "learning_rate": 1.9965572498658346e-05, + "loss": 1428.632, + "step": 1880 + }, + { + "epoch": 0.03634947423081916, + "grad_norm": 6367.2467448438965, + "learning_rate": 1.996506464660311e-05, + "loss": 1404.1271, + "step": 1890 + }, + { + "epoch": 0.03654179949129969, + "grad_norm": 6149.422226476783, + "learning_rate": 1.9964553082725873e-05, + "loss": 1391.2855, + "step": 1900 + }, + { + "epoch": 0.03673412475178021, + "grad_norm": 5679.545651188765, + "learning_rate": 1.996403780721718e-05, + "loss": 1393.1797, + "step": 1910 + }, + { + "epoch": 0.036926450012260734, + "grad_norm": 6726.248738159026, + "learning_rate": 1.9963518820268968e-05, + "loss": 1463.1839, + "step": 1920 + }, + { + "epoch": 0.03711877527274126, + "grad_norm": 5812.251798998977, + "learning_rate": 1.996299612207455e-05, + "loss": 1385.4611, + "step": 1930 + }, + { + "epoch": 0.037311100533221786, + "grad_norm": 6481.40575070707, + "learning_rate": 1.9962469712828613e-05, + "loss": 1408.033, + "step": 1940 + }, + { + "epoch": 0.03750342579370231, + "grad_norm": 6217.993583939255, + "learning_rate": 1.996193959272725e-05, + "loss": 1349.4258, + "step": 1950 + }, + { + "epoch": 0.03769575105418283, + "grad_norm": 6332.18771920136, + "learning_rate": 1.9961405761967914e-05, + "loss": 1368.4553, + "step": 1960 + }, + { + "epoch": 0.03788807631466336, + "grad_norm": 6952.58839303712, + "learning_rate": 1.996086822074945e-05, + "loss": 1348.7169, + "step": 1970 + }, + { + "epoch": 0.038080401575143884, + "grad_norm": 6009.994644836221, + "learning_rate": 1.996032696927208e-05, + "loss": 1351.4033, + "step": 1980 + }, + { + "epoch": 0.03827272683562441, + "grad_norm": 6277.628853007842, + "learning_rate": 1.9959782007737418e-05, + "loss": 1340.3785, + "step": 1990 + }, + { + "epoch": 0.038465052096104936, + "grad_norm": 6283.430305222497, + "learning_rate": 1.9959233336348452e-05, + "loss": 1318.3738, + "step": 2000 + }, + { + "epoch": 0.038657377356585455, + "grad_norm": 5268.659543280948, + "learning_rate": 1.9958680955309546e-05, + "loss": 1345.0908, + "step": 2010 + }, + { + "epoch": 0.03884970261706598, + "grad_norm": 6018.533082613178, + "learning_rate": 1.9958124864826457e-05, + "loss": 1335.4022, + "step": 2020 + }, + { + "epoch": 0.03904202787754651, + "grad_norm": 6248.182353178989, + "learning_rate": 1.9957565065106318e-05, + "loss": 1309.4732, + "step": 2030 + }, + { + "epoch": 0.039234353138027034, + "grad_norm": 6065.4802712354995, + "learning_rate": 1.9957001556357652e-05, + "loss": 1308.841, + "step": 2040 + }, + { + "epoch": 0.03942667839850755, + "grad_norm": 5488.531257988972, + "learning_rate": 1.995643433879035e-05, + "loss": 1284.2648, + "step": 2050 + }, + { + "epoch": 0.03961900365898808, + "grad_norm": 6036.815153189847, + "learning_rate": 1.9955863412615693e-05, + "loss": 1301.248, + "step": 2060 + }, + { + "epoch": 0.039811328919468605, + "grad_norm": 6303.194316067486, + "learning_rate": 1.9955288778046338e-05, + "loss": 1294.3275, + "step": 2070 + }, + { + "epoch": 0.04000365417994913, + "grad_norm": 6023.2953845852735, + "learning_rate": 1.995471043529633e-05, + "loss": 1285.6352, + "step": 2080 + }, + { + "epoch": 0.04019597944042966, + "grad_norm": 5384.551949622103, + "learning_rate": 1.99541283845811e-05, + "loss": 1280.3516, + "step": 2090 + }, + { + "epoch": 0.04038830470091018, + "grad_norm": 6758.828042617035, + "learning_rate": 1.9953542626117437e-05, + "loss": 1282.7562, + "step": 2100 + }, + { + "epoch": 0.0405806299613907, + "grad_norm": 5895.0203225542455, + "learning_rate": 1.9952953160123537e-05, + "loss": 1281.4125, + "step": 2110 + }, + { + "epoch": 0.04077295522187123, + "grad_norm": 6065.273403921973, + "learning_rate": 1.995235998681896e-05, + "loss": 1260.6578, + "step": 2120 + }, + { + "epoch": 0.040965280482351756, + "grad_norm": 6293.559800161821, + "learning_rate": 1.9951763106424658e-05, + "loss": 1255.2279, + "step": 2130 + }, + { + "epoch": 0.041157605742832275, + "grad_norm": 4700.336619432639, + "learning_rate": 1.9951162519162962e-05, + "loss": 1239.8465, + "step": 2140 + }, + { + "epoch": 0.0413499310033128, + "grad_norm": 5221.876270859536, + "learning_rate": 1.9950558225257574e-05, + "loss": 1238.9635, + "step": 2150 + }, + { + "epoch": 0.04154225626379333, + "grad_norm": 5565.829122934509, + "learning_rate": 1.9949950224933584e-05, + "loss": 1236.6825, + "step": 2160 + }, + { + "epoch": 0.04173458152427385, + "grad_norm": 6147.523806240359, + "learning_rate": 1.994933851841747e-05, + "loss": 1227.3937, + "step": 2170 + }, + { + "epoch": 0.04192690678475438, + "grad_norm": 5974.770554144452, + "learning_rate": 1.994872310593707e-05, + "loss": 1230.965, + "step": 2180 + }, + { + "epoch": 0.0421192320452349, + "grad_norm": 5330.434141430119, + "learning_rate": 1.994810398772162e-05, + "loss": 1226.8635, + "step": 2190 + }, + { + "epoch": 0.042311557305715425, + "grad_norm": 5436.87738586885, + "learning_rate": 1.994748116400174e-05, + "loss": 1205.6514, + "step": 2200 + }, + { + "epoch": 0.04250388256619595, + "grad_norm": 5126.282475196194, + "learning_rate": 1.994685463500941e-05, + "loss": 1221.4822, + "step": 2210 + }, + { + "epoch": 0.04269620782667648, + "grad_norm": 4848.022230952555, + "learning_rate": 1.9946224400978006e-05, + "loss": 1201.4771, + "step": 2220 + }, + { + "epoch": 0.042888533087157, + "grad_norm": 5211.129666115587, + "learning_rate": 1.994559046214228e-05, + "loss": 1192.5682, + "step": 2230 + }, + { + "epoch": 0.04308085834763752, + "grad_norm": 6304.665333599148, + "learning_rate": 1.9944952818738366e-05, + "loss": 1209.3821, + "step": 2240 + }, + { + "epoch": 0.04327318360811805, + "grad_norm": 4778.190236574797, + "learning_rate": 1.9944311471003775e-05, + "loss": 1210.8347, + "step": 2250 + }, + { + "epoch": 0.043465508868598575, + "grad_norm": 5923.088858141527, + "learning_rate": 1.9943666419177392e-05, + "loss": 1158.5988, + "step": 2260 + }, + { + "epoch": 0.0436578341290791, + "grad_norm": 5677.082358004745, + "learning_rate": 1.9943017663499497e-05, + "loss": 1181.834, + "step": 2270 + }, + { + "epoch": 0.04385015938955962, + "grad_norm": 5018.811106626207, + "learning_rate": 1.9942365204211734e-05, + "loss": 1161.9672, + "step": 2280 + }, + { + "epoch": 0.044042484650040147, + "grad_norm": 5519.412534913629, + "learning_rate": 1.9941709041557134e-05, + "loss": 1165.3775, + "step": 2290 + }, + { + "epoch": 0.04423480991052067, + "grad_norm": 4758.708930344798, + "learning_rate": 1.994104917578011e-05, + "loss": 1189.6666, + "step": 2300 + }, + { + "epoch": 0.0444271351710012, + "grad_norm": 5797.702850920498, + "learning_rate": 1.9940385607126456e-05, + "loss": 1151.2826, + "step": 2310 + }, + { + "epoch": 0.044619460431481725, + "grad_norm": 5412.241982469557, + "learning_rate": 1.9939718335843326e-05, + "loss": 1167.319, + "step": 2320 + }, + { + "epoch": 0.044811785691962244, + "grad_norm": 5565.101511080268, + "learning_rate": 1.9939047362179283e-05, + "loss": 1161.4689, + "step": 2330 + }, + { + "epoch": 0.04500411095244277, + "grad_norm": 5556.710865953506, + "learning_rate": 1.9938372686384244e-05, + "loss": 1127.1202, + "step": 2340 + }, + { + "epoch": 0.0451964362129233, + "grad_norm": 5601.832752266584, + "learning_rate": 1.9937694308709514e-05, + "loss": 1141.9598, + "step": 2350 + }, + { + "epoch": 0.04538876147340382, + "grad_norm": 8889.048734758144, + "learning_rate": 1.9937012229407783e-05, + "loss": 1169.4885, + "step": 2360 + }, + { + "epoch": 0.04558108673388434, + "grad_norm": 6129.912153090955, + "learning_rate": 1.9936326448733107e-05, + "loss": 1172.7059, + "step": 2370 + }, + { + "epoch": 0.04577341199436487, + "grad_norm": 4443.626207141793, + "learning_rate": 1.9935636966940938e-05, + "loss": 1129.7623, + "step": 2380 + }, + { + "epoch": 0.045965737254845394, + "grad_norm": 6621.077972481085, + "learning_rate": 1.993494378428809e-05, + "loss": 1156.3591, + "step": 2390 + }, + { + "epoch": 0.04615806251532592, + "grad_norm": 6061.474638019706, + "learning_rate": 1.9934246901032764e-05, + "loss": 1108.5379, + "step": 2400 + }, + { + "epoch": 0.04635038777580645, + "grad_norm": 6122.716279468376, + "learning_rate": 1.9933546317434536e-05, + "loss": 1131.7629, + "step": 2410 + }, + { + "epoch": 0.046542713036286966, + "grad_norm": 4523.510602373938, + "learning_rate": 1.9932842033754363e-05, + "loss": 1122.6425, + "step": 2420 + }, + { + "epoch": 0.04673503829676749, + "grad_norm": 5025.599416057065, + "learning_rate": 1.9932134050254578e-05, + "loss": 1114.3494, + "step": 2430 + }, + { + "epoch": 0.04692736355724802, + "grad_norm": 4899.485765055181, + "learning_rate": 1.9931422367198893e-05, + "loss": 1099.6018, + "step": 2440 + }, + { + "epoch": 0.047119688817728544, + "grad_norm": 5090.665618145162, + "learning_rate": 1.9930706984852403e-05, + "loss": 1104.125, + "step": 2450 + }, + { + "epoch": 0.04731201407820907, + "grad_norm": 4940.992602008076, + "learning_rate": 1.992998790348157e-05, + "loss": 1115.1102, + "step": 2460 + }, + { + "epoch": 0.04750433933868959, + "grad_norm": 4814.86897503154, + "learning_rate": 1.992926512335424e-05, + "loss": 1116.3245, + "step": 2470 + }, + { + "epoch": 0.047696664599170116, + "grad_norm": 4436.093340243908, + "learning_rate": 1.992853864473964e-05, + "loss": 1084.0676, + "step": 2480 + }, + { + "epoch": 0.04788898985965064, + "grad_norm": 5318.538620492033, + "learning_rate": 1.992780846790837e-05, + "loss": 1099.2612, + "step": 2490 + }, + { + "epoch": 0.04808131512013117, + "grad_norm": 5271.639572188547, + "learning_rate": 1.9927074593132412e-05, + "loss": 1086.9515, + "step": 2500 + }, + { + "epoch": 0.04827364038061169, + "grad_norm": 5691.1346926333035, + "learning_rate": 1.9926337020685114e-05, + "loss": 1096.943, + "step": 2510 + }, + { + "epoch": 0.048465965641092214, + "grad_norm": 4964.253910595249, + "learning_rate": 1.992559575084122e-05, + "loss": 1088.6112, + "step": 2520 + }, + { + "epoch": 0.04865829090157274, + "grad_norm": 5474.318377203567, + "learning_rate": 1.9924850783876832e-05, + "loss": 1078.7441, + "step": 2530 + }, + { + "epoch": 0.048850616162053266, + "grad_norm": 4877.686610194673, + "learning_rate": 1.992410212006944e-05, + "loss": 1075.9105, + "step": 2540 + }, + { + "epoch": 0.04904294142253379, + "grad_norm": 4911.033729811151, + "learning_rate": 1.9923349759697915e-05, + "loss": 1064.0662, + "step": 2550 + }, + { + "epoch": 0.04923526668301431, + "grad_norm": 5749.793109960041, + "learning_rate": 1.992259370304249e-05, + "loss": 1056.484, + "step": 2560 + }, + { + "epoch": 0.04942759194349484, + "grad_norm": 5726.810088024783, + "learning_rate": 1.992183395038479e-05, + "loss": 1068.5598, + "step": 2570 + }, + { + "epoch": 0.049619917203975364, + "grad_norm": 4464.140815954304, + "learning_rate": 1.9921070502007808e-05, + "loss": 1049.9455, + "step": 2580 + }, + { + "epoch": 0.04981224246445589, + "grad_norm": 4412.999068295054, + "learning_rate": 1.9920303358195916e-05, + "loss": 1039.6811, + "step": 2590 + }, + { + "epoch": 0.05000456772493641, + "grad_norm": 5669.913135174593, + "learning_rate": 1.9919532519234858e-05, + "loss": 1058.2753, + "step": 2600 + }, + { + "epoch": 0.050196892985416935, + "grad_norm": 5512.474040181994, + "learning_rate": 1.9918757985411767e-05, + "loss": 1038.1103, + "step": 2610 + }, + { + "epoch": 0.05038921824589746, + "grad_norm": 5568.707613091602, + "learning_rate": 1.991797975701514e-05, + "loss": 1040.8706, + "step": 2620 + }, + { + "epoch": 0.05058154350637799, + "grad_norm": 56860.19354775827, + "learning_rate": 1.9917197834334858e-05, + "loss": 1098.1378, + "step": 2630 + }, + { + "epoch": 0.050773868766858514, + "grad_norm": 4838.380630173328, + "learning_rate": 1.9916412217662162e-05, + "loss": 1032.9431, + "step": 2640 + }, + { + "epoch": 0.05096619402733903, + "grad_norm": 4664.195992308137, + "learning_rate": 1.9915622907289695e-05, + "loss": 1021.9714, + "step": 2650 + }, + { + "epoch": 0.05115851928781956, + "grad_norm": 4568.3497516757625, + "learning_rate": 1.9914829903511458e-05, + "loss": 1014.9593, + "step": 2660 + }, + { + "epoch": 0.051350844548300086, + "grad_norm": 5011.072462941165, + "learning_rate": 1.9914033206622828e-05, + "loss": 1041.5867, + "step": 2670 + }, + { + "epoch": 0.05154316980878061, + "grad_norm": 5292.580046561721, + "learning_rate": 1.9913232816920565e-05, + "loss": 1031.7426, + "step": 2680 + }, + { + "epoch": 0.05173549506926114, + "grad_norm": 4823.289178985006, + "learning_rate": 1.99124287347028e-05, + "loss": 1026.7715, + "step": 2690 + }, + { + "epoch": 0.05192782032974166, + "grad_norm": 4752.049768811952, + "learning_rate": 1.991162096026904e-05, + "loss": 1022.8786, + "step": 2700 + }, + { + "epoch": 0.05212014559022218, + "grad_norm": 4705.213430937199, + "learning_rate": 1.9910809493920172e-05, + "loss": 1017.1159, + "step": 2710 + }, + { + "epoch": 0.05231247085070271, + "grad_norm": 4793.854247136831, + "learning_rate": 1.990999433595845e-05, + "loss": 1019.4643, + "step": 2720 + }, + { + "epoch": 0.052504796111183236, + "grad_norm": 4920.728030054162, + "learning_rate": 1.99091754866875e-05, + "loss": 1007.5629, + "step": 2730 + }, + { + "epoch": 0.052697121371663755, + "grad_norm": 4434.570504555348, + "learning_rate": 1.990835294641234e-05, + "loss": 996.0966, + "step": 2740 + }, + { + "epoch": 0.05288944663214428, + "grad_norm": 4893.77566822937, + "learning_rate": 1.990752671543935e-05, + "loss": 1002.8721, + "step": 2750 + }, + { + "epoch": 0.05308177189262481, + "grad_norm": 5102.66497285508, + "learning_rate": 1.9906696794076284e-05, + "loss": 997.6461, + "step": 2760 + }, + { + "epoch": 0.05327409715310533, + "grad_norm": 4818.975735209405, + "learning_rate": 1.9905863182632285e-05, + "loss": 1007.0695, + "step": 2770 + }, + { + "epoch": 0.05346642241358586, + "grad_norm": 4901.244624847641, + "learning_rate": 1.990502588141784e-05, + "loss": 983.7172, + "step": 2780 + }, + { + "epoch": 0.05365874767406638, + "grad_norm": 4937.250155063634, + "learning_rate": 1.990418489074485e-05, + "loss": 964.584, + "step": 2790 + }, + { + "epoch": 0.053851072934546905, + "grad_norm": 4551.201756040109, + "learning_rate": 1.9903340210926555e-05, + "loss": 959.6879, + "step": 2800 + }, + { + "epoch": 0.05404339819502743, + "grad_norm": 5112.548170122441, + "learning_rate": 1.9902491842277592e-05, + "loss": 1009.366, + "step": 2810 + }, + { + "epoch": 0.05423572345550796, + "grad_norm": 4547.733471475647, + "learning_rate": 1.9901639785113967e-05, + "loss": 989.1688, + "step": 2820 + }, + { + "epoch": 0.05442804871598848, + "grad_norm": 4727.651956402866, + "learning_rate": 1.990078403975305e-05, + "loss": 996.4672, + "step": 2830 + }, + { + "epoch": 0.054620373976469, + "grad_norm": 5037.541830739576, + "learning_rate": 1.9899924606513593e-05, + "loss": 978.5039, + "step": 2840 + }, + { + "epoch": 0.05481269923694953, + "grad_norm": 5393.778198327005, + "learning_rate": 1.9899061485715726e-05, + "loss": 954.7977, + "step": 2850 + }, + { + "epoch": 0.055005024497430055, + "grad_norm": 4468.238799938698, + "learning_rate": 1.9898194677680943e-05, + "loss": 973.9543, + "step": 2860 + }, + { + "epoch": 0.05519734975791058, + "grad_norm": 4833.6617348280215, + "learning_rate": 1.9897324182732118e-05, + "loss": 959.073, + "step": 2870 + }, + { + "epoch": 0.0553896750183911, + "grad_norm": 4360.012760415991, + "learning_rate": 1.989645000119349e-05, + "loss": 950.7967, + "step": 2880 + }, + { + "epoch": 0.05558200027887163, + "grad_norm": 4779.795875742137, + "learning_rate": 1.9895572133390687e-05, + "loss": 951.4395, + "step": 2890 + }, + { + "epoch": 0.05577432553935215, + "grad_norm": 4595.241636924597, + "learning_rate": 1.9894690579650694e-05, + "loss": 953.3818, + "step": 2900 + }, + { + "epoch": 0.05596665079983268, + "grad_norm": 4750.078277224996, + "learning_rate": 1.9893805340301876e-05, + "loss": 972.3777, + "step": 2910 + }, + { + "epoch": 0.0561589760603132, + "grad_norm": 4946.332658418301, + "learning_rate": 1.989291641567397e-05, + "loss": 940.0307, + "step": 2920 + }, + { + "epoch": 0.056351301320793724, + "grad_norm": 4820.671381216139, + "learning_rate": 1.9892023806098083e-05, + "loss": 919.107, + "step": 2930 + }, + { + "epoch": 0.05654362658127425, + "grad_norm": 9364.311838278925, + "learning_rate": 1.9891127511906703e-05, + "loss": 931.4717, + "step": 2940 + }, + { + "epoch": 0.05673595184175478, + "grad_norm": 5521.496200012186, + "learning_rate": 1.9890227533433685e-05, + "loss": 952.6798, + "step": 2950 + }, + { + "epoch": 0.0569282771022353, + "grad_norm": 4134.00927852241, + "learning_rate": 1.988932387101425e-05, + "loss": 953.5514, + "step": 2960 + }, + { + "epoch": 0.05712060236271582, + "grad_norm": 5084.677399940139, + "learning_rate": 1.9888416524985e-05, + "loss": 928.6297, + "step": 2970 + }, + { + "epoch": 0.05731292762319635, + "grad_norm": 4406.793507705554, + "learning_rate": 1.988750549568391e-05, + "loss": 943.6451, + "step": 2980 + }, + { + "epoch": 0.057505252883676874, + "grad_norm": 4417.295293118958, + "learning_rate": 1.9886590783450317e-05, + "loss": 904.7558, + "step": 2990 + }, + { + "epoch": 0.0576975781441574, + "grad_norm": 4043.4875527218483, + "learning_rate": 1.9885672388624942e-05, + "loss": 913.576, + "step": 3000 + }, + { + "epoch": 0.05788990340463793, + "grad_norm": 6053.236380815296, + "learning_rate": 1.9884750311549868e-05, + "loss": 937.1524, + "step": 3010 + }, + { + "epoch": 0.058082228665118446, + "grad_norm": 4789.6869984331615, + "learning_rate": 1.9883824552568557e-05, + "loss": 937.9734, + "step": 3020 + }, + { + "epoch": 0.05827455392559897, + "grad_norm": 4965.655181870662, + "learning_rate": 1.9882895112025835e-05, + "loss": 936.3613, + "step": 3030 + }, + { + "epoch": 0.0584668791860795, + "grad_norm": 4977.478694437648, + "learning_rate": 1.9881961990267906e-05, + "loss": 901.9449, + "step": 3040 + }, + { + "epoch": 0.058659204446560025, + "grad_norm": 4910.648766244268, + "learning_rate": 1.988102518764234e-05, + "loss": 921.7038, + "step": 3050 + }, + { + "epoch": 0.058851529707040544, + "grad_norm": 5225.812550012057, + "learning_rate": 1.9880084704498084e-05, + "loss": 918.5408, + "step": 3060 + }, + { + "epoch": 0.05904385496752107, + "grad_norm": 4083.5240466170785, + "learning_rate": 1.987914054118545e-05, + "loss": 924.0464, + "step": 3070 + }, + { + "epoch": 0.059236180228001596, + "grad_norm": 4274.373196959295, + "learning_rate": 1.9878192698056125e-05, + "loss": 910.9401, + "step": 3080 + }, + { + "epoch": 0.05942850548848212, + "grad_norm": 4719.276107575339, + "learning_rate": 1.9877241175463165e-05, + "loss": 911.0809, + "step": 3090 + }, + { + "epoch": 0.05962083074896265, + "grad_norm": 4557.959688711481, + "learning_rate": 1.9876285973760993e-05, + "loss": 916.6955, + "step": 3100 + }, + { + "epoch": 0.05981315600944317, + "grad_norm": 4377.690465567877, + "learning_rate": 1.9875327093305405e-05, + "loss": 906.8008, + "step": 3110 + }, + { + "epoch": 0.060005481269923694, + "grad_norm": 4464.047345330293, + "learning_rate": 1.9874364534453577e-05, + "loss": 896.7488, + "step": 3120 + }, + { + "epoch": 0.06019780653040422, + "grad_norm": 4934.762156507667, + "learning_rate": 1.9873398297564036e-05, + "loss": 901.9243, + "step": 3130 + }, + { + "epoch": 0.060390131790884746, + "grad_norm": 4174.133922786432, + "learning_rate": 1.9872428382996697e-05, + "loss": 906.8617, + "step": 3140 + }, + { + "epoch": 0.060582457051365265, + "grad_norm": 4555.277081853879, + "learning_rate": 1.987145479111283e-05, + "loss": 872.9479, + "step": 3150 + }, + { + "epoch": 0.06077478231184579, + "grad_norm": 5080.292901313894, + "learning_rate": 1.987047752227509e-05, + "loss": 887.3372, + "step": 3160 + }, + { + "epoch": 0.06096710757232632, + "grad_norm": 4435.566792129146, + "learning_rate": 1.9869496576847488e-05, + "loss": 863.0025, + "step": 3170 + }, + { + "epoch": 0.061159432832806844, + "grad_norm": 4418.891159481613, + "learning_rate": 1.9868511955195407e-05, + "loss": 888.5924, + "step": 3180 + }, + { + "epoch": 0.06135175809328737, + "grad_norm": 6415.926300105252, + "learning_rate": 1.986752365768561e-05, + "loss": 848.7134, + "step": 3190 + }, + { + "epoch": 0.06154408335376789, + "grad_norm": 4401.204702087581, + "learning_rate": 1.986653168468622e-05, + "loss": 879.5176, + "step": 3200 + }, + { + "epoch": 0.061736408614248416, + "grad_norm": 5100.704679489723, + "learning_rate": 1.9865536036566727e-05, + "loss": 873.8359, + "step": 3210 + }, + { + "epoch": 0.06192873387472894, + "grad_norm": 4561.358661768131, + "learning_rate": 1.9864536713697992e-05, + "loss": 905.102, + "step": 3220 + }, + { + "epoch": 0.06212105913520947, + "grad_norm": 4042.3756414071713, + "learning_rate": 1.986353371645225e-05, + "loss": 890.4045, + "step": 3230 + }, + { + "epoch": 0.062313384395689994, + "grad_norm": 4432.64429811606, + "learning_rate": 1.9862527045203105e-05, + "loss": 879.4265, + "step": 3240 + }, + { + "epoch": 0.06250570965617051, + "grad_norm": 4304.964417453636, + "learning_rate": 1.986151670032552e-05, + "loss": 878.1913, + "step": 3250 + }, + { + "epoch": 0.06269803491665105, + "grad_norm": 4929.846602020889, + "learning_rate": 1.986050268219583e-05, + "loss": 856.8271, + "step": 3260 + }, + { + "epoch": 0.06289036017713157, + "grad_norm": 4182.158653745027, + "learning_rate": 1.9859484991191742e-05, + "loss": 874.8608, + "step": 3270 + }, + { + "epoch": 0.06308268543761208, + "grad_norm": 4495.4207670603255, + "learning_rate": 1.985846362769233e-05, + "loss": 880.4451, + "step": 3280 + }, + { + "epoch": 0.06327501069809262, + "grad_norm": 4413.667875279064, + "learning_rate": 1.9857438592078034e-05, + "loss": 856.9678, + "step": 3290 + }, + { + "epoch": 0.06346733595857314, + "grad_norm": 4140.851677182371, + "learning_rate": 1.9856409884730667e-05, + "loss": 859.9239, + "step": 3300 + }, + { + "epoch": 0.06365966121905367, + "grad_norm": 4368.5020687747, + "learning_rate": 1.98553775060334e-05, + "loss": 837.9691, + "step": 3310 + }, + { + "epoch": 0.06385198647953419, + "grad_norm": 4160.2297715238465, + "learning_rate": 1.9854341456370777e-05, + "loss": 823.9158, + "step": 3320 + }, + { + "epoch": 0.06404431174001471, + "grad_norm": 4858.730488652083, + "learning_rate": 1.9853301736128712e-05, + "loss": 846.968, + "step": 3330 + }, + { + "epoch": 0.06423663700049524, + "grad_norm": 4403.117774583476, + "learning_rate": 1.9852258345694486e-05, + "loss": 871.0086, + "step": 3340 + }, + { + "epoch": 0.06442896226097576, + "grad_norm": 5190.530620906509, + "learning_rate": 1.9851211285456738e-05, + "loss": 848.1436, + "step": 3350 + }, + { + "epoch": 0.06462128752145628, + "grad_norm": 5214.395170983605, + "learning_rate": 1.9850160555805485e-05, + "loss": 859.4229, + "step": 3360 + }, + { + "epoch": 0.06481361278193681, + "grad_norm": 5113.537918633996, + "learning_rate": 1.9849106157132105e-05, + "loss": 861.5078, + "step": 3370 + }, + { + "epoch": 0.06500593804241733, + "grad_norm": 4078.461363069817, + "learning_rate": 1.9848048089829347e-05, + "loss": 850.4208, + "step": 3380 + }, + { + "epoch": 0.06519826330289787, + "grad_norm": 3898.4248478860036, + "learning_rate": 1.9846986354291324e-05, + "loss": 831.2916, + "step": 3390 + }, + { + "epoch": 0.06539058856337839, + "grad_norm": 5468.591054006592, + "learning_rate": 1.9845920950913506e-05, + "loss": 853.7027, + "step": 3400 + }, + { + "epoch": 0.0655829138238589, + "grad_norm": 4798.514728157639, + "learning_rate": 1.9844851880092748e-05, + "loss": 817.3862, + "step": 3410 + }, + { + "epoch": 0.06577523908433944, + "grad_norm": 4643.5886323774275, + "learning_rate": 1.9843779142227258e-05, + "loss": 827.4217, + "step": 3420 + }, + { + "epoch": 0.06596756434481996, + "grad_norm": 4856.374126046193, + "learning_rate": 1.984270273771661e-05, + "loss": 825.7255, + "step": 3430 + }, + { + "epoch": 0.06615988960530049, + "grad_norm": 4969.591662398133, + "learning_rate": 1.9841622666961756e-05, + "loss": 836.7778, + "step": 3440 + }, + { + "epoch": 0.06635221486578101, + "grad_norm": 4017.7999361640086, + "learning_rate": 1.9840538930364992e-05, + "loss": 838.4433, + "step": 3450 + }, + { + "epoch": 0.06654454012626153, + "grad_norm": 3960.744107457203, + "learning_rate": 1.983945152833e-05, + "loss": 801.9833, + "step": 3460 + }, + { + "epoch": 0.06673686538674206, + "grad_norm": 4703.169822673688, + "learning_rate": 1.9838360461261817e-05, + "loss": 851.1238, + "step": 3470 + }, + { + "epoch": 0.06692919064722258, + "grad_norm": 4395.211216082489, + "learning_rate": 1.9837265729566853e-05, + "loss": 817.4702, + "step": 3480 + }, + { + "epoch": 0.06712151590770311, + "grad_norm": 4481.923669874384, + "learning_rate": 1.9836167333652866e-05, + "loss": 842.1456, + "step": 3490 + }, + { + "epoch": 0.06731384116818363, + "grad_norm": 4237.612272263026, + "learning_rate": 1.9835065273929002e-05, + "loss": 811.6774, + "step": 3500 + }, + { + "epoch": 0.06750616642866415, + "grad_norm": 4336.09376535591, + "learning_rate": 1.9833959550805754e-05, + "loss": 812.5192, + "step": 3510 + }, + { + "epoch": 0.06769849168914469, + "grad_norm": 4534.430920824958, + "learning_rate": 1.9832850164694983e-05, + "loss": 806.0679, + "step": 3520 + }, + { + "epoch": 0.0678908169496252, + "grad_norm": 4737.744932299016, + "learning_rate": 1.9831737116009924e-05, + "loss": 804.2666, + "step": 3530 + }, + { + "epoch": 0.06808314221010574, + "grad_norm": 4205.116332782815, + "learning_rate": 1.9830620405165164e-05, + "loss": 796.6594, + "step": 3540 + }, + { + "epoch": 0.06827546747058626, + "grad_norm": 4999.150392223975, + "learning_rate": 1.982950003257666e-05, + "loss": 827.5804, + "step": 3550 + }, + { + "epoch": 0.06846779273106678, + "grad_norm": 4407.410357341057, + "learning_rate": 1.9828375998661738e-05, + "loss": 817.3569, + "step": 3560 + }, + { + "epoch": 0.06866011799154731, + "grad_norm": 4218.685212880607, + "learning_rate": 1.9827248303839073e-05, + "loss": 765.9263, + "step": 3570 + }, + { + "epoch": 0.06885244325202783, + "grad_norm": 4192.665753801111, + "learning_rate": 1.982611694852872e-05, + "loss": 792.1777, + "step": 3580 + }, + { + "epoch": 0.06904476851250835, + "grad_norm": 4538.948349765603, + "learning_rate": 1.9824981933152087e-05, + "loss": 777.4994, + "step": 3590 + }, + { + "epoch": 0.06923709377298888, + "grad_norm": 4238.988511745942, + "learning_rate": 1.9823843258131945e-05, + "loss": 796.3801, + "step": 3600 + }, + { + "epoch": 0.0694294190334694, + "grad_norm": 4980.483201880425, + "learning_rate": 1.9822700923892438e-05, + "loss": 818.8832, + "step": 3610 + }, + { + "epoch": 0.06962174429394993, + "grad_norm": 4087.560163038672, + "learning_rate": 1.9821554930859066e-05, + "loss": 794.1205, + "step": 3620 + }, + { + "epoch": 0.06981406955443045, + "grad_norm": 4221.2069398196, + "learning_rate": 1.982040527945869e-05, + "loss": 799.7377, + "step": 3630 + }, + { + "epoch": 0.07000639481491097, + "grad_norm": 4103.723339502749, + "learning_rate": 1.9819251970119534e-05, + "loss": 808.7371, + "step": 3640 + }, + { + "epoch": 0.0701987200753915, + "grad_norm": 4073.157123448541, + "learning_rate": 1.9818095003271193e-05, + "loss": 781.1663, + "step": 3650 + }, + { + "epoch": 0.07039104533587202, + "grad_norm": 4201.542638359091, + "learning_rate": 1.9816934379344613e-05, + "loss": 781.4498, + "step": 3660 + }, + { + "epoch": 0.07058337059635256, + "grad_norm": 4369.081933228246, + "learning_rate": 1.9815770098772108e-05, + "loss": 784.0995, + "step": 3670 + }, + { + "epoch": 0.07077569585683308, + "grad_norm": 3845.900065729217, + "learning_rate": 1.9814602161987354e-05, + "loss": 773.1455, + "step": 3680 + }, + { + "epoch": 0.0709680211173136, + "grad_norm": 3738.811478727106, + "learning_rate": 1.981343056942539e-05, + "loss": 766.453, + "step": 3690 + }, + { + "epoch": 0.07116034637779413, + "grad_norm": 5235.3272466201515, + "learning_rate": 1.9812255321522614e-05, + "loss": 798.5602, + "step": 3700 + }, + { + "epoch": 0.07135267163827465, + "grad_norm": 3979.6886324575103, + "learning_rate": 1.981107641871678e-05, + "loss": 780.4679, + "step": 3710 + }, + { + "epoch": 0.07154499689875518, + "grad_norm": 4022.041531127809, + "learning_rate": 1.980989386144702e-05, + "loss": 774.6857, + "step": 3720 + }, + { + "epoch": 0.0717373221592357, + "grad_norm": 4472.923210364339, + "learning_rate": 1.980870765015381e-05, + "loss": 780.6932, + "step": 3730 + }, + { + "epoch": 0.07192964741971622, + "grad_norm": 3689.7253421859155, + "learning_rate": 1.9807517785278997e-05, + "loss": 772.0474, + "step": 3740 + }, + { + "epoch": 0.07212197268019675, + "grad_norm": 4027.0369553735572, + "learning_rate": 1.9806324267265786e-05, + "loss": 772.4933, + "step": 3750 + }, + { + "epoch": 0.07231429794067727, + "grad_norm": 4300.084726674544, + "learning_rate": 1.9805127096558742e-05, + "loss": 772.58, + "step": 3760 + }, + { + "epoch": 0.0725066232011578, + "grad_norm": 4260.686057345299, + "learning_rate": 1.980392627360379e-05, + "loss": 768.9246, + "step": 3770 + }, + { + "epoch": 0.07269894846163832, + "grad_norm": 4182.971510822694, + "learning_rate": 1.9802721798848225e-05, + "loss": 779.4888, + "step": 3780 + }, + { + "epoch": 0.07289127372211884, + "grad_norm": 4067.8402051621556, + "learning_rate": 1.980151367274068e-05, + "loss": 757.6914, + "step": 3790 + }, + { + "epoch": 0.07308359898259938, + "grad_norm": 4435.119921254233, + "learning_rate": 1.9800301895731172e-05, + "loss": 775.5353, + "step": 3800 + }, + { + "epoch": 0.0732759242430799, + "grad_norm": 4169.998600865379, + "learning_rate": 1.9799086468271065e-05, + "loss": 761.8761, + "step": 3810 + }, + { + "epoch": 0.07346824950356041, + "grad_norm": 4250.334802552328, + "learning_rate": 1.9797867390813086e-05, + "loss": 751.6994, + "step": 3820 + }, + { + "epoch": 0.07366057476404095, + "grad_norm": 4161.694459374591, + "learning_rate": 1.9796644663811318e-05, + "loss": 760.986, + "step": 3830 + }, + { + "epoch": 0.07385290002452147, + "grad_norm": 4071.005938035517, + "learning_rate": 1.9795418287721215e-05, + "loss": 752.5014, + "step": 3840 + }, + { + "epoch": 0.074045225285002, + "grad_norm": 3983.5485763971133, + "learning_rate": 1.9794188262999574e-05, + "loss": 767.3445, + "step": 3850 + }, + { + "epoch": 0.07423755054548252, + "grad_norm": 4351.927589412771, + "learning_rate": 1.979295459010456e-05, + "loss": 752.1044, + "step": 3860 + }, + { + "epoch": 0.07442987580596304, + "grad_norm": 4003.554081765813, + "learning_rate": 1.9791717269495698e-05, + "loss": 735.3016, + "step": 3870 + }, + { + "epoch": 0.07462220106644357, + "grad_norm": 4080.9933243213095, + "learning_rate": 1.979047630163387e-05, + "loss": 740.3427, + "step": 3880 + }, + { + "epoch": 0.07481452632692409, + "grad_norm": 4056.4364237507216, + "learning_rate": 1.9789231686981313e-05, + "loss": 762.9863, + "step": 3890 + }, + { + "epoch": 0.07500685158740462, + "grad_norm": 3779.458008967918, + "learning_rate": 1.978798342600163e-05, + "loss": 739.0848, + "step": 3900 + }, + { + "epoch": 0.07519917684788514, + "grad_norm": 3966.5516719775974, + "learning_rate": 1.978673151915977e-05, + "loss": 748.0446, + "step": 3910 + }, + { + "epoch": 0.07539150210836566, + "grad_norm": 3864.9589702649387, + "learning_rate": 1.9785475966922055e-05, + "loss": 752.2604, + "step": 3920 + }, + { + "epoch": 0.0755838273688462, + "grad_norm": 4668.415651232959, + "learning_rate": 1.9784216769756156e-05, + "loss": 737.1826, + "step": 3930 + }, + { + "epoch": 0.07577615262932672, + "grad_norm": 4384.60448412883, + "learning_rate": 1.97829539281311e-05, + "loss": 731.0745, + "step": 3940 + }, + { + "epoch": 0.07596847788980725, + "grad_norm": 3601.994023223988, + "learning_rate": 1.9781687442517278e-05, + "loss": 736.3513, + "step": 3950 + }, + { + "epoch": 0.07616080315028777, + "grad_norm": 3951.141271493915, + "learning_rate": 1.9780417313386433e-05, + "loss": 750.8261, + "step": 3960 + }, + { + "epoch": 0.07635312841076829, + "grad_norm": 4020.710239625638, + "learning_rate": 1.9779143541211664e-05, + "loss": 733.3558, + "step": 3970 + }, + { + "epoch": 0.07654545367124882, + "grad_norm": 3711.5298841098925, + "learning_rate": 1.9777866126467436e-05, + "loss": 711.2791, + "step": 3980 + }, + { + "epoch": 0.07673777893172934, + "grad_norm": 3600.6900243194355, + "learning_rate": 1.9776585069629566e-05, + "loss": 725.6805, + "step": 3990 + }, + { + "epoch": 0.07693010419220987, + "grad_norm": 3791.0140196145912, + "learning_rate": 1.9775300371175225e-05, + "loss": 731.4292, + "step": 4000 + }, + { + "epoch": 0.07712242945269039, + "grad_norm": 6905.455573319279, + "learning_rate": 1.9774012031582935e-05, + "loss": 717.5897, + "step": 4010 + }, + { + "epoch": 0.07731475471317091, + "grad_norm": 4442.5228710944375, + "learning_rate": 1.9772720051332585e-05, + "loss": 735.1908, + "step": 4020 + }, + { + "epoch": 0.07750707997365144, + "grad_norm": 4432.83144236267, + "learning_rate": 1.977142443090542e-05, + "loss": 727.4589, + "step": 4030 + }, + { + "epoch": 0.07769940523413196, + "grad_norm": 4019.6817616779726, + "learning_rate": 1.9770125170784035e-05, + "loss": 729.0105, + "step": 4040 + }, + { + "epoch": 0.07789173049461248, + "grad_norm": 3970.844433192576, + "learning_rate": 1.9768822271452385e-05, + "loss": 736.296, + "step": 4050 + }, + { + "epoch": 0.07808405575509302, + "grad_norm": 4043.051322501368, + "learning_rate": 1.9767515733395774e-05, + "loss": 717.5287, + "step": 4060 + }, + { + "epoch": 0.07827638101557353, + "grad_norm": 6770.272247577486, + "learning_rate": 1.976620555710087e-05, + "loss": 725.2103, + "step": 4070 + }, + { + "epoch": 0.07846870627605407, + "grad_norm": 4234.052491760217, + "learning_rate": 1.976489174305569e-05, + "loss": 728.0525, + "step": 4080 + }, + { + "epoch": 0.07866103153653459, + "grad_norm": 3533.448407372867, + "learning_rate": 1.9763574291749603e-05, + "loss": 702.9978, + "step": 4090 + }, + { + "epoch": 0.0788533567970151, + "grad_norm": 3589.3971111893, + "learning_rate": 1.9762253203673348e-05, + "loss": 707.1979, + "step": 4100 + }, + { + "epoch": 0.07904568205749564, + "grad_norm": 3689.474090623924, + "learning_rate": 1.9760928479319003e-05, + "loss": 722.2437, + "step": 4110 + }, + { + "epoch": 0.07923800731797616, + "grad_norm": 4424.700623564766, + "learning_rate": 1.9759600119180005e-05, + "loss": 718.8557, + "step": 4120 + }, + { + "epoch": 0.07943033257845669, + "grad_norm": 3669.9009921334696, + "learning_rate": 1.975826812375115e-05, + "loss": 704.1591, + "step": 4130 + }, + { + "epoch": 0.07962265783893721, + "grad_norm": 4337.477909996418, + "learning_rate": 1.9756932493528583e-05, + "loss": 683.2335, + "step": 4140 + }, + { + "epoch": 0.07981498309941773, + "grad_norm": 5126.869874135973, + "learning_rate": 1.97555932290098e-05, + "loss": 764.6667, + "step": 4150 + }, + { + "epoch": 0.08000730835989826, + "grad_norm": 3561.896465127024, + "learning_rate": 1.9754250330693658e-05, + "loss": 708.6442, + "step": 4160 + }, + { + "epoch": 0.08019963362037878, + "grad_norm": 4053.5462185031943, + "learning_rate": 1.9752903799080366e-05, + "loss": 719.9713, + "step": 4170 + }, + { + "epoch": 0.08039195888085932, + "grad_norm": 3528.8884049513335, + "learning_rate": 1.9751553634671485e-05, + "loss": 683.4802, + "step": 4180 + }, + { + "epoch": 0.08058428414133983, + "grad_norm": 3373.3511327501215, + "learning_rate": 1.9750199837969922e-05, + "loss": 690.1696, + "step": 4190 + }, + { + "epoch": 0.08077660940182035, + "grad_norm": 3377.279025351652, + "learning_rate": 1.9748842409479953e-05, + "loss": 698.2177, + "step": 4200 + }, + { + "epoch": 0.08096893466230089, + "grad_norm": 3937.853000288384, + "learning_rate": 1.9747481349707197e-05, + "loss": 708.9531, + "step": 4210 + }, + { + "epoch": 0.0811612599227814, + "grad_norm": 3774.9521491888127, + "learning_rate": 1.9746116659158618e-05, + "loss": 695.156, + "step": 4220 + }, + { + "epoch": 0.08135358518326194, + "grad_norm": 3936.348895644883, + "learning_rate": 1.9744748338342546e-05, + "loss": 692.2083, + "step": 4230 + }, + { + "epoch": 0.08154591044374246, + "grad_norm": 4652.120795122975, + "learning_rate": 1.974337638776866e-05, + "loss": 697.4078, + "step": 4240 + }, + { + "epoch": 0.08173823570422298, + "grad_norm": 3915.7909486183266, + "learning_rate": 1.9742000807947986e-05, + "loss": 684.809, + "step": 4250 + }, + { + "epoch": 0.08193056096470351, + "grad_norm": 3965.057614153028, + "learning_rate": 1.9740621599392907e-05, + "loss": 683.939, + "step": 4260 + }, + { + "epoch": 0.08212288622518403, + "grad_norm": 3550.8897728467455, + "learning_rate": 1.9739238762617155e-05, + "loss": 682.1476, + "step": 4270 + }, + { + "epoch": 0.08231521148566455, + "grad_norm": 3286.498018166622, + "learning_rate": 1.973785229813581e-05, + "loss": 696.0196, + "step": 4280 + }, + { + "epoch": 0.08250753674614508, + "grad_norm": 3933.2518693470192, + "learning_rate": 1.973646220646531e-05, + "loss": 700.2589, + "step": 4290 + }, + { + "epoch": 0.0826998620066256, + "grad_norm": 3547.496296922659, + "learning_rate": 1.973506848812344e-05, + "loss": 682.1439, + "step": 4300 + }, + { + "epoch": 0.08289218726710613, + "grad_norm": 3681.139683691825, + "learning_rate": 1.9733671143629342e-05, + "loss": 715.1272, + "step": 4310 + }, + { + "epoch": 0.08308451252758665, + "grad_norm": 3406.1751920767338, + "learning_rate": 1.9732270173503493e-05, + "loss": 687.8295, + "step": 4320 + }, + { + "epoch": 0.08327683778806717, + "grad_norm": 4216.349904376469, + "learning_rate": 1.9730865578267745e-05, + "loss": 677.5665, + "step": 4330 + }, + { + "epoch": 0.0834691630485477, + "grad_norm": 3599.608471801573, + "learning_rate": 1.972945735844528e-05, + "loss": 688.3549, + "step": 4340 + }, + { + "epoch": 0.08366148830902823, + "grad_norm": 3780.9374100271048, + "learning_rate": 1.972804551456063e-05, + "loss": 690.7379, + "step": 4350 + }, + { + "epoch": 0.08385381356950876, + "grad_norm": 3928.9846954940863, + "learning_rate": 1.9726630047139695e-05, + "loss": 689.8347, + "step": 4360 + }, + { + "epoch": 0.08404613882998928, + "grad_norm": 3504.6145494707266, + "learning_rate": 1.9725210956709707e-05, + "loss": 667.2354, + "step": 4370 + }, + { + "epoch": 0.0842384640904698, + "grad_norm": 3819.37645549846, + "learning_rate": 1.9723788243799253e-05, + "loss": 695.1894, + "step": 4380 + }, + { + "epoch": 0.08443078935095033, + "grad_norm": 7974.730780145998, + "learning_rate": 1.972236190893827e-05, + "loss": 662.5851, + "step": 4390 + }, + { + "epoch": 0.08462311461143085, + "grad_norm": 4913.461322493489, + "learning_rate": 1.972093195265805e-05, + "loss": 696.4384, + "step": 4400 + }, + { + "epoch": 0.08481543987191138, + "grad_norm": 5453.674250298887, + "learning_rate": 1.9719498375491224e-05, + "loss": 678.2224, + "step": 4410 + }, + { + "epoch": 0.0850077651323919, + "grad_norm": 4527.548679595546, + "learning_rate": 1.9718061177971777e-05, + "loss": 681.2826, + "step": 4420 + }, + { + "epoch": 0.08520009039287242, + "grad_norm": 3514.229150281252, + "learning_rate": 1.9716620360635036e-05, + "loss": 684.7965, + "step": 4430 + }, + { + "epoch": 0.08539241565335295, + "grad_norm": 3813.8794069158303, + "learning_rate": 1.971517592401769e-05, + "loss": 689.1514, + "step": 4440 + }, + { + "epoch": 0.08558474091383347, + "grad_norm": 3379.7322737514255, + "learning_rate": 1.9713727868657764e-05, + "loss": 660.6949, + "step": 4450 + }, + { + "epoch": 0.085777066174314, + "grad_norm": 3888.111140438278, + "learning_rate": 1.971227619509463e-05, + "loss": 656.5462, + "step": 4460 + }, + { + "epoch": 0.08596939143479453, + "grad_norm": 3449.1515907494745, + "learning_rate": 1.971082090386902e-05, + "loss": 686.3935, + "step": 4470 + }, + { + "epoch": 0.08616171669527505, + "grad_norm": 4084.203049728152, + "learning_rate": 1.9709361995523e-05, + "loss": 668.6304, + "step": 4480 + }, + { + "epoch": 0.08635404195575558, + "grad_norm": 4052.6042528137004, + "learning_rate": 1.9707899470599998e-05, + "loss": 696.8607, + "step": 4490 + }, + { + "epoch": 0.0865463672162361, + "grad_norm": 3839.1314575799543, + "learning_rate": 1.970643332964477e-05, + "loss": 663.3869, + "step": 4500 + }, + { + "epoch": 0.08673869247671662, + "grad_norm": 3777.0372160906877, + "learning_rate": 1.9704963573203435e-05, + "loss": 662.3137, + "step": 4510 + }, + { + "epoch": 0.08693101773719715, + "grad_norm": 4876.160137903215, + "learning_rate": 1.970349020182345e-05, + "loss": 688.069, + "step": 4520 + }, + { + "epoch": 0.08712334299767767, + "grad_norm": 4395.015874331804, + "learning_rate": 1.9702013216053623e-05, + "loss": 653.012, + "step": 4530 + }, + { + "epoch": 0.0873156682581582, + "grad_norm": 3832.486878076765, + "learning_rate": 1.9700532616444114e-05, + "loss": 676.1902, + "step": 4540 + }, + { + "epoch": 0.08750799351863872, + "grad_norm": 4540.329496125654, + "learning_rate": 1.969904840354641e-05, + "loss": 681.3762, + "step": 4550 + }, + { + "epoch": 0.08770031877911924, + "grad_norm": 5575.001699947538, + "learning_rate": 1.9697560577913358e-05, + "loss": 713.7773, + "step": 4560 + }, + { + "epoch": 0.08789264403959977, + "grad_norm": 4208.441348510918, + "learning_rate": 1.9696069140099152e-05, + "loss": 670.8771, + "step": 4570 + }, + { + "epoch": 0.08808496930008029, + "grad_norm": 3979.707409534953, + "learning_rate": 1.969457409065933e-05, + "loss": 661.7551, + "step": 4580 + }, + { + "epoch": 0.08827729456056083, + "grad_norm": 3554.7321912639827, + "learning_rate": 1.969307543015077e-05, + "loss": 654.7043, + "step": 4590 + }, + { + "epoch": 0.08846961982104135, + "grad_norm": 3829.398406063814, + "learning_rate": 1.9691573159131696e-05, + "loss": 662.6666, + "step": 4600 + }, + { + "epoch": 0.08866194508152186, + "grad_norm": 3498.147132863079, + "learning_rate": 1.9690067278161686e-05, + "loss": 663.5632, + "step": 4610 + }, + { + "epoch": 0.0888542703420024, + "grad_norm": 3414.3920898290144, + "learning_rate": 1.9688557787801647e-05, + "loss": 665.1435, + "step": 4620 + }, + { + "epoch": 0.08904659560248292, + "grad_norm": 3687.0786155832902, + "learning_rate": 1.968704468861385e-05, + "loss": 668.0722, + "step": 4630 + }, + { + "epoch": 0.08923892086296345, + "grad_norm": 3221.0008705613354, + "learning_rate": 1.968552798116189e-05, + "loss": 640.526, + "step": 4640 + }, + { + "epoch": 0.08943124612344397, + "grad_norm": 3774.429813675378, + "learning_rate": 1.9684007666010716e-05, + "loss": 649.5702, + "step": 4650 + }, + { + "epoch": 0.08962357138392449, + "grad_norm": 3967.5309119278195, + "learning_rate": 1.9682483743726624e-05, + "loss": 679.9959, + "step": 4660 + }, + { + "epoch": 0.08981589664440502, + "grad_norm": 3632.2186772085734, + "learning_rate": 1.968095621487725e-05, + "loss": 658.3854, + "step": 4670 + }, + { + "epoch": 0.09000822190488554, + "grad_norm": 3596.7121090116234, + "learning_rate": 1.9679425080031574e-05, + "loss": 643.3561, + "step": 4680 + }, + { + "epoch": 0.09020054716536607, + "grad_norm": 3532.2609192610566, + "learning_rate": 1.9677890339759914e-05, + "loss": 664.0748, + "step": 4690 + }, + { + "epoch": 0.0903928724258466, + "grad_norm": 3681.385507188368, + "learning_rate": 1.967635199463394e-05, + "loss": 639.0781, + "step": 4700 + }, + { + "epoch": 0.09058519768632711, + "grad_norm": 3528.3784190038627, + "learning_rate": 1.9674810045226658e-05, + "loss": 647.9231, + "step": 4710 + }, + { + "epoch": 0.09077752294680765, + "grad_norm": 5344.490306861034, + "learning_rate": 1.967326449211242e-05, + "loss": 650.9944, + "step": 4720 + }, + { + "epoch": 0.09096984820728816, + "grad_norm": 3538.6462529986425, + "learning_rate": 1.9671715335866915e-05, + "loss": 647.9663, + "step": 4730 + }, + { + "epoch": 0.09116217346776868, + "grad_norm": 4344.12153977402, + "learning_rate": 1.9670162577067182e-05, + "loss": 672.18, + "step": 4740 + }, + { + "epoch": 0.09135449872824922, + "grad_norm": 4036.565719761374, + "learning_rate": 1.9668606216291598e-05, + "loss": 647.3205, + "step": 4750 + }, + { + "epoch": 0.09154682398872974, + "grad_norm": 3893.713175141919, + "learning_rate": 1.9667046254119878e-05, + "loss": 630.2145, + "step": 4760 + }, + { + "epoch": 0.09173914924921027, + "grad_norm": 3463.037515331363, + "learning_rate": 1.966548269113309e-05, + "loss": 644.2297, + "step": 4770 + }, + { + "epoch": 0.09193147450969079, + "grad_norm": 3854.619786972281, + "learning_rate": 1.9663915527913628e-05, + "loss": 646.2749, + "step": 4780 + }, + { + "epoch": 0.09212379977017131, + "grad_norm": 3486.178830596917, + "learning_rate": 1.9662344765045237e-05, + "loss": 652.4989, + "step": 4790 + }, + { + "epoch": 0.09231612503065184, + "grad_norm": 3496.403710982168, + "learning_rate": 1.9660770403112996e-05, + "loss": 638.6127, + "step": 4800 + }, + { + "epoch": 0.09250845029113236, + "grad_norm": 3473.9121597119747, + "learning_rate": 1.9659192442703336e-05, + "loss": 642.948, + "step": 4810 + }, + { + "epoch": 0.0927007755516129, + "grad_norm": 5673.242669785963, + "learning_rate": 1.965761088440402e-05, + "loss": 653.6242, + "step": 4820 + }, + { + "epoch": 0.09289310081209341, + "grad_norm": 3597.424668915692, + "learning_rate": 1.9656025728804147e-05, + "loss": 648.2441, + "step": 4830 + }, + { + "epoch": 0.09308542607257393, + "grad_norm": 3240.3352994047696, + "learning_rate": 1.9654436976494165e-05, + "loss": 630.7824, + "step": 4840 + }, + { + "epoch": 0.09327775133305446, + "grad_norm": 3953.519280020802, + "learning_rate": 1.9652844628065857e-05, + "loss": 624.2006, + "step": 4850 + }, + { + "epoch": 0.09347007659353498, + "grad_norm": 3325.9017233701265, + "learning_rate": 1.965124868411235e-05, + "loss": 643.9571, + "step": 4860 + }, + { + "epoch": 0.09366240185401552, + "grad_norm": 3811.625234994872, + "learning_rate": 1.96496491452281e-05, + "loss": 625.1706, + "step": 4870 + }, + { + "epoch": 0.09385472711449604, + "grad_norm": 3540.3198944758788, + "learning_rate": 1.9648046012008916e-05, + "loss": 618.9432, + "step": 4880 + }, + { + "epoch": 0.09404705237497656, + "grad_norm": 3787.62609735396, + "learning_rate": 1.9646439285051936e-05, + "loss": 634.823, + "step": 4890 + }, + { + "epoch": 0.09423937763545709, + "grad_norm": 8037.970807135463, + "learning_rate": 1.9644828964955633e-05, + "loss": 638.2597, + "step": 4900 + }, + { + "epoch": 0.09443170289593761, + "grad_norm": 3236.2324896471923, + "learning_rate": 1.9643215052319836e-05, + "loss": 645.7353, + "step": 4910 + }, + { + "epoch": 0.09462402815641814, + "grad_norm": 4459.467303436796, + "learning_rate": 1.9641597547745694e-05, + "loss": 655.9531, + "step": 4920 + }, + { + "epoch": 0.09481635341689866, + "grad_norm": 3528.380784037965, + "learning_rate": 1.9639976451835698e-05, + "loss": 643.4895, + "step": 4930 + }, + { + "epoch": 0.09500867867737918, + "grad_norm": 3544.20401318225, + "learning_rate": 1.9638351765193685e-05, + "loss": 632.3099, + "step": 4940 + }, + { + "epoch": 0.09520100393785971, + "grad_norm": 3482.587986622822, + "learning_rate": 1.9636723488424823e-05, + "loss": 605.5317, + "step": 4950 + }, + { + "epoch": 0.09539332919834023, + "grad_norm": 3616.758129257622, + "learning_rate": 1.9635091622135616e-05, + "loss": 628.818, + "step": 4960 + }, + { + "epoch": 0.09558565445882075, + "grad_norm": 3540.071237234814, + "learning_rate": 1.963345616693391e-05, + "loss": 617.1661, + "step": 4970 + }, + { + "epoch": 0.09577797971930128, + "grad_norm": 3479.171243388493, + "learning_rate": 1.9631817123428883e-05, + "loss": 628.7106, + "step": 4980 + }, + { + "epoch": 0.0959703049797818, + "grad_norm": 3748.258658698097, + "learning_rate": 1.9630174492231052e-05, + "loss": 619.0021, + "step": 4990 + }, + { + "epoch": 0.09616263024026234, + "grad_norm": 3693.4855798077833, + "learning_rate": 1.962852827395227e-05, + "loss": 643.9448, + "step": 5000 + }, + { + "epoch": 0.09635495550074286, + "grad_norm": 3382.9156635765858, + "learning_rate": 1.962687846920573e-05, + "loss": 616.662, + "step": 5010 + }, + { + "epoch": 0.09654728076122338, + "grad_norm": 3460.71630735885, + "learning_rate": 1.9625225078605946e-05, + "loss": 616.0104, + "step": 5020 + }, + { + "epoch": 0.09673960602170391, + "grad_norm": 3282.016150396431, + "learning_rate": 1.9623568102768792e-05, + "loss": 618.4474, + "step": 5030 + }, + { + "epoch": 0.09693193128218443, + "grad_norm": 3798.3562679617376, + "learning_rate": 1.9621907542311457e-05, + "loss": 623.6288, + "step": 5040 + }, + { + "epoch": 0.09712425654266496, + "grad_norm": 3589.7586622669473, + "learning_rate": 1.9620243397852473e-05, + "loss": 595.6706, + "step": 5050 + }, + { + "epoch": 0.09731658180314548, + "grad_norm": 3335.156327211705, + "learning_rate": 1.9618575670011705e-05, + "loss": 597.4271, + "step": 5060 + }, + { + "epoch": 0.097508907063626, + "grad_norm": 4023.356685481532, + "learning_rate": 1.9616904359410357e-05, + "loss": 605.0412, + "step": 5070 + }, + { + "epoch": 0.09770123232410653, + "grad_norm": 3004.0876038850347, + "learning_rate": 1.9615229466670963e-05, + "loss": 615.485, + "step": 5080 + }, + { + "epoch": 0.09789355758458705, + "grad_norm": 3945.3817207057436, + "learning_rate": 1.9613550992417396e-05, + "loss": 610.2364, + "step": 5090 + }, + { + "epoch": 0.09808588284506758, + "grad_norm": 3604.836739141922, + "learning_rate": 1.961186893727486e-05, + "loss": 612.9978, + "step": 5100 + }, + { + "epoch": 0.0982782081055481, + "grad_norm": 3899.040526298082, + "learning_rate": 1.9610183301869882e-05, + "loss": 593.7491, + "step": 5110 + }, + { + "epoch": 0.09847053336602862, + "grad_norm": 3715.6778759239937, + "learning_rate": 1.9608494086830348e-05, + "loss": 599.9813, + "step": 5120 + }, + { + "epoch": 0.09866285862650916, + "grad_norm": 3599.5731398702455, + "learning_rate": 1.9606801292785452e-05, + "loss": 608.794, + "step": 5130 + }, + { + "epoch": 0.09885518388698968, + "grad_norm": 4871.124936102754, + "learning_rate": 1.960510492036574e-05, + "loss": 610.1908, + "step": 5140 + }, + { + "epoch": 0.09904750914747021, + "grad_norm": 3476.5757955014783, + "learning_rate": 1.9603404970203078e-05, + "loss": 618.4548, + "step": 5150 + }, + { + "epoch": 0.09923983440795073, + "grad_norm": 3663.9674310657874, + "learning_rate": 1.9601701442930667e-05, + "loss": 627.1068, + "step": 5160 + }, + { + "epoch": 0.09943215966843125, + "grad_norm": 3325.530184562212, + "learning_rate": 1.9599994339183047e-05, + "loss": 615.4588, + "step": 5170 + }, + { + "epoch": 0.09962448492891178, + "grad_norm": 3582.445627097957, + "learning_rate": 1.9598283659596084e-05, + "loss": 598.4591, + "step": 5180 + }, + { + "epoch": 0.0998168101893923, + "grad_norm": 3783.664326256545, + "learning_rate": 1.9596569404806983e-05, + "loss": 597.5333, + "step": 5190 + }, + { + "epoch": 0.10000913544987282, + "grad_norm": 3347.1457913067775, + "learning_rate": 1.9594851575454266e-05, + "loss": 596.7927, + "step": 5200 + }, + { + "epoch": 0.10020146071035335, + "grad_norm": 3355.4591492034247, + "learning_rate": 1.9593130172177806e-05, + "loss": 622.0267, + "step": 5210 + }, + { + "epoch": 0.10039378597083387, + "grad_norm": 3391.3074568265647, + "learning_rate": 1.959140519561879e-05, + "loss": 610.5841, + "step": 5220 + }, + { + "epoch": 0.1005861112313144, + "grad_norm": 3483.8305438194234, + "learning_rate": 1.9589676646419744e-05, + "loss": 606.7154, + "step": 5230 + }, + { + "epoch": 0.10077843649179492, + "grad_norm": 3343.385466805086, + "learning_rate": 1.958794452522453e-05, + "loss": 611.2106, + "step": 5240 + }, + { + "epoch": 0.10097076175227544, + "grad_norm": 3245.0048993151086, + "learning_rate": 1.9586208832678328e-05, + "loss": 592.585, + "step": 5250 + }, + { + "epoch": 0.10116308701275598, + "grad_norm": 3345.2030475594847, + "learning_rate": 1.958446956942766e-05, + "loss": 605.1469, + "step": 5260 + }, + { + "epoch": 0.1013554122732365, + "grad_norm": 3533.133322503176, + "learning_rate": 1.9582726736120365e-05, + "loss": 596.0807, + "step": 5270 + }, + { + "epoch": 0.10154773753371703, + "grad_norm": 3489.0660001743527, + "learning_rate": 1.958098033340563e-05, + "loss": 598.1188, + "step": 5280 + }, + { + "epoch": 0.10174006279419755, + "grad_norm": 4186.174151433306, + "learning_rate": 1.9579230361933952e-05, + "loss": 618.7304, + "step": 5290 + }, + { + "epoch": 0.10193238805467807, + "grad_norm": 3583.333919258522, + "learning_rate": 1.9577476822357174e-05, + "loss": 587.8064, + "step": 5300 + }, + { + "epoch": 0.1021247133151586, + "grad_norm": 3086.966666664932, + "learning_rate": 1.9575719715328457e-05, + "loss": 609.2442, + "step": 5310 + }, + { + "epoch": 0.10231703857563912, + "grad_norm": 3363.5775793498015, + "learning_rate": 1.957395904150229e-05, + "loss": 586.4506, + "step": 5320 + }, + { + "epoch": 0.10250936383611965, + "grad_norm": 3694.9590168318045, + "learning_rate": 1.9572194801534504e-05, + "loss": 605.8757, + "step": 5330 + }, + { + "epoch": 0.10270168909660017, + "grad_norm": 3332.6618107292793, + "learning_rate": 1.9570426996082238e-05, + "loss": 599.57, + "step": 5340 + }, + { + "epoch": 0.10289401435708069, + "grad_norm": 3317.555584286339, + "learning_rate": 1.9568655625803982e-05, + "loss": 584.7927, + "step": 5350 + }, + { + "epoch": 0.10308633961756122, + "grad_norm": 3586.203961575145, + "learning_rate": 1.956688069135954e-05, + "loss": 595.3776, + "step": 5360 + }, + { + "epoch": 0.10327866487804174, + "grad_norm": 3101.3052556150315, + "learning_rate": 1.9565102193410035e-05, + "loss": 585.8616, + "step": 5370 + }, + { + "epoch": 0.10347099013852228, + "grad_norm": 3142.8410306050705, + "learning_rate": 1.956332013261794e-05, + "loss": 590.7093, + "step": 5380 + }, + { + "epoch": 0.1036633153990028, + "grad_norm": 3237.828944361127, + "learning_rate": 1.9561534509647038e-05, + "loss": 574.7191, + "step": 5390 + }, + { + "epoch": 0.10385564065948331, + "grad_norm": 3487.212973888483, + "learning_rate": 1.9559745325162445e-05, + "loss": 599.8927, + "step": 5400 + }, + { + "epoch": 0.10404796591996385, + "grad_norm": 3320.812445371658, + "learning_rate": 1.9557952579830604e-05, + "loss": 591.8003, + "step": 5410 + }, + { + "epoch": 0.10424029118044437, + "grad_norm": 3088.4325268442735, + "learning_rate": 1.955615627431928e-05, + "loss": 589.8753, + "step": 5420 + }, + { + "epoch": 0.10443261644092489, + "grad_norm": 3059.9743898672727, + "learning_rate": 1.955435640929757e-05, + "loss": 574.5125, + "step": 5430 + }, + { + "epoch": 0.10462494170140542, + "grad_norm": 3230.902278524589, + "learning_rate": 1.9552552985435893e-05, + "loss": 574.9331, + "step": 5440 + }, + { + "epoch": 0.10481726696188594, + "grad_norm": 3112.9865505029543, + "learning_rate": 1.9550746003405996e-05, + "loss": 581.6879, + "step": 5450 + }, + { + "epoch": 0.10500959222236647, + "grad_norm": 3604.150746681098, + "learning_rate": 1.9548935463880945e-05, + "loss": 590.3337, + "step": 5460 + }, + { + "epoch": 0.10520191748284699, + "grad_norm": 3835.5960064615783, + "learning_rate": 1.9547121367535143e-05, + "loss": 599.1407, + "step": 5470 + }, + { + "epoch": 0.10539424274332751, + "grad_norm": 3017.553777298891, + "learning_rate": 1.9545303715044305e-05, + "loss": 592.7393, + "step": 5480 + }, + { + "epoch": 0.10558656800380804, + "grad_norm": 3633.178854381708, + "learning_rate": 1.9543482507085484e-05, + "loss": 587.8829, + "step": 5490 + }, + { + "epoch": 0.10577889326428856, + "grad_norm": 3410.959353056924, + "learning_rate": 1.9541657744337038e-05, + "loss": 577.7798, + "step": 5500 + }, + { + "epoch": 0.1059712185247691, + "grad_norm": 3288.9087004370126, + "learning_rate": 1.9539829427478675e-05, + "loss": 593.8099, + "step": 5510 + }, + { + "epoch": 0.10616354378524961, + "grad_norm": 3389.1495245639303, + "learning_rate": 1.95379975571914e-05, + "loss": 599.0841, + "step": 5520 + }, + { + "epoch": 0.10635586904573013, + "grad_norm": 3408.571500066902, + "learning_rate": 1.953616213415756e-05, + "loss": 579.6134, + "step": 5530 + }, + { + "epoch": 0.10654819430621067, + "grad_norm": 3330.4242393144054, + "learning_rate": 1.9534323159060824e-05, + "loss": 597.6208, + "step": 5540 + }, + { + "epoch": 0.10674051956669119, + "grad_norm": 3452.7173047969713, + "learning_rate": 1.9532480632586175e-05, + "loss": 587.3513, + "step": 5550 + }, + { + "epoch": 0.10693284482717172, + "grad_norm": 3222.0340313059646, + "learning_rate": 1.953063455541992e-05, + "loss": 593.5667, + "step": 5560 + }, + { + "epoch": 0.10712517008765224, + "grad_norm": 3427.3239416696665, + "learning_rate": 1.9528784928249703e-05, + "loss": 577.5851, + "step": 5570 + }, + { + "epoch": 0.10731749534813276, + "grad_norm": 3464.050210020691, + "learning_rate": 1.9526931751764467e-05, + "loss": 568.4066, + "step": 5580 + }, + { + "epoch": 0.10750982060861329, + "grad_norm": 3356.0561712492536, + "learning_rate": 1.95250750266545e-05, + "loss": 577.8563, + "step": 5590 + }, + { + "epoch": 0.10770214586909381, + "grad_norm": 3256.3685934272603, + "learning_rate": 1.9523214753611398e-05, + "loss": 565.334, + "step": 5600 + }, + { + "epoch": 0.10789447112957434, + "grad_norm": 3156.9367460709013, + "learning_rate": 1.952135093332808e-05, + "loss": 590.5764, + "step": 5610 + }, + { + "epoch": 0.10808679639005486, + "grad_norm": 2982.937853922636, + "learning_rate": 1.9519483566498788e-05, + "loss": 574.5607, + "step": 5620 + }, + { + "epoch": 0.10827912165053538, + "grad_norm": 3318.9312930539973, + "learning_rate": 1.9517612653819088e-05, + "loss": 576.3303, + "step": 5630 + }, + { + "epoch": 0.10847144691101591, + "grad_norm": 3292.9260569229145, + "learning_rate": 1.9515738195985868e-05, + "loss": 597.8799, + "step": 5640 + }, + { + "epoch": 0.10866377217149643, + "grad_norm": 3249.3652138353023, + "learning_rate": 1.951386019369732e-05, + "loss": 566.7925, + "step": 5650 + }, + { + "epoch": 0.10885609743197695, + "grad_norm": 2919.023618232392, + "learning_rate": 1.9511978647652984e-05, + "loss": 566.1437, + "step": 5660 + }, + { + "epoch": 0.10904842269245749, + "grad_norm": 3276.7094214119365, + "learning_rate": 1.9510093558553687e-05, + "loss": 574.3124, + "step": 5670 + }, + { + "epoch": 0.109240747952938, + "grad_norm": 2857.4942428475388, + "learning_rate": 1.950820492710161e-05, + "loss": 574.6187, + "step": 5680 + }, + { + "epoch": 0.10943307321341854, + "grad_norm": 3223.1164687825885, + "learning_rate": 1.9506312754000235e-05, + "loss": 572.9128, + "step": 5690 + }, + { + "epoch": 0.10962539847389906, + "grad_norm": 3911.5760853073984, + "learning_rate": 1.9504417039954357e-05, + "loss": 564.8175, + "step": 5700 + }, + { + "epoch": 0.10981772373437958, + "grad_norm": 3277.1211744842003, + "learning_rate": 1.9502517785670098e-05, + "loss": 583.1017, + "step": 5710 + }, + { + "epoch": 0.11001004899486011, + "grad_norm": 3365.049538501484, + "learning_rate": 1.950061499185491e-05, + "loss": 582.307, + "step": 5720 + }, + { + "epoch": 0.11020237425534063, + "grad_norm": 2752.615269421733, + "learning_rate": 1.9498708659217542e-05, + "loss": 567.6096, + "step": 5730 + }, + { + "epoch": 0.11039469951582116, + "grad_norm": 3272.294026166464, + "learning_rate": 1.9496798788468077e-05, + "loss": 556.4454, + "step": 5740 + }, + { + "epoch": 0.11058702477630168, + "grad_norm": 3162.9779601410396, + "learning_rate": 1.9494885380317906e-05, + "loss": 561.9178, + "step": 5750 + }, + { + "epoch": 0.1107793500367822, + "grad_norm": 3087.8642492405775, + "learning_rate": 1.9492968435479744e-05, + "loss": 558.3578, + "step": 5760 + }, + { + "epoch": 0.11097167529726273, + "grad_norm": 3005.3395772840045, + "learning_rate": 1.949104795466762e-05, + "loss": 560.4387, + "step": 5770 + }, + { + "epoch": 0.11116400055774325, + "grad_norm": 3141.6960210526568, + "learning_rate": 1.9489123938596886e-05, + "loss": 547.4087, + "step": 5780 + }, + { + "epoch": 0.11135632581822379, + "grad_norm": 3268.057652349369, + "learning_rate": 1.94871963879842e-05, + "loss": 566.6952, + "step": 5790 + }, + { + "epoch": 0.1115486510787043, + "grad_norm": 3083.8145921229366, + "learning_rate": 1.9485265303547547e-05, + "loss": 555.4364, + "step": 5800 + }, + { + "epoch": 0.11174097633918482, + "grad_norm": 3448.086082615801, + "learning_rate": 1.9483330686006223e-05, + "loss": 566.9604, + "step": 5810 + }, + { + "epoch": 0.11193330159966536, + "grad_norm": 3247.335309053214, + "learning_rate": 1.948139253608084e-05, + "loss": 594.0994, + "step": 5820 + }, + { + "epoch": 0.11212562686014588, + "grad_norm": 3568.8222684517814, + "learning_rate": 1.9479450854493327e-05, + "loss": 558.641, + "step": 5830 + }, + { + "epoch": 0.1123179521206264, + "grad_norm": 3145.708312082184, + "learning_rate": 1.9477505641966933e-05, + "loss": 552.1804, + "step": 5840 + }, + { + "epoch": 0.11251027738110693, + "grad_norm": 3129.4430725024577, + "learning_rate": 1.9475556899226213e-05, + "loss": 558.4812, + "step": 5850 + }, + { + "epoch": 0.11270260264158745, + "grad_norm": 3558.355236191525, + "learning_rate": 1.9473604626997037e-05, + "loss": 550.7903, + "step": 5860 + }, + { + "epoch": 0.11289492790206798, + "grad_norm": 3372.746032867043, + "learning_rate": 1.94716488260066e-05, + "loss": 564.3837, + "step": 5870 + }, + { + "epoch": 0.1130872531625485, + "grad_norm": 2857.0565136248692, + "learning_rate": 1.946968949698341e-05, + "loss": 556.8066, + "step": 5880 + }, + { + "epoch": 0.11327957842302902, + "grad_norm": 3096.655666199947, + "learning_rate": 1.9467726640657277e-05, + "loss": 547.5038, + "step": 5890 + }, + { + "epoch": 0.11347190368350955, + "grad_norm": 3851.2534685833766, + "learning_rate": 1.9465760257759336e-05, + "loss": 553.2814, + "step": 5900 + }, + { + "epoch": 0.11366422894399007, + "grad_norm": 2864.5611039769974, + "learning_rate": 1.9463790349022027e-05, + "loss": 541.3181, + "step": 5910 + }, + { + "epoch": 0.1138565542044706, + "grad_norm": 3540.314022208594, + "learning_rate": 1.9461816915179117e-05, + "loss": 555.9016, + "step": 5920 + }, + { + "epoch": 0.11404887946495113, + "grad_norm": 2959.323534297358, + "learning_rate": 1.945983995696567e-05, + "loss": 560.4891, + "step": 5930 + }, + { + "epoch": 0.11424120472543164, + "grad_norm": 3245.220000408806, + "learning_rate": 1.9457859475118077e-05, + "loss": 551.865, + "step": 5940 + }, + { + "epoch": 0.11443352998591218, + "grad_norm": 3259.892769071125, + "learning_rate": 1.9455875470374027e-05, + "loss": 572.4262, + "step": 5950 + }, + { + "epoch": 0.1146258552463927, + "grad_norm": 3186.270876043077, + "learning_rate": 1.9453887943472532e-05, + "loss": 533.9896, + "step": 5960 + }, + { + "epoch": 0.11481818050687323, + "grad_norm": 3405.081323223352, + "learning_rate": 1.945189689515392e-05, + "loss": 577.4116, + "step": 5970 + }, + { + "epoch": 0.11501050576735375, + "grad_norm": 3057.157431474911, + "learning_rate": 1.9449902326159815e-05, + "loss": 539.4978, + "step": 5980 + }, + { + "epoch": 0.11520283102783427, + "grad_norm": 3090.1708643033476, + "learning_rate": 1.9447904237233164e-05, + "loss": 552.6425, + "step": 5990 + }, + { + "epoch": 0.1153951562883148, + "grad_norm": 3370.0202219698854, + "learning_rate": 1.9445902629118223e-05, + "loss": 543.0637, + "step": 6000 + }, + { + "epoch": 0.11558748154879532, + "grad_norm": 3353.0985821305353, + "learning_rate": 1.9443897502560555e-05, + "loss": 569.4459, + "step": 6010 + }, + { + "epoch": 0.11577980680927585, + "grad_norm": 3128.586725412783, + "learning_rate": 1.9441888858307042e-05, + "loss": 577.1365, + "step": 6020 + }, + { + "epoch": 0.11597213206975637, + "grad_norm": 3093.328840960538, + "learning_rate": 1.943987669710586e-05, + "loss": 540.1195, + "step": 6030 + }, + { + "epoch": 0.11616445733023689, + "grad_norm": 2811.620026745984, + "learning_rate": 1.9437861019706522e-05, + "loss": 541.466, + "step": 6040 + }, + { + "epoch": 0.11635678259071743, + "grad_norm": 3218.77476644157, + "learning_rate": 1.943584182685982e-05, + "loss": 561.2151, + "step": 6050 + }, + { + "epoch": 0.11654910785119794, + "grad_norm": 4422.75107123467, + "learning_rate": 1.9433819119317878e-05, + "loss": 556.3842, + "step": 6060 + }, + { + "epoch": 0.11674143311167846, + "grad_norm": 3247.898932738754, + "learning_rate": 1.9431792897834115e-05, + "loss": 541.4021, + "step": 6070 + }, + { + "epoch": 0.116933758372159, + "grad_norm": 3405.2837613700676, + "learning_rate": 1.9429763163163273e-05, + "loss": 559.3579, + "step": 6080 + }, + { + "epoch": 0.11712608363263952, + "grad_norm": 3133.3314475673415, + "learning_rate": 1.942772991606139e-05, + "loss": 534.2391, + "step": 6090 + }, + { + "epoch": 0.11731840889312005, + "grad_norm": 3360.2424027132392, + "learning_rate": 1.9425693157285816e-05, + "loss": 545.0727, + "step": 6100 + }, + { + "epoch": 0.11751073415360057, + "grad_norm": 3263.1690456164156, + "learning_rate": 1.942365288759521e-05, + "loss": 535.2984, + "step": 6110 + }, + { + "epoch": 0.11770305941408109, + "grad_norm": 3334.7830618196663, + "learning_rate": 1.9421609107749542e-05, + "loss": 560.9981, + "step": 6120 + }, + { + "epoch": 0.11789538467456162, + "grad_norm": 3400.2054641704676, + "learning_rate": 1.9419561818510085e-05, + "loss": 546.2139, + "step": 6130 + }, + { + "epoch": 0.11808770993504214, + "grad_norm": 2909.570257548634, + "learning_rate": 1.9417511020639416e-05, + "loss": 540.974, + "step": 6140 + }, + { + "epoch": 0.11828003519552267, + "grad_norm": 3034.877627466283, + "learning_rate": 1.9415456714901432e-05, + "loss": 554.5541, + "step": 6150 + }, + { + "epoch": 0.11847236045600319, + "grad_norm": 2997.168419008984, + "learning_rate": 1.941339890206132e-05, + "loss": 554.274, + "step": 6160 + }, + { + "epoch": 0.11866468571648371, + "grad_norm": 3087.6845287312244, + "learning_rate": 1.9411337582885587e-05, + "loss": 543.0096, + "step": 6170 + }, + { + "epoch": 0.11885701097696424, + "grad_norm": 3144.6303973691224, + "learning_rate": 1.9409272758142034e-05, + "loss": 531.5663, + "step": 6180 + }, + { + "epoch": 0.11904933623744476, + "grad_norm": 3103.4789362058136, + "learning_rate": 1.940720442859978e-05, + "loss": 546.0656, + "step": 6190 + }, + { + "epoch": 0.1192416614979253, + "grad_norm": 3025.615510900712, + "learning_rate": 1.940513259502924e-05, + "loss": 545.3059, + "step": 6200 + }, + { + "epoch": 0.11943398675840582, + "grad_norm": 3334.150702693289, + "learning_rate": 1.9403057258202144e-05, + "loss": 538.4965, + "step": 6210 + }, + { + "epoch": 0.11962631201888634, + "grad_norm": 3347.3229811869683, + "learning_rate": 1.940097841889151e-05, + "loss": 536.4763, + "step": 6220 + }, + { + "epoch": 0.11981863727936687, + "grad_norm": 2998.9760673185115, + "learning_rate": 1.939889607787168e-05, + "loss": 548.1066, + "step": 6230 + }, + { + "epoch": 0.12001096253984739, + "grad_norm": 3109.4795743976388, + "learning_rate": 1.9396810235918287e-05, + "loss": 543.2923, + "step": 6240 + }, + { + "epoch": 0.12020328780032792, + "grad_norm": 3232.870774546269, + "learning_rate": 1.939472089380828e-05, + "loss": 534.96, + "step": 6250 + }, + { + "epoch": 0.12039561306080844, + "grad_norm": 2978.4002590683954, + "learning_rate": 1.9392628052319895e-05, + "loss": 543.118, + "step": 6260 + }, + { + "epoch": 0.12058793832128896, + "grad_norm": 2919.3960325806106, + "learning_rate": 1.9390531712232687e-05, + "loss": 542.5694, + "step": 6270 + }, + { + "epoch": 0.12078026358176949, + "grad_norm": 2927.486233081042, + "learning_rate": 1.9388431874327504e-05, + "loss": 564.2301, + "step": 6280 + }, + { + "epoch": 0.12097258884225001, + "grad_norm": 3158.569297084666, + "learning_rate": 1.9386328539386502e-05, + "loss": 549.4159, + "step": 6290 + }, + { + "epoch": 0.12116491410273053, + "grad_norm": 2810.2562785000573, + "learning_rate": 1.938422170819314e-05, + "loss": 543.4707, + "step": 6300 + }, + { + "epoch": 0.12135723936321106, + "grad_norm": 3278.6095574682113, + "learning_rate": 1.938211138153218e-05, + "loss": 530.399, + "step": 6310 + }, + { + "epoch": 0.12154956462369158, + "grad_norm": 3300.947726198494, + "learning_rate": 1.9379997560189677e-05, + "loss": 533.7925, + "step": 6320 + }, + { + "epoch": 0.12174188988417212, + "grad_norm": 2896.6190321511403, + "learning_rate": 1.9377880244953e-05, + "loss": 530.0928, + "step": 6330 + }, + { + "epoch": 0.12193421514465264, + "grad_norm": 3128.138150461743, + "learning_rate": 1.9375759436610813e-05, + "loss": 539.4151, + "step": 6340 + }, + { + "epoch": 0.12212654040513315, + "grad_norm": 3188.0923666919643, + "learning_rate": 1.937363513595308e-05, + "loss": 536.5022, + "step": 6350 + }, + { + "epoch": 0.12231886566561369, + "grad_norm": 3007.3963405043382, + "learning_rate": 1.937150734377107e-05, + "loss": 520.4052, + "step": 6360 + }, + { + "epoch": 0.12251119092609421, + "grad_norm": 3084.954357271496, + "learning_rate": 1.9369376060857354e-05, + "loss": 520.9371, + "step": 6370 + }, + { + "epoch": 0.12270351618657474, + "grad_norm": 3083.7171664235193, + "learning_rate": 1.936724128800579e-05, + "loss": 543.9652, + "step": 6380 + }, + { + "epoch": 0.12289584144705526, + "grad_norm": 2909.9761982180416, + "learning_rate": 1.9365103026011555e-05, + "loss": 535.6667, + "step": 6390 + }, + { + "epoch": 0.12308816670753578, + "grad_norm": 4861.786739150577, + "learning_rate": 1.9362961275671112e-05, + "loss": 545.8405, + "step": 6400 + }, + { + "epoch": 0.12328049196801631, + "grad_norm": 3752.910423446671, + "learning_rate": 1.936081603778223e-05, + "loss": 555.2303, + "step": 6410 + }, + { + "epoch": 0.12347281722849683, + "grad_norm": 3161.1223359526, + "learning_rate": 1.9358667313143972e-05, + "loss": 533.0413, + "step": 6420 + }, + { + "epoch": 0.12366514248897736, + "grad_norm": 3084.112356224589, + "learning_rate": 1.93565151025567e-05, + "loss": 533.5978, + "step": 6430 + }, + { + "epoch": 0.12385746774945788, + "grad_norm": 2753.7211396992348, + "learning_rate": 1.9354359406822084e-05, + "loss": 538.9962, + "step": 6440 + }, + { + "epoch": 0.1240497930099384, + "grad_norm": 2830.5405856034, + "learning_rate": 1.935220022674308e-05, + "loss": 524.1744, + "step": 6450 + }, + { + "epoch": 0.12424211827041894, + "grad_norm": 2651.3192158256934, + "learning_rate": 1.9350037563123947e-05, + "loss": 510.2089, + "step": 6460 + }, + { + "epoch": 0.12443444353089946, + "grad_norm": 3168.1823016936264, + "learning_rate": 1.9347871416770245e-05, + "loss": 536.1155, + "step": 6470 + }, + { + "epoch": 0.12462676879137999, + "grad_norm": 3202.03309123853, + "learning_rate": 1.9345701788488825e-05, + "loss": 520.9955, + "step": 6480 + }, + { + "epoch": 0.12481909405186051, + "grad_norm": 2840.598505873904, + "learning_rate": 1.9343528679087837e-05, + "loss": 527.8342, + "step": 6490 + }, + { + "epoch": 0.12501141931234103, + "grad_norm": 3211.6566188190445, + "learning_rate": 1.934135208937673e-05, + "loss": 530.155, + "step": 6500 + }, + { + "epoch": 0.12520374457282155, + "grad_norm": 2955.8310508464415, + "learning_rate": 1.9339172020166245e-05, + "loss": 539.0327, + "step": 6510 + }, + { + "epoch": 0.1253960698333021, + "grad_norm": 2971.960397576802, + "learning_rate": 1.933698847226843e-05, + "loss": 531.7926, + "step": 6520 + }, + { + "epoch": 0.1255883950937826, + "grad_norm": 2881.037520428358, + "learning_rate": 1.9334801446496606e-05, + "loss": 518.636, + "step": 6530 + }, + { + "epoch": 0.12578072035426313, + "grad_norm": 2676.6782659120877, + "learning_rate": 1.933261094366542e-05, + "loss": 527.1354, + "step": 6540 + }, + { + "epoch": 0.12597304561474365, + "grad_norm": 2821.330151632555, + "learning_rate": 1.9330416964590787e-05, + "loss": 506.6257, + "step": 6550 + }, + { + "epoch": 0.12616537087522417, + "grad_norm": 3257.4503429609927, + "learning_rate": 1.932821951008993e-05, + "loss": 538.6153, + "step": 6560 + }, + { + "epoch": 0.12635769613570472, + "grad_norm": 2939.282591331584, + "learning_rate": 1.9326018580981373e-05, + "loss": 523.2369, + "step": 6570 + }, + { + "epoch": 0.12655002139618524, + "grad_norm": 2841.0672894326, + "learning_rate": 1.9323814178084914e-05, + "loss": 516.4375, + "step": 6580 + }, + { + "epoch": 0.12674234665666576, + "grad_norm": 3673.70106053849, + "learning_rate": 1.9321606302221662e-05, + "loss": 531.7859, + "step": 6590 + }, + { + "epoch": 0.12693467191714627, + "grad_norm": 2939.5291681456047, + "learning_rate": 1.9319394954214013e-05, + "loss": 529.4131, + "step": 6600 + }, + { + "epoch": 0.1271269971776268, + "grad_norm": 3374.2767657512536, + "learning_rate": 1.931718013488566e-05, + "loss": 525.3339, + "step": 6610 + }, + { + "epoch": 0.12731932243810734, + "grad_norm": 3028.5653610600807, + "learning_rate": 1.9314961845061584e-05, + "loss": 529.326, + "step": 6620 + }, + { + "epoch": 0.12751164769858786, + "grad_norm": 4369.605625187626, + "learning_rate": 1.9312740085568063e-05, + "loss": 537.9184, + "step": 6630 + }, + { + "epoch": 0.12770397295906838, + "grad_norm": 3241.5922469882184, + "learning_rate": 1.9310514857232666e-05, + "loss": 513.9442, + "step": 6640 + }, + { + "epoch": 0.1278962982195489, + "grad_norm": 2876.8227695115215, + "learning_rate": 1.930828616088425e-05, + "loss": 504.4727, + "step": 6650 + }, + { + "epoch": 0.12808862348002942, + "grad_norm": 2964.3805792671037, + "learning_rate": 1.9306053997352973e-05, + "loss": 513.8322, + "step": 6660 + }, + { + "epoch": 0.12828094874050994, + "grad_norm": 3096.749657798393, + "learning_rate": 1.9303818367470274e-05, + "loss": 511.4317, + "step": 6670 + }, + { + "epoch": 0.12847327400099048, + "grad_norm": 2890.5227620763667, + "learning_rate": 1.9301579272068894e-05, + "loss": 521.1064, + "step": 6680 + }, + { + "epoch": 0.128665599261471, + "grad_norm": 3158.701263857356, + "learning_rate": 1.9299336711982853e-05, + "loss": 518.0244, + "step": 6690 + }, + { + "epoch": 0.12885792452195152, + "grad_norm": 2940.1574261384253, + "learning_rate": 1.9297090688047473e-05, + "loss": 520.0789, + "step": 6700 + }, + { + "epoch": 0.12905024978243204, + "grad_norm": 2992.618765154942, + "learning_rate": 1.929484120109936e-05, + "loss": 514.3528, + "step": 6710 + }, + { + "epoch": 0.12924257504291256, + "grad_norm": 3020.590448166519, + "learning_rate": 1.9292588251976404e-05, + "loss": 508.5652, + "step": 6720 + }, + { + "epoch": 0.1294349003033931, + "grad_norm": 2856.572759086012, + "learning_rate": 1.92903318415178e-05, + "loss": 519.9399, + "step": 6730 + }, + { + "epoch": 0.12962722556387363, + "grad_norm": 2926.8951867528344, + "learning_rate": 1.9288071970564015e-05, + "loss": 532.937, + "step": 6740 + }, + { + "epoch": 0.12981955082435415, + "grad_norm": 3132.1509036620314, + "learning_rate": 1.9285808639956823e-05, + "loss": 522.9871, + "step": 6750 + }, + { + "epoch": 0.13001187608483467, + "grad_norm": 3174.114298022321, + "learning_rate": 1.9283541850539272e-05, + "loss": 515.1804, + "step": 6760 + }, + { + "epoch": 0.13020420134531518, + "grad_norm": 2742.0318887407143, + "learning_rate": 1.92812716031557e-05, + "loss": 509.1974, + "step": 6770 + }, + { + "epoch": 0.13039652660579573, + "grad_norm": 3301.8920060888313, + "learning_rate": 1.9278997898651746e-05, + "loss": 517.4712, + "step": 6780 + }, + { + "epoch": 0.13058885186627625, + "grad_norm": 2984.211272567207, + "learning_rate": 1.9276720737874327e-05, + "loss": 515.5563, + "step": 6790 + }, + { + "epoch": 0.13078117712675677, + "grad_norm": 2876.1911518086067, + "learning_rate": 1.9274440121671637e-05, + "loss": 503.4981, + "step": 6800 + }, + { + "epoch": 0.1309735023872373, + "grad_norm": 3045.9755222607732, + "learning_rate": 1.9272156050893173e-05, + "loss": 513.2415, + "step": 6810 + }, + { + "epoch": 0.1311658276477178, + "grad_norm": 3285.03843223816, + "learning_rate": 1.926986852638972e-05, + "loss": 513.935, + "step": 6820 + }, + { + "epoch": 0.13135815290819836, + "grad_norm": 3552.897797910797, + "learning_rate": 1.926757754901333e-05, + "loss": 506.6312, + "step": 6830 + }, + { + "epoch": 0.13155047816867887, + "grad_norm": 2892.9720519584675, + "learning_rate": 1.926528311961737e-05, + "loss": 511.6736, + "step": 6840 + }, + { + "epoch": 0.1317428034291594, + "grad_norm": 3018.4668902078124, + "learning_rate": 1.9262985239056463e-05, + "loss": 516.399, + "step": 6850 + }, + { + "epoch": 0.1319351286896399, + "grad_norm": 3492.7703250436643, + "learning_rate": 1.9260683908186544e-05, + "loss": 508.8771, + "step": 6860 + }, + { + "epoch": 0.13212745395012043, + "grad_norm": 2737.099406740913, + "learning_rate": 1.9258379127864808e-05, + "loss": 500.0156, + "step": 6870 + }, + { + "epoch": 0.13231977921060098, + "grad_norm": 2758.72034557184, + "learning_rate": 1.925607089894976e-05, + "loss": 502.6882, + "step": 6880 + }, + { + "epoch": 0.1325121044710815, + "grad_norm": 2729.260977484528, + "learning_rate": 1.9253759222301168e-05, + "loss": 504.7563, + "step": 6890 + }, + { + "epoch": 0.13270442973156202, + "grad_norm": 2790.5658413151054, + "learning_rate": 1.9251444098780095e-05, + "loss": 505.7114, + "step": 6900 + }, + { + "epoch": 0.13289675499204254, + "grad_norm": 2753.152180961606, + "learning_rate": 1.924912552924889e-05, + "loss": 506.7776, + "step": 6910 + }, + { + "epoch": 0.13308908025252306, + "grad_norm": 2947.5010065928886, + "learning_rate": 1.924680351457118e-05, + "loss": 512.5181, + "step": 6920 + }, + { + "epoch": 0.1332814055130036, + "grad_norm": 2686.4308328155576, + "learning_rate": 1.9244478055611875e-05, + "loss": 496.111, + "step": 6930 + }, + { + "epoch": 0.13347373077348412, + "grad_norm": 3210.7156628482157, + "learning_rate": 1.9242149153237175e-05, + "loss": 517.4425, + "step": 6940 + }, + { + "epoch": 0.13366605603396464, + "grad_norm": 3073.847999215363, + "learning_rate": 1.923981680831455e-05, + "loss": 494.8016, + "step": 6950 + }, + { + "epoch": 0.13385838129444516, + "grad_norm": 2991.491196704255, + "learning_rate": 1.923748102171277e-05, + "loss": 512.0045, + "step": 6960 + }, + { + "epoch": 0.13405070655492568, + "grad_norm": 3021.09219279553, + "learning_rate": 1.9235141794301867e-05, + "loss": 507.9525, + "step": 6970 + }, + { + "epoch": 0.13424303181540623, + "grad_norm": 3291.2066943333184, + "learning_rate": 1.9232799126953173e-05, + "loss": 508.618, + "step": 6980 + }, + { + "epoch": 0.13443535707588675, + "grad_norm": 3267.4846526856877, + "learning_rate": 1.9230453020539285e-05, + "loss": 510.1736, + "step": 6990 + }, + { + "epoch": 0.13462768233636727, + "grad_norm": 2889.519349382989, + "learning_rate": 1.9228103475934096e-05, + "loss": 493.2681, + "step": 7000 + }, + { + "epoch": 0.13482000759684779, + "grad_norm": 3029.048574747814, + "learning_rate": 1.9225750494012767e-05, + "loss": 514.1222, + "step": 7010 + }, + { + "epoch": 0.1350123328573283, + "grad_norm": 2980.365910026588, + "learning_rate": 1.9223394075651748e-05, + "loss": 513.1449, + "step": 7020 + }, + { + "epoch": 0.13520465811780885, + "grad_norm": 3209.5849732677316, + "learning_rate": 1.9221034221728764e-05, + "loss": 501.2741, + "step": 7030 + }, + { + "epoch": 0.13539698337828937, + "grad_norm": 2934.9755109061402, + "learning_rate": 1.9218670933122826e-05, + "loss": 508.1023, + "step": 7040 + }, + { + "epoch": 0.1355893086387699, + "grad_norm": 2669.4887685364515, + "learning_rate": 1.9216304210714213e-05, + "loss": 494.1592, + "step": 7050 + }, + { + "epoch": 0.1357816338992504, + "grad_norm": 3165.3816885313468, + "learning_rate": 1.9213934055384498e-05, + "loss": 527.2488, + "step": 7060 + }, + { + "epoch": 0.13597395915973093, + "grad_norm": 2931.5462485311364, + "learning_rate": 1.9211560468016516e-05, + "loss": 512.6491, + "step": 7070 + }, + { + "epoch": 0.13616628442021148, + "grad_norm": 3087.9201699188498, + "learning_rate": 1.9209183449494397e-05, + "loss": 498.9521, + "step": 7080 + }, + { + "epoch": 0.136358609680692, + "grad_norm": 2972.71779101732, + "learning_rate": 1.9206803000703534e-05, + "loss": 505.3806, + "step": 7090 + }, + { + "epoch": 0.1365509349411725, + "grad_norm": 3005.4594268142446, + "learning_rate": 1.9204419122530614e-05, + "loss": 502.5931, + "step": 7100 + }, + { + "epoch": 0.13674326020165303, + "grad_norm": 2924.4183496973938, + "learning_rate": 1.9202031815863583e-05, + "loss": 501.4909, + "step": 7110 + }, + { + "epoch": 0.13693558546213355, + "grad_norm": 2793.8027483759365, + "learning_rate": 1.919964108159168e-05, + "loss": 496.083, + "step": 7120 + }, + { + "epoch": 0.13712791072261407, + "grad_norm": 2695.7998871082573, + "learning_rate": 1.9197246920605408e-05, + "loss": 485.6467, + "step": 7130 + }, + { + "epoch": 0.13732023598309462, + "grad_norm": 2966.9919464025984, + "learning_rate": 1.9194849333796557e-05, + "loss": 496.4655, + "step": 7140 + }, + { + "epoch": 0.13751256124357514, + "grad_norm": 2909.544915209876, + "learning_rate": 1.9192448322058187e-05, + "loss": 505.7636, + "step": 7150 + }, + { + "epoch": 0.13770488650405566, + "grad_norm": 2969.974367324485, + "learning_rate": 1.9190043886284635e-05, + "loss": 495.1523, + "step": 7160 + }, + { + "epoch": 0.13789721176453618, + "grad_norm": 3199.5117549484075, + "learning_rate": 1.9187636027371518e-05, + "loss": 486.4546, + "step": 7170 + }, + { + "epoch": 0.1380895370250167, + "grad_norm": 3568.3250452673274, + "learning_rate": 1.9185224746215714e-05, + "loss": 513.3837, + "step": 7180 + }, + { + "epoch": 0.13828186228549724, + "grad_norm": 3190.2351081798324, + "learning_rate": 1.9182810043715394e-05, + "loss": 492.3429, + "step": 7190 + }, + { + "epoch": 0.13847418754597776, + "grad_norm": 3010.424687603582, + "learning_rate": 1.9180391920769993e-05, + "loss": 491.3337, + "step": 7200 + }, + { + "epoch": 0.13866651280645828, + "grad_norm": 2856.1887310591183, + "learning_rate": 1.9177970378280215e-05, + "loss": 498.1321, + "step": 7210 + }, + { + "epoch": 0.1388588380669388, + "grad_norm": 3170.294558194702, + "learning_rate": 1.9175545417148056e-05, + "loss": 508.6399, + "step": 7220 + }, + { + "epoch": 0.13905116332741932, + "grad_norm": 2831.0199704246793, + "learning_rate": 1.9173117038276766e-05, + "loss": 501.2726, + "step": 7230 + }, + { + "epoch": 0.13924348858789987, + "grad_norm": 3381.9769536278372, + "learning_rate": 1.9170685242570878e-05, + "loss": 508.0261, + "step": 7240 + }, + { + "epoch": 0.13943581384838039, + "grad_norm": 2629.6630742994976, + "learning_rate": 1.9168250030936195e-05, + "loss": 489.3669, + "step": 7250 + }, + { + "epoch": 0.1396281391088609, + "grad_norm": 3515.715591808194, + "learning_rate": 1.91658114042798e-05, + "loss": 497.8372, + "step": 7260 + }, + { + "epoch": 0.13982046436934142, + "grad_norm": 3297.661932599785, + "learning_rate": 1.9163369363510026e-05, + "loss": 493.3461, + "step": 7270 + }, + { + "epoch": 0.14001278962982194, + "grad_norm": 2840.2594851903527, + "learning_rate": 1.916092390953651e-05, + "loss": 498.9034, + "step": 7280 + }, + { + "epoch": 0.1402051148903025, + "grad_norm": 2999.0256055345612, + "learning_rate": 1.915847504327013e-05, + "loss": 493.2006, + "step": 7290 + }, + { + "epoch": 0.140397440150783, + "grad_norm": 2751.6678060885597, + "learning_rate": 1.9156022765623057e-05, + "loss": 490.7306, + "step": 7300 + }, + { + "epoch": 0.14058976541126353, + "grad_norm": 3201.114508962532, + "learning_rate": 1.9153567077508718e-05, + "loss": 483.6938, + "step": 7310 + }, + { + "epoch": 0.14078209067174405, + "grad_norm": 2792.437226101636, + "learning_rate": 1.9151107979841824e-05, + "loss": 489.7729, + "step": 7320 + }, + { + "epoch": 0.14097441593222457, + "grad_norm": 2962.013811565237, + "learning_rate": 1.9148645473538338e-05, + "loss": 478.1479, + "step": 7330 + }, + { + "epoch": 0.14116674119270511, + "grad_norm": 2908.3890528948004, + "learning_rate": 1.9146179559515507e-05, + "loss": 484.4061, + "step": 7340 + }, + { + "epoch": 0.14135906645318563, + "grad_norm": 2962.249440808847, + "learning_rate": 1.9143710238691847e-05, + "loss": 485.9969, + "step": 7350 + }, + { + "epoch": 0.14155139171366615, + "grad_norm": 2513.2790256078592, + "learning_rate": 1.9141237511987137e-05, + "loss": 477.8858, + "step": 7360 + }, + { + "epoch": 0.14174371697414667, + "grad_norm": 2706.841797558357, + "learning_rate": 1.9138761380322425e-05, + "loss": 497.3392, + "step": 7370 + }, + { + "epoch": 0.1419360422346272, + "grad_norm": 2925.6397387295415, + "learning_rate": 1.913628184462003e-05, + "loss": 489.3872, + "step": 7380 + }, + { + "epoch": 0.14212836749510774, + "grad_norm": 4446.849487432294, + "learning_rate": 1.913379890580354e-05, + "loss": 495.8265, + "step": 7390 + }, + { + "epoch": 0.14232069275558826, + "grad_norm": 2791.091878429826, + "learning_rate": 1.9131312564797805e-05, + "loss": 489.4853, + "step": 7400 + }, + { + "epoch": 0.14251301801606878, + "grad_norm": 2626.5949527078046, + "learning_rate": 1.912882282252895e-05, + "loss": 475.7432, + "step": 7410 + }, + { + "epoch": 0.1427053432765493, + "grad_norm": 3383.8341351294794, + "learning_rate": 1.9126329679924364e-05, + "loss": 496.1304, + "step": 7420 + }, + { + "epoch": 0.14289766853702981, + "grad_norm": 2978.863005353464, + "learning_rate": 1.9123833137912693e-05, + "loss": 489.6156, + "step": 7430 + }, + { + "epoch": 0.14308999379751036, + "grad_norm": 2849.684134046931, + "learning_rate": 1.9121333197423867e-05, + "loss": 512.0472, + "step": 7440 + }, + { + "epoch": 0.14328231905799088, + "grad_norm": 3088.67894949292, + "learning_rate": 1.9118829859389067e-05, + "loss": 502.16, + "step": 7450 + }, + { + "epoch": 0.1434746443184714, + "grad_norm": 2819.8772951772203, + "learning_rate": 1.9116323124740748e-05, + "loss": 485.5934, + "step": 7460 + }, + { + "epoch": 0.14366696957895192, + "grad_norm": 2773.713141370265, + "learning_rate": 1.9113812994412627e-05, + "loss": 490.0072, + "step": 7470 + }, + { + "epoch": 0.14385929483943244, + "grad_norm": 2833.560775422032, + "learning_rate": 1.911129946933968e-05, + "loss": 492.3943, + "step": 7480 + }, + { + "epoch": 0.14405162009991299, + "grad_norm": 2598.159765312937, + "learning_rate": 1.9108782550458164e-05, + "loss": 501.1888, + "step": 7490 + }, + { + "epoch": 0.1442439453603935, + "grad_norm": 2947.8731284449723, + "learning_rate": 1.9106262238705583e-05, + "loss": 495.5496, + "step": 7500 + }, + { + "epoch": 0.14443627062087402, + "grad_norm": 2674.6330467886787, + "learning_rate": 1.9103738535020713e-05, + "loss": 494.459, + "step": 7510 + }, + { + "epoch": 0.14462859588135454, + "grad_norm": 2855.64514682562, + "learning_rate": 1.910121144034359e-05, + "loss": 478.4898, + "step": 7520 + }, + { + "epoch": 0.14482092114183506, + "grad_norm": 3014.0201415361366, + "learning_rate": 1.909868095561552e-05, + "loss": 481.9886, + "step": 7530 + }, + { + "epoch": 0.1450132464023156, + "grad_norm": 2834.7522007383313, + "learning_rate": 1.9096147081779063e-05, + "loss": 494.806, + "step": 7540 + }, + { + "epoch": 0.14520557166279613, + "grad_norm": 2843.4863006339915, + "learning_rate": 1.9093609819778044e-05, + "loss": 482.2569, + "step": 7550 + }, + { + "epoch": 0.14539789692327665, + "grad_norm": 2734.9134302578213, + "learning_rate": 1.9091069170557554e-05, + "loss": 489.1965, + "step": 7560 + }, + { + "epoch": 0.14559022218375717, + "grad_norm": 2970.862474311478, + "learning_rate": 1.9088525135063944e-05, + "loss": 484.0156, + "step": 7570 + }, + { + "epoch": 0.1457825474442377, + "grad_norm": 2527.0690158392335, + "learning_rate": 1.9085977714244822e-05, + "loss": 482.1195, + "step": 7580 + }, + { + "epoch": 0.1459748727047182, + "grad_norm": 3162.881517394372, + "learning_rate": 1.908342690904906e-05, + "loss": 492.1695, + "step": 7590 + }, + { + "epoch": 0.14616719796519875, + "grad_norm": 3077.277460070853, + "learning_rate": 1.9080872720426793e-05, + "loss": 483.6169, + "step": 7600 + }, + { + "epoch": 0.14635952322567927, + "grad_norm": 2742.044614693097, + "learning_rate": 1.9078315149329413e-05, + "loss": 461.6744, + "step": 7610 + }, + { + "epoch": 0.1465518484861598, + "grad_norm": 2681.675491050549, + "learning_rate": 1.9075754196709574e-05, + "loss": 490.2057, + "step": 7620 + }, + { + "epoch": 0.1467441737466403, + "grad_norm": 2626.59863424605, + "learning_rate": 1.9073189863521184e-05, + "loss": 477.6793, + "step": 7630 + }, + { + "epoch": 0.14693649900712083, + "grad_norm": 2643.2531040384692, + "learning_rate": 1.9070622150719423e-05, + "loss": 482.2477, + "step": 7640 + }, + { + "epoch": 0.14712882426760138, + "grad_norm": 3172.2098393406486, + "learning_rate": 1.9068051059260716e-05, + "loss": 481.8371, + "step": 7650 + }, + { + "epoch": 0.1473211495280819, + "grad_norm": 3256.479733327193, + "learning_rate": 1.9065476590102752e-05, + "loss": 491.9813, + "step": 7660 + }, + { + "epoch": 0.14751347478856242, + "grad_norm": 2630.5854410247307, + "learning_rate": 1.906289874420448e-05, + "loss": 498.6755, + "step": 7670 + }, + { + "epoch": 0.14770580004904293, + "grad_norm": 2748.258611251968, + "learning_rate": 1.9060317522526105e-05, + "loss": 470.4862, + "step": 7680 + }, + { + "epoch": 0.14789812530952345, + "grad_norm": 2775.263570459274, + "learning_rate": 1.905773292602909e-05, + "loss": 485.3623, + "step": 7690 + }, + { + "epoch": 0.148090450570004, + "grad_norm": 2886.085140627227, + "learning_rate": 1.905514495567615e-05, + "loss": 476.224, + "step": 7700 + }, + { + "epoch": 0.14828277583048452, + "grad_norm": 2702.3882118156494, + "learning_rate": 1.9052553612431268e-05, + "loss": 496.2374, + "step": 7710 + }, + { + "epoch": 0.14847510109096504, + "grad_norm": 2755.746818929295, + "learning_rate": 1.9049958897259674e-05, + "loss": 473.6089, + "step": 7720 + }, + { + "epoch": 0.14866742635144556, + "grad_norm": 2893.7558399190953, + "learning_rate": 1.904736081112785e-05, + "loss": 482.383, + "step": 7730 + }, + { + "epoch": 0.14885975161192608, + "grad_norm": 2613.4543216423335, + "learning_rate": 1.9044759355003552e-05, + "loss": 487.0586, + "step": 7740 + }, + { + "epoch": 0.14905207687240662, + "grad_norm": 2880.935110610246, + "learning_rate": 1.904215452985577e-05, + "loss": 473.3734, + "step": 7750 + }, + { + "epoch": 0.14924440213288714, + "grad_norm": 3768.0153704315408, + "learning_rate": 1.9039546336654765e-05, + "loss": 503.639, + "step": 7760 + }, + { + "epoch": 0.14943672739336766, + "grad_norm": 3132.526013928591, + "learning_rate": 1.903693477637204e-05, + "loss": 470.5541, + "step": 7770 + }, + { + "epoch": 0.14962905265384818, + "grad_norm": 2774.32343686497, + "learning_rate": 1.903431984998036e-05, + "loss": 474.7524, + "step": 7780 + }, + { + "epoch": 0.1498213779143287, + "grad_norm": 2938.971764833828, + "learning_rate": 1.9031701558453747e-05, + "loss": 498.0345, + "step": 7790 + }, + { + "epoch": 0.15001370317480925, + "grad_norm": 3693.358094860785, + "learning_rate": 1.902907990276746e-05, + "loss": 458.7667, + "step": 7800 + }, + { + "epoch": 0.15020602843528977, + "grad_norm": 3306.4578035535965, + "learning_rate": 1.9026454883898036e-05, + "loss": 486.2284, + "step": 7810 + }, + { + "epoch": 0.1503983536957703, + "grad_norm": 3100.144077778144, + "learning_rate": 1.902382650282324e-05, + "loss": 467.8271, + "step": 7820 + }, + { + "epoch": 0.1505906789562508, + "grad_norm": 2732.474309499422, + "learning_rate": 1.902119476052211e-05, + "loss": 461.508, + "step": 7830 + }, + { + "epoch": 0.15078300421673133, + "grad_norm": 2770.421004144993, + "learning_rate": 1.9018559657974918e-05, + "loss": 476.7328, + "step": 7840 + }, + { + "epoch": 0.15097532947721187, + "grad_norm": 2909.490670250935, + "learning_rate": 1.90159211961632e-05, + "loss": 530.7372, + "step": 7850 + }, + { + "epoch": 0.1511676547376924, + "grad_norm": 3188.832267172363, + "learning_rate": 1.901327937606974e-05, + "loss": 502.5419, + "step": 7860 + }, + { + "epoch": 0.1513599799981729, + "grad_norm": 2612.9993956686194, + "learning_rate": 1.901063419867857e-05, + "loss": 486.2078, + "step": 7870 + }, + { + "epoch": 0.15155230525865343, + "grad_norm": 2747.7614659293913, + "learning_rate": 1.900798566497498e-05, + "loss": 474.4527, + "step": 7880 + }, + { + "epoch": 0.15174463051913395, + "grad_norm": 2509.4355263152893, + "learning_rate": 1.9005333775945496e-05, + "loss": 479.4521, + "step": 7890 + }, + { + "epoch": 0.1519369557796145, + "grad_norm": 2732.3112735518544, + "learning_rate": 1.9002678532577915e-05, + "loss": 469.8596, + "step": 7900 + }, + { + "epoch": 0.15212928104009502, + "grad_norm": 2649.1751809974876, + "learning_rate": 1.900001993586126e-05, + "loss": 481.8939, + "step": 7910 + }, + { + "epoch": 0.15232160630057553, + "grad_norm": 2677.0014270735664, + "learning_rate": 1.8997357986785822e-05, + "loss": 489.0299, + "step": 7920 + }, + { + "epoch": 0.15251393156105605, + "grad_norm": 2700.5217249604525, + "learning_rate": 1.899469268634313e-05, + "loss": 481.1306, + "step": 7930 + }, + { + "epoch": 0.15270625682153657, + "grad_norm": 2508.298814733503, + "learning_rate": 1.8992024035525964e-05, + "loss": 463.1267, + "step": 7940 + }, + { + "epoch": 0.15289858208201712, + "grad_norm": 2981.2789852308065, + "learning_rate": 1.8989352035328352e-05, + "loss": 477.5609, + "step": 7950 + }, + { + "epoch": 0.15309090734249764, + "grad_norm": 2891.7242881516418, + "learning_rate": 1.8986676686745572e-05, + "loss": 473.6297, + "step": 7960 + }, + { + "epoch": 0.15328323260297816, + "grad_norm": 2879.0509933380317, + "learning_rate": 1.8983997990774145e-05, + "loss": 494.6183, + "step": 7970 + }, + { + "epoch": 0.15347555786345868, + "grad_norm": 2763.657335141747, + "learning_rate": 1.8981315948411842e-05, + "loss": 486.7721, + "step": 7980 + }, + { + "epoch": 0.1536678831239392, + "grad_norm": 3176.949500486039, + "learning_rate": 1.897863056065768e-05, + "loss": 490.7333, + "step": 7990 + }, + { + "epoch": 0.15386020838441974, + "grad_norm": 2831.83018369395, + "learning_rate": 1.8975941828511923e-05, + "loss": 474.9769, + "step": 8000 + }, + { + "epoch": 0.15405253364490026, + "grad_norm": 2898.392383273504, + "learning_rate": 1.8973249752976075e-05, + "loss": 474.2976, + "step": 8010 + }, + { + "epoch": 0.15424485890538078, + "grad_norm": 2836.3775964522815, + "learning_rate": 1.8970554335052897e-05, + "loss": 464.9511, + "step": 8020 + }, + { + "epoch": 0.1544371841658613, + "grad_norm": 2788.359892750998, + "learning_rate": 1.8967855575746375e-05, + "loss": 467.5968, + "step": 8030 + }, + { + "epoch": 0.15462950942634182, + "grad_norm": 2674.441048869643, + "learning_rate": 1.8965153476061763e-05, + "loss": 475.4356, + "step": 8040 + }, + { + "epoch": 0.15482183468682234, + "grad_norm": 2820.9973278343778, + "learning_rate": 1.896244803700555e-05, + "loss": 470.7019, + "step": 8050 + }, + { + "epoch": 0.1550141599473029, + "grad_norm": 2833.5184081204247, + "learning_rate": 1.8959739259585458e-05, + "loss": 467.0874, + "step": 8060 + }, + { + "epoch": 0.1552064852077834, + "grad_norm": 2670.554800576522, + "learning_rate": 1.895702714481047e-05, + "loss": 454.6197, + "step": 8070 + }, + { + "epoch": 0.15539881046826393, + "grad_norm": 2654.2861147762346, + "learning_rate": 1.8954311693690798e-05, + "loss": 450.6652, + "step": 8080 + }, + { + "epoch": 0.15559113572874445, + "grad_norm": 2885.771301584405, + "learning_rate": 1.8951592907237906e-05, + "loss": 493.7055, + "step": 8090 + }, + { + "epoch": 0.15578346098922496, + "grad_norm": 3661.354185149759, + "learning_rate": 1.8948870786464496e-05, + "loss": 486.4298, + "step": 8100 + }, + { + "epoch": 0.1559757862497055, + "grad_norm": 2794.957543952636, + "learning_rate": 1.8946145332384515e-05, + "loss": 455.7606, + "step": 8110 + }, + { + "epoch": 0.15616811151018603, + "grad_norm": 2789.9608572044913, + "learning_rate": 1.8943416546013148e-05, + "loss": 482.6221, + "step": 8120 + }, + { + "epoch": 0.15636043677066655, + "grad_norm": 2402.1141955166977, + "learning_rate": 1.894068442836682e-05, + "loss": 466.4746, + "step": 8130 + }, + { + "epoch": 0.15655276203114707, + "grad_norm": 3050.247166384424, + "learning_rate": 1.8937948980463207e-05, + "loss": 463.4685, + "step": 8140 + }, + { + "epoch": 0.1567450872916276, + "grad_norm": 2976.425797495626, + "learning_rate": 1.893521020332121e-05, + "loss": 481.4321, + "step": 8150 + }, + { + "epoch": 0.15693741255210814, + "grad_norm": 3450.0250925142286, + "learning_rate": 1.8932468097960988e-05, + "loss": 462.0546, + "step": 8160 + }, + { + "epoch": 0.15712973781258865, + "grad_norm": 2702.7549440163066, + "learning_rate": 1.892972266540392e-05, + "loss": 448.9424, + "step": 8170 + }, + { + "epoch": 0.15732206307306917, + "grad_norm": 2596.001292073025, + "learning_rate": 1.8926973906672635e-05, + "loss": 454.9815, + "step": 8180 + }, + { + "epoch": 0.1575143883335497, + "grad_norm": 3009.8509523185962, + "learning_rate": 1.892422182279101e-05, + "loss": 471.6159, + "step": 8190 + }, + { + "epoch": 0.1577067135940302, + "grad_norm": 2652.4358538229417, + "learning_rate": 1.892146641478414e-05, + "loss": 467.1265, + "step": 8200 + }, + { + "epoch": 0.15789903885451076, + "grad_norm": 2495.2359093695, + "learning_rate": 1.8918707683678376e-05, + "loss": 452.5336, + "step": 8210 + }, + { + "epoch": 0.15809136411499128, + "grad_norm": 3007.1314531523863, + "learning_rate": 1.8915945630501296e-05, + "loss": 462.3906, + "step": 8220 + }, + { + "epoch": 0.1582836893754718, + "grad_norm": 2798.396673862514, + "learning_rate": 1.8913180256281723e-05, + "loss": 463.8158, + "step": 8230 + }, + { + "epoch": 0.15847601463595232, + "grad_norm": 2544.692471398825, + "learning_rate": 1.8910411562049706e-05, + "loss": 454.6521, + "step": 8240 + }, + { + "epoch": 0.15866833989643284, + "grad_norm": 2527.1761584761894, + "learning_rate": 1.8907639548836548e-05, + "loss": 459.9999, + "step": 8250 + }, + { + "epoch": 0.15886066515691338, + "grad_norm": 2723.1644502431964, + "learning_rate": 1.8904864217674766e-05, + "loss": 484.4615, + "step": 8260 + }, + { + "epoch": 0.1590529904173939, + "grad_norm": 2496.344877980351, + "learning_rate": 1.8902085569598136e-05, + "loss": 452.6649, + "step": 8270 + }, + { + "epoch": 0.15924531567787442, + "grad_norm": 2496.2472071614447, + "learning_rate": 1.889930360564165e-05, + "loss": 454.2822, + "step": 8280 + }, + { + "epoch": 0.15943764093835494, + "grad_norm": 2593.1348017621613, + "learning_rate": 1.8896518326841554e-05, + "loss": 447.8645, + "step": 8290 + }, + { + "epoch": 0.15962996619883546, + "grad_norm": 2661.784519983517, + "learning_rate": 1.889372973423531e-05, + "loss": 460.3713, + "step": 8300 + }, + { + "epoch": 0.159822291459316, + "grad_norm": 2438.8354895739203, + "learning_rate": 1.889093782886162e-05, + "loss": 457.8413, + "step": 8310 + }, + { + "epoch": 0.16001461671979653, + "grad_norm": 2714.4516476368362, + "learning_rate": 1.8888142611760433e-05, + "loss": 453.8086, + "step": 8320 + }, + { + "epoch": 0.16020694198027705, + "grad_norm": 3906.6661312224155, + "learning_rate": 1.8885344083972912e-05, + "loss": 453.6644, + "step": 8330 + }, + { + "epoch": 0.16039926724075756, + "grad_norm": 3294.581965773663, + "learning_rate": 1.8882542246541468e-05, + "loss": 487.4707, + "step": 8340 + }, + { + "epoch": 0.16059159250123808, + "grad_norm": 2935.270245095748, + "learning_rate": 1.887973710050974e-05, + "loss": 477.0995, + "step": 8350 + }, + { + "epoch": 0.16078391776171863, + "grad_norm": 2575.6822947435367, + "learning_rate": 1.887692864692259e-05, + "loss": 451.8498, + "step": 8360 + }, + { + "epoch": 0.16097624302219915, + "grad_norm": 2546.8471243887843, + "learning_rate": 1.887411688682613e-05, + "loss": 471.3553, + "step": 8370 + }, + { + "epoch": 0.16116856828267967, + "grad_norm": 2804.982756489293, + "learning_rate": 1.887130182126769e-05, + "loss": 469.4985, + "step": 8380 + }, + { + "epoch": 0.1613608935431602, + "grad_norm": 2934.456138274911, + "learning_rate": 1.8868483451295835e-05, + "loss": 463.9745, + "step": 8390 + }, + { + "epoch": 0.1615532188036407, + "grad_norm": 2794.9079840955537, + "learning_rate": 1.8865661777960366e-05, + "loss": 455.6153, + "step": 8400 + }, + { + "epoch": 0.16174554406412125, + "grad_norm": 2621.9513638770195, + "learning_rate": 1.88628368023123e-05, + "loss": 463.6387, + "step": 8410 + }, + { + "epoch": 0.16193786932460177, + "grad_norm": 7409.282526206722, + "learning_rate": 1.8860008525403903e-05, + "loss": 488.0506, + "step": 8420 + }, + { + "epoch": 0.1621301945850823, + "grad_norm": 2552.0709512542307, + "learning_rate": 1.885717694828866e-05, + "loss": 460.1391, + "step": 8430 + }, + { + "epoch": 0.1623225198455628, + "grad_norm": 2642.468928233332, + "learning_rate": 1.8854342072021282e-05, + "loss": 444.1377, + "step": 8440 + }, + { + "epoch": 0.16251484510604333, + "grad_norm": 2516.648950037554, + "learning_rate": 1.8851503897657717e-05, + "loss": 459.1028, + "step": 8450 + }, + { + "epoch": 0.16270717036652388, + "grad_norm": 2536.2265636656502, + "learning_rate": 1.8848662426255135e-05, + "loss": 468.7705, + "step": 8460 + }, + { + "epoch": 0.1628994956270044, + "grad_norm": 2614.4965010157607, + "learning_rate": 1.8845817658871942e-05, + "loss": 461.9733, + "step": 8470 + }, + { + "epoch": 0.16309182088748492, + "grad_norm": 2486.652627734411, + "learning_rate": 1.8842969596567765e-05, + "loss": 470.0281, + "step": 8480 + }, + { + "epoch": 0.16328414614796544, + "grad_norm": 2544.035398439454, + "learning_rate": 1.884011824040346e-05, + "loss": 479.6681, + "step": 8490 + }, + { + "epoch": 0.16347647140844596, + "grad_norm": 3201.5775268815228, + "learning_rate": 1.883726359144111e-05, + "loss": 464.7123, + "step": 8500 + }, + { + "epoch": 0.16366879666892648, + "grad_norm": 2701.606297904168, + "learning_rate": 1.8834405650744023e-05, + "loss": 454.8366, + "step": 8510 + }, + { + "epoch": 0.16386112192940702, + "grad_norm": 2836.1064810811786, + "learning_rate": 1.883154441937674e-05, + "loss": 465.3839, + "step": 8520 + }, + { + "epoch": 0.16405344718988754, + "grad_norm": 2525.191898053846, + "learning_rate": 1.8828679898405015e-05, + "loss": 444.021, + "step": 8530 + }, + { + "epoch": 0.16424577245036806, + "grad_norm": 2684.247548099907, + "learning_rate": 1.8825812088895835e-05, + "loss": 463.5473, + "step": 8540 + }, + { + "epoch": 0.16443809771084858, + "grad_norm": 2726.5514179263896, + "learning_rate": 1.882294099191742e-05, + "loss": 463.1741, + "step": 8550 + }, + { + "epoch": 0.1646304229713291, + "grad_norm": 2651.7242239650836, + "learning_rate": 1.88200666085392e-05, + "loss": 452.2353, + "step": 8560 + }, + { + "epoch": 0.16482274823180965, + "grad_norm": 2604.6174059849163, + "learning_rate": 1.8817188939831838e-05, + "loss": 462.9798, + "step": 8570 + }, + { + "epoch": 0.16501507349229017, + "grad_norm": 3034.7929991319465, + "learning_rate": 1.8814307986867214e-05, + "loss": 442.6848, + "step": 8580 + }, + { + "epoch": 0.16520739875277068, + "grad_norm": 2748.234644255372, + "learning_rate": 1.881142375071844e-05, + "loss": 435.3604, + "step": 8590 + }, + { + "epoch": 0.1653997240132512, + "grad_norm": 2939.0726535785348, + "learning_rate": 1.8808536232459844e-05, + "loss": 463.7852, + "step": 8600 + }, + { + "epoch": 0.16559204927373172, + "grad_norm": 2461.040963357909, + "learning_rate": 1.8805645433166976e-05, + "loss": 467.5215, + "step": 8610 + }, + { + "epoch": 0.16578437453421227, + "grad_norm": 2889.624399623839, + "learning_rate": 1.8802751353916618e-05, + "loss": 462.004, + "step": 8620 + }, + { + "epoch": 0.1659766997946928, + "grad_norm": 2518.8337305606265, + "learning_rate": 1.8799853995786763e-05, + "loss": 454.456, + "step": 8630 + }, + { + "epoch": 0.1661690250551733, + "grad_norm": 2468.759031477647, + "learning_rate": 1.8796953359856626e-05, + "loss": 433.6745, + "step": 8640 + }, + { + "epoch": 0.16636135031565383, + "grad_norm": 2723.029927952221, + "learning_rate": 1.879404944720665e-05, + "loss": 451.1816, + "step": 8650 + }, + { + "epoch": 0.16655367557613435, + "grad_norm": 2688.333294514795, + "learning_rate": 1.8791142258918496e-05, + "loss": 452.7185, + "step": 8660 + }, + { + "epoch": 0.1667460008366149, + "grad_norm": 2525.239445850532, + "learning_rate": 1.8788231796075037e-05, + "loss": 468.3976, + "step": 8670 + }, + { + "epoch": 0.1669383260970954, + "grad_norm": 2394.840633479949, + "learning_rate": 1.8785318059760384e-05, + "loss": 440.3838, + "step": 8680 + }, + { + "epoch": 0.16713065135757593, + "grad_norm": 2657.3516753061485, + "learning_rate": 1.8782401051059838e-05, + "loss": 446.2088, + "step": 8690 + }, + { + "epoch": 0.16732297661805645, + "grad_norm": 2572.4322494026887, + "learning_rate": 1.8779480771059954e-05, + "loss": 433.9772, + "step": 8700 + }, + { + "epoch": 0.16751530187853697, + "grad_norm": 2532.5718843841155, + "learning_rate": 1.8776557220848477e-05, + "loss": 445.3365, + "step": 8710 + }, + { + "epoch": 0.16770762713901752, + "grad_norm": 22990.43305437463, + "learning_rate": 1.8773630401514388e-05, + "loss": 457.1438, + "step": 8720 + }, + { + "epoch": 0.16789995239949804, + "grad_norm": 3069.2404349042918, + "learning_rate": 1.877070031414787e-05, + "loss": 459.5575, + "step": 8730 + }, + { + "epoch": 0.16809227765997856, + "grad_norm": 3377.1212825848525, + "learning_rate": 1.876776695984034e-05, + "loss": 444.4089, + "step": 8740 + }, + { + "epoch": 0.16828460292045908, + "grad_norm": 2746.360379939743, + "learning_rate": 1.8764830339684426e-05, + "loss": 441.5095, + "step": 8750 + }, + { + "epoch": 0.1684769281809396, + "grad_norm": 2821.7195242780617, + "learning_rate": 1.8761890454773965e-05, + "loss": 449.9007, + "step": 8760 + }, + { + "epoch": 0.16866925344142014, + "grad_norm": 2504.713284221407, + "learning_rate": 1.8758947306204012e-05, + "loss": 457.6672, + "step": 8770 + }, + { + "epoch": 0.16886157870190066, + "grad_norm": 2618.0904141710457, + "learning_rate": 1.8756000895070854e-05, + "loss": 431.5223, + "step": 8780 + }, + { + "epoch": 0.16905390396238118, + "grad_norm": 2377.2910977937076, + "learning_rate": 1.8753051222471968e-05, + "loss": 454.6217, + "step": 8790 + }, + { + "epoch": 0.1692462292228617, + "grad_norm": 2761.4493946217513, + "learning_rate": 1.8750098289506066e-05, + "loss": 442.4448, + "step": 8800 + }, + { + "epoch": 0.16943855448334222, + "grad_norm": 2633.4902607595054, + "learning_rate": 1.8747142097273057e-05, + "loss": 439.2786, + "step": 8810 + }, + { + "epoch": 0.16963087974382277, + "grad_norm": 2495.2262974002697, + "learning_rate": 1.8744182646874085e-05, + "loss": 448.5426, + "step": 8820 + }, + { + "epoch": 0.16982320500430328, + "grad_norm": 2628.0239127628934, + "learning_rate": 1.8741219939411494e-05, + "loss": 461.3018, + "step": 8830 + }, + { + "epoch": 0.1700155302647838, + "grad_norm": 2860.2357493755394, + "learning_rate": 1.873825397598884e-05, + "loss": 457.4539, + "step": 8840 + }, + { + "epoch": 0.17020785552526432, + "grad_norm": 2594.084372111208, + "learning_rate": 1.8735284757710897e-05, + "loss": 452.0995, + "step": 8850 + }, + { + "epoch": 0.17040018078574484, + "grad_norm": 2710.2131400228295, + "learning_rate": 1.873231228568365e-05, + "loss": 455.7155, + "step": 8860 + }, + { + "epoch": 0.1705925060462254, + "grad_norm": 2665.492809143777, + "learning_rate": 1.8729336561014294e-05, + "loss": 455.2856, + "step": 8870 + }, + { + "epoch": 0.1707848313067059, + "grad_norm": 2561.649795156198, + "learning_rate": 1.8726357584811242e-05, + "loss": 441.5276, + "step": 8880 + }, + { + "epoch": 0.17097715656718643, + "grad_norm": 2415.065192258531, + "learning_rate": 1.8723375358184107e-05, + "loss": 454.0223, + "step": 8890 + }, + { + "epoch": 0.17116948182766695, + "grad_norm": 2746.989536334842, + "learning_rate": 1.8720389882243722e-05, + "loss": 441.6445, + "step": 8900 + }, + { + "epoch": 0.17136180708814747, + "grad_norm": 2780.5009929539547, + "learning_rate": 1.871740115810213e-05, + "loss": 468.2937, + "step": 8910 + }, + { + "epoch": 0.171554132348628, + "grad_norm": 2632.9240695650924, + "learning_rate": 1.8714409186872578e-05, + "loss": 453.4708, + "step": 8920 + }, + { + "epoch": 0.17174645760910853, + "grad_norm": 18478.258419009937, + "learning_rate": 1.871141396966953e-05, + "loss": 458.452, + "step": 8930 + }, + { + "epoch": 0.17193878286958905, + "grad_norm": 2573.099238197207, + "learning_rate": 1.8708415507608647e-05, + "loss": 443.7543, + "step": 8940 + }, + { + "epoch": 0.17213110813006957, + "grad_norm": 2470.2442302466566, + "learning_rate": 1.8705413801806817e-05, + "loss": 446.0874, + "step": 8950 + }, + { + "epoch": 0.1723234333905501, + "grad_norm": 2944.5258247958905, + "learning_rate": 1.8702408853382114e-05, + "loss": 436.7016, + "step": 8960 + }, + { + "epoch": 0.1725157586510306, + "grad_norm": 2428.471223091907, + "learning_rate": 1.8699400663453842e-05, + "loss": 439.9705, + "step": 8970 + }, + { + "epoch": 0.17270808391151116, + "grad_norm": 2779.5787718497054, + "learning_rate": 1.8696389233142498e-05, + "loss": 436.2561, + "step": 8980 + }, + { + "epoch": 0.17290040917199168, + "grad_norm": 2801.7844262610247, + "learning_rate": 1.8693374563569792e-05, + "loss": 440.0648, + "step": 8990 + }, + { + "epoch": 0.1730927344324722, + "grad_norm": 2468.3211507447713, + "learning_rate": 1.8690356655858634e-05, + "loss": 444.2379, + "step": 9000 + }, + { + "epoch": 0.17328505969295271, + "grad_norm": 2950.9049283145578, + "learning_rate": 1.868733551113315e-05, + "loss": 457.8609, + "step": 9010 + }, + { + "epoch": 0.17347738495343323, + "grad_norm": 2623.2794604038363, + "learning_rate": 1.8684311130518663e-05, + "loss": 440.1009, + "step": 9020 + }, + { + "epoch": 0.17366971021391378, + "grad_norm": 2555.9618966907633, + "learning_rate": 1.8681283515141705e-05, + "loss": 437.3212, + "step": 9030 + }, + { + "epoch": 0.1738620354743943, + "grad_norm": 2927.0106727794473, + "learning_rate": 1.8678252666130016e-05, + "loss": 445.1681, + "step": 9040 + }, + { + "epoch": 0.17405436073487482, + "grad_norm": 2550.7522383571295, + "learning_rate": 1.8675218584612534e-05, + "loss": 437.728, + "step": 9050 + }, + { + "epoch": 0.17424668599535534, + "grad_norm": 2649.529870631337, + "learning_rate": 1.8672181271719406e-05, + "loss": 446.4435, + "step": 9060 + }, + { + "epoch": 0.17443901125583586, + "grad_norm": 2720.653259222087, + "learning_rate": 1.866914072858198e-05, + "loss": 455.3857, + "step": 9070 + }, + { + "epoch": 0.1746313365163164, + "grad_norm": 2804.8181920843176, + "learning_rate": 1.8666096956332805e-05, + "loss": 443.0305, + "step": 9080 + }, + { + "epoch": 0.17482366177679692, + "grad_norm": 2729.2129717363414, + "learning_rate": 1.8663049956105642e-05, + "loss": 440.492, + "step": 9090 + }, + { + "epoch": 0.17501598703727744, + "grad_norm": 2609.3562559386232, + "learning_rate": 1.8659999729035445e-05, + "loss": 437.4828, + "step": 9100 + }, + { + "epoch": 0.17520831229775796, + "grad_norm": 2451.155422918878, + "learning_rate": 1.8656946276258373e-05, + "loss": 434.4389, + "step": 9110 + }, + { + "epoch": 0.17540063755823848, + "grad_norm": 2512.828164974454, + "learning_rate": 1.8653889598911787e-05, + "loss": 446.8909, + "step": 9120 + }, + { + "epoch": 0.17559296281871903, + "grad_norm": 2806.0533918149017, + "learning_rate": 1.865082969813425e-05, + "loss": 435.8528, + "step": 9130 + }, + { + "epoch": 0.17578528807919955, + "grad_norm": 2548.2490140445034, + "learning_rate": 1.8647766575065523e-05, + "loss": 442.6416, + "step": 9140 + }, + { + "epoch": 0.17597761333968007, + "grad_norm": 2803.8978902199083, + "learning_rate": 1.864470023084657e-05, + "loss": 462.2274, + "step": 9150 + }, + { + "epoch": 0.17616993860016059, + "grad_norm": 2623.634212275952, + "learning_rate": 1.864163066661955e-05, + "loss": 442.0449, + "step": 9160 + }, + { + "epoch": 0.1763622638606411, + "grad_norm": 2470.2873050666863, + "learning_rate": 1.8638557883527833e-05, + "loss": 435.2117, + "step": 9170 + }, + { + "epoch": 0.17655458912112165, + "grad_norm": 2408.3928470769733, + "learning_rate": 1.8635481882715975e-05, + "loss": 426.3458, + "step": 9180 + }, + { + "epoch": 0.17674691438160217, + "grad_norm": 2335.9253478763662, + "learning_rate": 1.863240266532973e-05, + "loss": 426.5856, + "step": 9190 + }, + { + "epoch": 0.1769392396420827, + "grad_norm": 2516.6826056669256, + "learning_rate": 1.8629320232516063e-05, + "loss": 444.586, + "step": 9200 + }, + { + "epoch": 0.1771315649025632, + "grad_norm": 2295.24317998568, + "learning_rate": 1.862623458542313e-05, + "loss": 440.7661, + "step": 9210 + }, + { + "epoch": 0.17732389016304373, + "grad_norm": 2402.3695694836456, + "learning_rate": 1.862314572520028e-05, + "loss": 434.8042, + "step": 9220 + }, + { + "epoch": 0.17751621542352428, + "grad_norm": 2393.47139003244, + "learning_rate": 1.862005365299806e-05, + "loss": 428.6981, + "step": 9230 + }, + { + "epoch": 0.1777085406840048, + "grad_norm": 2601.4400613546713, + "learning_rate": 1.8616958369968223e-05, + "loss": 457.2905, + "step": 9240 + }, + { + "epoch": 0.17790086594448531, + "grad_norm": 2598.4376595452686, + "learning_rate": 1.8613859877263708e-05, + "loss": 437.9435, + "step": 9250 + }, + { + "epoch": 0.17809319120496583, + "grad_norm": 2480.4261551646964, + "learning_rate": 1.8610758176038647e-05, + "loss": 424.9386, + "step": 9260 + }, + { + "epoch": 0.17828551646544635, + "grad_norm": 2725.104556430378, + "learning_rate": 1.860765326744838e-05, + "loss": 431.2261, + "step": 9270 + }, + { + "epoch": 0.1784778417259269, + "grad_norm": 2712.507553065823, + "learning_rate": 1.8604545152649426e-05, + "loss": 443.1223, + "step": 9280 + }, + { + "epoch": 0.17867016698640742, + "grad_norm": 2372.062633725075, + "learning_rate": 1.860143383279952e-05, + "loss": 443.9025, + "step": 9290 + }, + { + "epoch": 0.17886249224688794, + "grad_norm": 2683.398947528759, + "learning_rate": 1.859831930905756e-05, + "loss": 430.8124, + "step": 9300 + }, + { + "epoch": 0.17905481750736846, + "grad_norm": 2387.22497436682, + "learning_rate": 1.8595201582583668e-05, + "loss": 442.3059, + "step": 9310 + }, + { + "epoch": 0.17924714276784898, + "grad_norm": 2537.502536146372, + "learning_rate": 1.859208065453914e-05, + "loss": 428.5348, + "step": 9320 + }, + { + "epoch": 0.17943946802832952, + "grad_norm": 2647.7503285995936, + "learning_rate": 1.8588956526086472e-05, + "loss": 460.5251, + "step": 9330 + }, + { + "epoch": 0.17963179328881004, + "grad_norm": 2761.5945195069867, + "learning_rate": 1.8585829198389347e-05, + "loss": 430.1305, + "step": 9340 + }, + { + "epoch": 0.17982411854929056, + "grad_norm": 2527.638340963992, + "learning_rate": 1.8582698672612646e-05, + "loss": 427.3626, + "step": 9350 + }, + { + "epoch": 0.18001644380977108, + "grad_norm": 2681.904507082835, + "learning_rate": 1.8579564949922438e-05, + "loss": 427.4606, + "step": 9360 + }, + { + "epoch": 0.1802087690702516, + "grad_norm": 2520.4384581669665, + "learning_rate": 1.8576428031485984e-05, + "loss": 422.3001, + "step": 9370 + }, + { + "epoch": 0.18040109433073215, + "grad_norm": 2282.314596930165, + "learning_rate": 1.8573287918471728e-05, + "loss": 432.4351, + "step": 9380 + }, + { + "epoch": 0.18059341959121267, + "grad_norm": 2695.8164683961345, + "learning_rate": 1.8570144612049322e-05, + "loss": 430.8169, + "step": 9390 + }, + { + "epoch": 0.1807857448516932, + "grad_norm": 3000.4497862681146, + "learning_rate": 1.856699811338958e-05, + "loss": 430.5135, + "step": 9400 + }, + { + "epoch": 0.1809780701121737, + "grad_norm": 2584.238550307117, + "learning_rate": 1.8563848423664536e-05, + "loss": 442.1215, + "step": 9410 + }, + { + "epoch": 0.18117039537265422, + "grad_norm": 2200.2629717432324, + "learning_rate": 1.8560695544047388e-05, + "loss": 452.2628, + "step": 9420 + }, + { + "epoch": 0.18136272063313474, + "grad_norm": 2675.380971957908, + "learning_rate": 1.8557539475712538e-05, + "loss": 453.2036, + "step": 9430 + }, + { + "epoch": 0.1815550458936153, + "grad_norm": 2785.1580728918234, + "learning_rate": 1.855438021983556e-05, + "loss": 430.1964, + "step": 9440 + }, + { + "epoch": 0.1817473711540958, + "grad_norm": 2756.895002991828, + "learning_rate": 1.8551217777593233e-05, + "loss": 430.8823, + "step": 9450 + }, + { + "epoch": 0.18193969641457633, + "grad_norm": 2614.708846604072, + "learning_rate": 1.8548052150163514e-05, + "loss": 427.9357, + "step": 9460 + }, + { + "epoch": 0.18213202167505685, + "grad_norm": 2599.5556389036838, + "learning_rate": 1.8544883338725544e-05, + "loss": 416.8458, + "step": 9470 + }, + { + "epoch": 0.18232434693553737, + "grad_norm": 3151.279742287895, + "learning_rate": 1.8541711344459652e-05, + "loss": 430.7875, + "step": 9480 + }, + { + "epoch": 0.18251667219601792, + "grad_norm": 3042.373648887261, + "learning_rate": 1.8538536168547353e-05, + "loss": 443.7286, + "step": 9490 + }, + { + "epoch": 0.18270899745649843, + "grad_norm": 2903.0010357803503, + "learning_rate": 1.8535357812171356e-05, + "loss": 446.3522, + "step": 9500 + }, + { + "epoch": 0.18290132271697895, + "grad_norm": 2767.1010436271704, + "learning_rate": 1.8532176276515538e-05, + "loss": 429.4558, + "step": 9510 + }, + { + "epoch": 0.18309364797745947, + "grad_norm": 2603.6546380581676, + "learning_rate": 1.8528991562764967e-05, + "loss": 431.4535, + "step": 9520 + }, + { + "epoch": 0.18328597323794, + "grad_norm": 2559.83977407247, + "learning_rate": 1.85258036721059e-05, + "loss": 425.6351, + "step": 9530 + }, + { + "epoch": 0.18347829849842054, + "grad_norm": 2771.59820476445, + "learning_rate": 1.8522612605725777e-05, + "loss": 436.8646, + "step": 9540 + }, + { + "epoch": 0.18367062375890106, + "grad_norm": 2550.3542997155123, + "learning_rate": 1.8519418364813215e-05, + "loss": 429.8599, + "step": 9550 + }, + { + "epoch": 0.18386294901938158, + "grad_norm": 2349.4890345142303, + "learning_rate": 1.851622095055801e-05, + "loss": 425.3041, + "step": 9560 + }, + { + "epoch": 0.1840552742798621, + "grad_norm": 2356.380351723672, + "learning_rate": 1.8513020364151155e-05, + "loss": 428.1639, + "step": 9570 + }, + { + "epoch": 0.18424759954034262, + "grad_norm": 3117.0554903200723, + "learning_rate": 1.850981660678481e-05, + "loss": 427.749, + "step": 9580 + }, + { + "epoch": 0.18443992480082316, + "grad_norm": 2663.287913875028, + "learning_rate": 1.8506609679652323e-05, + "loss": 426.2586, + "step": 9590 + }, + { + "epoch": 0.18463225006130368, + "grad_norm": 2538.139707007356, + "learning_rate": 1.8503399583948224e-05, + "loss": 438.3786, + "step": 9600 + }, + { + "epoch": 0.1848245753217842, + "grad_norm": 2473.0098406620523, + "learning_rate": 1.8500186320868215e-05, + "loss": 419.1407, + "step": 9610 + }, + { + "epoch": 0.18501690058226472, + "grad_norm": 2508.558120856946, + "learning_rate": 1.8496969891609186e-05, + "loss": 444.3701, + "step": 9620 + }, + { + "epoch": 0.18520922584274524, + "grad_norm": 2503.30195361973, + "learning_rate": 1.8493750297369208e-05, + "loss": 442.5193, + "step": 9630 + }, + { + "epoch": 0.1854015511032258, + "grad_norm": 2408.15371260309, + "learning_rate": 1.849052753934752e-05, + "loss": 438.0247, + "step": 9640 + }, + { + "epoch": 0.1855938763637063, + "grad_norm": 2994.015716679271, + "learning_rate": 1.8487301618744552e-05, + "loss": 440.4727, + "step": 9650 + }, + { + "epoch": 0.18578620162418683, + "grad_norm": 2383.8264213177185, + "learning_rate": 1.84840725367619e-05, + "loss": 415.5592, + "step": 9660 + }, + { + "epoch": 0.18597852688466734, + "grad_norm": 2437.0946812610778, + "learning_rate": 1.8480840294602352e-05, + "loss": 421.3766, + "step": 9670 + }, + { + "epoch": 0.18617085214514786, + "grad_norm": 2591.1241802281756, + "learning_rate": 1.8477604893469857e-05, + "loss": 438.2731, + "step": 9680 + }, + { + "epoch": 0.1863631774056284, + "grad_norm": 2426.706370070809, + "learning_rate": 1.847436633456955e-05, + "loss": 436.0906, + "step": 9690 + }, + { + "epoch": 0.18655550266610893, + "grad_norm": 2249.9550569722387, + "learning_rate": 1.8471124619107744e-05, + "loss": 416.2272, + "step": 9700 + }, + { + "epoch": 0.18674782792658945, + "grad_norm": 2218.356740664979, + "learning_rate": 1.846787974829192e-05, + "loss": 422.8797, + "step": 9710 + }, + { + "epoch": 0.18694015318706997, + "grad_norm": 2675.610068113475, + "learning_rate": 1.8464631723330745e-05, + "loss": 432.1363, + "step": 9720 + }, + { + "epoch": 0.1871324784475505, + "grad_norm": 2308.209685452142, + "learning_rate": 1.8461380545434054e-05, + "loss": 415.4586, + "step": 9730 + }, + { + "epoch": 0.18732480370803103, + "grad_norm": 2387.1713189371644, + "learning_rate": 1.8458126215812848e-05, + "loss": 415.1146, + "step": 9740 + }, + { + "epoch": 0.18751712896851155, + "grad_norm": 2676.7830843197266, + "learning_rate": 1.845486873567932e-05, + "loss": 424.1718, + "step": 9750 + }, + { + "epoch": 0.18770945422899207, + "grad_norm": 2472.5215166530497, + "learning_rate": 1.8451608106246822e-05, + "loss": 414.0548, + "step": 9760 + }, + { + "epoch": 0.1879017794894726, + "grad_norm": 10329.26524510405, + "learning_rate": 1.8448344328729893e-05, + "loss": 433.7043, + "step": 9770 + }, + { + "epoch": 0.1880941047499531, + "grad_norm": 2483.168622703879, + "learning_rate": 1.8445077404344226e-05, + "loss": 413.1561, + "step": 9780 + }, + { + "epoch": 0.18828643001043366, + "grad_norm": 2367.901590900312, + "learning_rate": 1.8441807334306702e-05, + "loss": 419.4771, + "step": 9790 + }, + { + "epoch": 0.18847875527091418, + "grad_norm": 2407.46881799832, + "learning_rate": 1.8438534119835365e-05, + "loss": 409.0065, + "step": 9800 + }, + { + "epoch": 0.1886710805313947, + "grad_norm": 2634.4653171152536, + "learning_rate": 1.8435257762149436e-05, + "loss": 426.1817, + "step": 9810 + }, + { + "epoch": 0.18886340579187522, + "grad_norm": 2282.70870343972, + "learning_rate": 1.8431978262469305e-05, + "loss": 431.1925, + "step": 9820 + }, + { + "epoch": 0.18905573105235574, + "grad_norm": 2429.9620623316027, + "learning_rate": 1.8428695622016532e-05, + "loss": 414.264, + "step": 9830 + }, + { + "epoch": 0.18924805631283628, + "grad_norm": 2594.6612237535387, + "learning_rate": 1.8425409842013843e-05, + "loss": 428.9275, + "step": 9840 + }, + { + "epoch": 0.1894403815733168, + "grad_norm": 2650.7523909612446, + "learning_rate": 1.8422120923685135e-05, + "loss": 410.5828, + "step": 9850 + }, + { + "epoch": 0.18963270683379732, + "grad_norm": 2444.278485671017, + "learning_rate": 1.8418828868255484e-05, + "loss": 418.427, + "step": 9860 + }, + { + "epoch": 0.18982503209427784, + "grad_norm": 2456.8782042718963, + "learning_rate": 1.8415533676951117e-05, + "loss": 426.9792, + "step": 9870 + }, + { + "epoch": 0.19001735735475836, + "grad_norm": 2491.4483740263368, + "learning_rate": 1.8412235350999444e-05, + "loss": 432.1644, + "step": 9880 + }, + { + "epoch": 0.19020968261523888, + "grad_norm": 2476.0588561727045, + "learning_rate": 1.840893389162903e-05, + "loss": 462.9826, + "step": 9890 + }, + { + "epoch": 0.19040200787571943, + "grad_norm": 2732.8547628771053, + "learning_rate": 1.8405629300069626e-05, + "loss": 427.2529, + "step": 9900 + }, + { + "epoch": 0.19059433313619994, + "grad_norm": 2595.2345543319143, + "learning_rate": 1.840232157755213e-05, + "loss": 426.8976, + "step": 9910 + }, + { + "epoch": 0.19078665839668046, + "grad_norm": 2884.291239899689, + "learning_rate": 1.8399010725308616e-05, + "loss": 415.4808, + "step": 9920 + }, + { + "epoch": 0.19097898365716098, + "grad_norm": 2383.5731911539187, + "learning_rate": 1.839569674457232e-05, + "loss": 424.0588, + "step": 9930 + }, + { + "epoch": 0.1911713089176415, + "grad_norm": 2524.5105683796714, + "learning_rate": 1.8392379636577647e-05, + "loss": 439.3013, + "step": 9940 + }, + { + "epoch": 0.19136363417812205, + "grad_norm": 2566.1891981854415, + "learning_rate": 1.8389059402560165e-05, + "loss": 427.5253, + "step": 9950 + }, + { + "epoch": 0.19155595943860257, + "grad_norm": 2357.3355139382893, + "learning_rate": 1.8385736043756605e-05, + "loss": 421.2647, + "step": 9960 + }, + { + "epoch": 0.1917482846990831, + "grad_norm": 2578.7377395645394, + "learning_rate": 1.838240956140486e-05, + "loss": 427.3051, + "step": 9970 + }, + { + "epoch": 0.1919406099595636, + "grad_norm": 2396.6699178415674, + "learning_rate": 1.8379079956743996e-05, + "loss": 441.4921, + "step": 9980 + }, + { + "epoch": 0.19213293522004413, + "grad_norm": 28306.667678786922, + "learning_rate": 1.8375747231014233e-05, + "loss": 425.5845, + "step": 9990 + }, + { + "epoch": 0.19232526048052467, + "grad_norm": 2911.719873512512, + "learning_rate": 1.8372411385456956e-05, + "loss": 427.8772, + "step": 10000 + }, + { + "epoch": 0.1925175857410052, + "grad_norm": 2397.3024696914417, + "learning_rate": 1.8369072421314717e-05, + "loss": 415.5873, + "step": 10010 + }, + { + "epoch": 0.1927099110014857, + "grad_norm": 2596.583132820523, + "learning_rate": 1.8365730339831212e-05, + "loss": 412.709, + "step": 10020 + }, + { + "epoch": 0.19290223626196623, + "grad_norm": 2447.7325883524313, + "learning_rate": 1.8362385142251328e-05, + "loss": 424.9756, + "step": 10030 + }, + { + "epoch": 0.19309456152244675, + "grad_norm": 2610.1759248125973, + "learning_rate": 1.8359036829821085e-05, + "loss": 415.7236, + "step": 10040 + }, + { + "epoch": 0.1932868867829273, + "grad_norm": 2568.3642489728477, + "learning_rate": 1.8355685403787677e-05, + "loss": 410.082, + "step": 10050 + }, + { + "epoch": 0.19347921204340782, + "grad_norm": 2297.950766179293, + "learning_rate": 1.8352330865399457e-05, + "loss": 407.8644, + "step": 10060 + }, + { + "epoch": 0.19367153730388834, + "grad_norm": 2746.3368471131, + "learning_rate": 1.834897321590593e-05, + "loss": 415.3305, + "step": 10070 + }, + { + "epoch": 0.19386386256436886, + "grad_norm": 2174.4847725716004, + "learning_rate": 1.8345612456557767e-05, + "loss": 404.8042, + "step": 10080 + }, + { + "epoch": 0.19405618782484937, + "grad_norm": 2282.3197924485125, + "learning_rate": 1.83422485886068e-05, + "loss": 408.8012, + "step": 10090 + }, + { + "epoch": 0.19424851308532992, + "grad_norm": 2823.9083708204867, + "learning_rate": 1.833888161330601e-05, + "loss": 435.0374, + "step": 10100 + }, + { + "epoch": 0.19444083834581044, + "grad_norm": 2285.562841291326, + "learning_rate": 1.833551153190954e-05, + "loss": 410.8455, + "step": 10110 + }, + { + "epoch": 0.19463316360629096, + "grad_norm": 2394.9424523734256, + "learning_rate": 1.8332138345672686e-05, + "loss": 408.7798, + "step": 10120 + }, + { + "epoch": 0.19482548886677148, + "grad_norm": 2483.776180779221, + "learning_rate": 1.832876205585191e-05, + "loss": 419.2998, + "step": 10130 + }, + { + "epoch": 0.195017814127252, + "grad_norm": 2348.916678173444, + "learning_rate": 1.8325382663704826e-05, + "loss": 414.339, + "step": 10140 + }, + { + "epoch": 0.19521013938773255, + "grad_norm": 2257.972693658309, + "learning_rate": 1.8322000170490194e-05, + "loss": 408.5225, + "step": 10150 + }, + { + "epoch": 0.19540246464821306, + "grad_norm": 2546.3267920016447, + "learning_rate": 1.831861457746794e-05, + "loss": 417.0976, + "step": 10160 + }, + { + "epoch": 0.19559478990869358, + "grad_norm": 2321.8695956707966, + "learning_rate": 1.8315225885899144e-05, + "loss": 422.5677, + "step": 10170 + }, + { + "epoch": 0.1957871151691741, + "grad_norm": 2483.177157201622, + "learning_rate": 1.8311834097046038e-05, + "loss": 407.5825, + "step": 10180 + }, + { + "epoch": 0.19597944042965462, + "grad_norm": 3199.911756595318, + "learning_rate": 1.8308439212172e-05, + "loss": 420.1181, + "step": 10190 + }, + { + "epoch": 0.19617176569013517, + "grad_norm": 2405.0842741328424, + "learning_rate": 1.830504123254158e-05, + "loss": 428.5215, + "step": 10200 + }, + { + "epoch": 0.1963640909506157, + "grad_norm": 2853.6141960433793, + "learning_rate": 1.830164015942046e-05, + "loss": 416.0092, + "step": 10210 + }, + { + "epoch": 0.1965564162110962, + "grad_norm": 6150.725113057045, + "learning_rate": 1.8298235994075488e-05, + "loss": 407.6205, + "step": 10220 + }, + { + "epoch": 0.19674874147157673, + "grad_norm": 2941.968311887401, + "learning_rate": 1.829482873777466e-05, + "loss": 425.1357, + "step": 10230 + }, + { + "epoch": 0.19694106673205725, + "grad_norm": 13541.236660979861, + "learning_rate": 1.8291418391787116e-05, + "loss": 430.1037, + "step": 10240 + }, + { + "epoch": 0.1971333919925378, + "grad_norm": 2687.732781345709, + "learning_rate": 1.8288004957383162e-05, + "loss": 416.6701, + "step": 10250 + }, + { + "epoch": 0.1973257172530183, + "grad_norm": 2488.094560088087, + "learning_rate": 1.8284588435834242e-05, + "loss": 415.4613, + "step": 10260 + }, + { + "epoch": 0.19751804251349883, + "grad_norm": 2585.5638282418886, + "learning_rate": 1.828116882841295e-05, + "loss": 416.3445, + "step": 10270 + }, + { + "epoch": 0.19771036777397935, + "grad_norm": 2391.2602293948394, + "learning_rate": 1.8277746136393042e-05, + "loss": 428.4682, + "step": 10280 + }, + { + "epoch": 0.19790269303445987, + "grad_norm": 2629.0570084707033, + "learning_rate": 1.827432036104941e-05, + "loss": 411.7794, + "step": 10290 + }, + { + "epoch": 0.19809501829494042, + "grad_norm": 2444.479259928908, + "learning_rate": 1.8270891503658096e-05, + "loss": 411.8743, + "step": 10300 + }, + { + "epoch": 0.19828734355542094, + "grad_norm": 2421.4364067604934, + "learning_rate": 1.8267459565496298e-05, + "loss": 417.5099, + "step": 10310 + }, + { + "epoch": 0.19847966881590146, + "grad_norm": 2475.220447059186, + "learning_rate": 1.8264024547842346e-05, + "loss": 412.6634, + "step": 10320 + }, + { + "epoch": 0.19867199407638197, + "grad_norm": 2148.9450390888987, + "learning_rate": 1.8260586451975745e-05, + "loss": 406.1828, + "step": 10330 + }, + { + "epoch": 0.1988643193368625, + "grad_norm": 2668.7152817319225, + "learning_rate": 1.825714527917711e-05, + "loss": 422.5888, + "step": 10340 + }, + { + "epoch": 0.199056644597343, + "grad_norm": 2846.3524448522394, + "learning_rate": 1.8253701030728235e-05, + "loss": 417.4071, + "step": 10350 + }, + { + "epoch": 0.19924896985782356, + "grad_norm": 2663.3106333984706, + "learning_rate": 1.8250253707912036e-05, + "loss": 421.4405, + "step": 10360 + }, + { + "epoch": 0.19944129511830408, + "grad_norm": 2278.902298079059, + "learning_rate": 1.8246803312012593e-05, + "loss": 399.4237, + "step": 10370 + }, + { + "epoch": 0.1996336203787846, + "grad_norm": 2258.4201981721167, + "learning_rate": 1.8243349844315116e-05, + "loss": 404.1829, + "step": 10380 + }, + { + "epoch": 0.19982594563926512, + "grad_norm": 2696.685765780273, + "learning_rate": 1.8239893306105966e-05, + "loss": 404.6022, + "step": 10390 + }, + { + "epoch": 0.20001827089974564, + "grad_norm": 2250.8881015934917, + "learning_rate": 1.823643369867264e-05, + "loss": 405.7177, + "step": 10400 + }, + { + "epoch": 0.20021059616022618, + "grad_norm": 2173.0644028037655, + "learning_rate": 1.8232971023303798e-05, + "loss": 396.2673, + "step": 10410 + }, + { + "epoch": 0.2004029214207067, + "grad_norm": 2752.758966792303, + "learning_rate": 1.8229505281289216e-05, + "loss": 419.2991, + "step": 10420 + }, + { + "epoch": 0.20059524668118722, + "grad_norm": 2492.381749055844, + "learning_rate": 1.8226036473919836e-05, + "loss": 398.8003, + "step": 10430 + }, + { + "epoch": 0.20078757194166774, + "grad_norm": 2327.00730066168, + "learning_rate": 1.8222564602487724e-05, + "loss": 408.9353, + "step": 10440 + }, + { + "epoch": 0.20097989720214826, + "grad_norm": 2359.3698107997043, + "learning_rate": 1.82190896682861e-05, + "loss": 406.7283, + "step": 10450 + }, + { + "epoch": 0.2011722224626288, + "grad_norm": 2594.1865103408004, + "learning_rate": 1.8215611672609316e-05, + "loss": 392.2045, + "step": 10460 + }, + { + "epoch": 0.20136454772310933, + "grad_norm": 2727.18681436104, + "learning_rate": 1.821213061675287e-05, + "loss": 406.7791, + "step": 10470 + }, + { + "epoch": 0.20155687298358985, + "grad_norm": 2739.1610554666677, + "learning_rate": 1.8208646502013395e-05, + "loss": 407.5338, + "step": 10480 + }, + { + "epoch": 0.20174919824407037, + "grad_norm": 2392.15464023485, + "learning_rate": 1.820515932968867e-05, + "loss": 406.5358, + "step": 10490 + }, + { + "epoch": 0.20194152350455088, + "grad_norm": 2452.371794680503, + "learning_rate": 1.8201669101077608e-05, + "loss": 409.1662, + "step": 10500 + }, + { + "epoch": 0.20213384876503143, + "grad_norm": 2394.1563961392794, + "learning_rate": 1.819817581748026e-05, + "loss": 426.1443, + "step": 10510 + }, + { + "epoch": 0.20232617402551195, + "grad_norm": 2527.0573621370754, + "learning_rate": 1.8194679480197817e-05, + "loss": 408.445, + "step": 10520 + }, + { + "epoch": 0.20251849928599247, + "grad_norm": 2493.0292909377595, + "learning_rate": 1.8191180090532608e-05, + "loss": 411.8142, + "step": 10530 + }, + { + "epoch": 0.202710824546473, + "grad_norm": 2706.38906962919, + "learning_rate": 1.8187677649788097e-05, + "loss": 401.5184, + "step": 10540 + }, + { + "epoch": 0.2029031498069535, + "grad_norm": 2332.4821868204363, + "learning_rate": 1.8184172159268884e-05, + "loss": 417.2245, + "step": 10550 + }, + { + "epoch": 0.20309547506743406, + "grad_norm": 2431.6873975187805, + "learning_rate": 1.818066362028071e-05, + "loss": 408.8991, + "step": 10560 + }, + { + "epoch": 0.20328780032791458, + "grad_norm": 2241.9349140611075, + "learning_rate": 1.8177152034130442e-05, + "loss": 420.855, + "step": 10570 + }, + { + "epoch": 0.2034801255883951, + "grad_norm": 2713.146169055902, + "learning_rate": 1.8173637402126093e-05, + "loss": 421.2833, + "step": 10580 + }, + { + "epoch": 0.2036724508488756, + "grad_norm": 2450.1724149154816, + "learning_rate": 1.8170119725576808e-05, + "loss": 411.6564, + "step": 10590 + }, + { + "epoch": 0.20386477610935613, + "grad_norm": 2394.9678777535923, + "learning_rate": 1.8166599005792852e-05, + "loss": 405.5297, + "step": 10600 + }, + { + "epoch": 0.20405710136983668, + "grad_norm": 2388.8301954499243, + "learning_rate": 1.816307524408565e-05, + "loss": 398.6396, + "step": 10610 + }, + { + "epoch": 0.2042494266303172, + "grad_norm": 2616.6871594197464, + "learning_rate": 1.8159548441767732e-05, + "loss": 403.034, + "step": 10620 + }, + { + "epoch": 0.20444175189079772, + "grad_norm": 2345.9857639268953, + "learning_rate": 1.8156018600152777e-05, + "loss": 408.3324, + "step": 10630 + }, + { + "epoch": 0.20463407715127824, + "grad_norm": 2600.5443813452302, + "learning_rate": 1.81524857205556e-05, + "loss": 410.9663, + "step": 10640 + }, + { + "epoch": 0.20482640241175876, + "grad_norm": 2491.259876868476, + "learning_rate": 1.814894980429213e-05, + "loss": 428.4407, + "step": 10650 + }, + { + "epoch": 0.2050187276722393, + "grad_norm": 2227.1327928258866, + "learning_rate": 1.8145410852679447e-05, + "loss": 409.756, + "step": 10660 + }, + { + "epoch": 0.20521105293271982, + "grad_norm": 2400.673689900987, + "learning_rate": 1.8141868867035745e-05, + "loss": 400.873, + "step": 10670 + }, + { + "epoch": 0.20540337819320034, + "grad_norm": 2463.62094591511, + "learning_rate": 1.8138323848680354e-05, + "loss": 399.8795, + "step": 10680 + }, + { + "epoch": 0.20559570345368086, + "grad_norm": 2262.0422015220383, + "learning_rate": 1.813477579893374e-05, + "loss": 402.7038, + "step": 10690 + }, + { + "epoch": 0.20578802871416138, + "grad_norm": 2747.5668526902605, + "learning_rate": 1.8131224719117497e-05, + "loss": 403.4552, + "step": 10700 + }, + { + "epoch": 0.20598035397464193, + "grad_norm": 2403.8496516176165, + "learning_rate": 1.8127670610554332e-05, + "loss": 406.5367, + "step": 10710 + }, + { + "epoch": 0.20617267923512245, + "grad_norm": 2206.889261105498, + "learning_rate": 1.81241134745681e-05, + "loss": 399.0284, + "step": 10720 + }, + { + "epoch": 0.20636500449560297, + "grad_norm": 2559.568540180695, + "learning_rate": 1.812055331248377e-05, + "loss": 411.964, + "step": 10730 + }, + { + "epoch": 0.20655732975608349, + "grad_norm": 2581.72622460684, + "learning_rate": 1.811699012562745e-05, + "loss": 410.2328, + "step": 10740 + }, + { + "epoch": 0.206749655016564, + "grad_norm": 2680.00126818257, + "learning_rate": 1.8113423915326362e-05, + "loss": 419.1748, + "step": 10750 + }, + { + "epoch": 0.20694198027704455, + "grad_norm": 2513.56967677966, + "learning_rate": 1.8109854682908864e-05, + "loss": 418.2968, + "step": 10760 + }, + { + "epoch": 0.20713430553752507, + "grad_norm": 2623.4003108539373, + "learning_rate": 1.8106282429704436e-05, + "loss": 410.6109, + "step": 10770 + }, + { + "epoch": 0.2073266307980056, + "grad_norm": 2405.1786942492386, + "learning_rate": 1.810270715704368e-05, + "loss": 420.2175, + "step": 10780 + }, + { + "epoch": 0.2075189560584861, + "grad_norm": 2570.270819036147, + "learning_rate": 1.809912886625833e-05, + "loss": 404.7516, + "step": 10790 + }, + { + "epoch": 0.20771128131896663, + "grad_norm": 2297.8982421671208, + "learning_rate": 1.8095547558681243e-05, + "loss": 420.8726, + "step": 10800 + }, + { + "epoch": 0.20790360657944715, + "grad_norm": 2471.2642815329295, + "learning_rate": 1.809196323564639e-05, + "loss": 416.5768, + "step": 10810 + }, + { + "epoch": 0.2080959318399277, + "grad_norm": 2485.2638635002304, + "learning_rate": 1.8088375898488873e-05, + "loss": 397.1862, + "step": 10820 + }, + { + "epoch": 0.20828825710040821, + "grad_norm": 2240.332621458524, + "learning_rate": 1.808478554854492e-05, + "loss": 409.2945, + "step": 10830 + }, + { + "epoch": 0.20848058236088873, + "grad_norm": 2310.0913790318946, + "learning_rate": 1.8081192187151873e-05, + "loss": 403.7675, + "step": 10840 + }, + { + "epoch": 0.20867290762136925, + "grad_norm": 2197.1117063416928, + "learning_rate": 1.8077595815648202e-05, + "loss": 397.342, + "step": 10850 + }, + { + "epoch": 0.20886523288184977, + "grad_norm": 2195.730145545457, + "learning_rate": 1.8073996435373494e-05, + "loss": 407.2368, + "step": 10860 + }, + { + "epoch": 0.20905755814233032, + "grad_norm": 2427.4795901523935, + "learning_rate": 1.8070394047668466e-05, + "loss": 403.1966, + "step": 10870 + }, + { + "epoch": 0.20924988340281084, + "grad_norm": 2226.296377914443, + "learning_rate": 1.8066788653874936e-05, + "loss": 390.1516, + "step": 10880 + }, + { + "epoch": 0.20944220866329136, + "grad_norm": 2542.001444994491, + "learning_rate": 1.806318025533586e-05, + "loss": 403.8007, + "step": 10890 + }, + { + "epoch": 0.20963453392377188, + "grad_norm": 2509.4441559480447, + "learning_rate": 1.805956885339531e-05, + "loss": 413.4315, + "step": 10900 + }, + { + "epoch": 0.2098268591842524, + "grad_norm": 2531.563606008494, + "learning_rate": 1.8055954449398472e-05, + "loss": 419.1451, + "step": 10910 + }, + { + "epoch": 0.21001918444473294, + "grad_norm": 3092.553750298103, + "learning_rate": 1.8052337044691648e-05, + "loss": 400.9566, + "step": 10920 + }, + { + "epoch": 0.21021150970521346, + "grad_norm": 2382.377838914781, + "learning_rate": 1.8048716640622262e-05, + "loss": 408.889, + "step": 10930 + }, + { + "epoch": 0.21040383496569398, + "grad_norm": 2490.8763516619233, + "learning_rate": 1.8045093238538856e-05, + "loss": 413.3729, + "step": 10940 + }, + { + "epoch": 0.2105961602261745, + "grad_norm": 2660.505139688422, + "learning_rate": 1.8041466839791087e-05, + "loss": 398.592, + "step": 10950 + }, + { + "epoch": 0.21078848548665502, + "grad_norm": 2164.5340715385973, + "learning_rate": 1.8037837445729733e-05, + "loss": 398.3774, + "step": 10960 + }, + { + "epoch": 0.21098081074713557, + "grad_norm": 2349.449614351497, + "learning_rate": 1.803420505770668e-05, + "loss": 395.1659, + "step": 10970 + }, + { + "epoch": 0.21117313600761609, + "grad_norm": 2435.3497667190113, + "learning_rate": 1.803056967707493e-05, + "loss": 396.0531, + "step": 10980 + }, + { + "epoch": 0.2113654612680966, + "grad_norm": 2246.816610446081, + "learning_rate": 1.8026931305188603e-05, + "loss": 410.702, + "step": 10990 + }, + { + "epoch": 0.21155778652857712, + "grad_norm": 2356.254917816634, + "learning_rate": 1.802328994340294e-05, + "loss": 404.5293, + "step": 11000 + }, + { + "epoch": 0.21175011178905764, + "grad_norm": 3024.863226923607, + "learning_rate": 1.8019645593074275e-05, + "loss": 401.4539, + "step": 11010 + }, + { + "epoch": 0.2119424370495382, + "grad_norm": 2458.6315749528885, + "learning_rate": 1.8015998255560082e-05, + "loss": 383.7941, + "step": 11020 + }, + { + "epoch": 0.2121347623100187, + "grad_norm": 2482.937050752312, + "learning_rate": 1.801234793221892e-05, + "loss": 390.7949, + "step": 11030 + }, + { + "epoch": 0.21232708757049923, + "grad_norm": 2130.659983516312, + "learning_rate": 1.800869462441049e-05, + "loss": 374.8555, + "step": 11040 + }, + { + "epoch": 0.21251941283097975, + "grad_norm": 3436.892560409446, + "learning_rate": 1.8005038333495572e-05, + "loss": 398.9813, + "step": 11050 + }, + { + "epoch": 0.21271173809146027, + "grad_norm": 2549.1782406530206, + "learning_rate": 1.8001379060836088e-05, + "loss": 403.375, + "step": 11060 + }, + { + "epoch": 0.21290406335194081, + "grad_norm": 2266.0715382189323, + "learning_rate": 1.7997716807795046e-05, + "loss": 404.9078, + "step": 11070 + }, + { + "epoch": 0.21309638861242133, + "grad_norm": 2373.7615169944665, + "learning_rate": 1.7994051575736585e-05, + "loss": 395.2838, + "step": 11080 + }, + { + "epoch": 0.21328871387290185, + "grad_norm": 2478.8311099791295, + "learning_rate": 1.7990383366025935e-05, + "loss": 405.3711, + "step": 11090 + }, + { + "epoch": 0.21348103913338237, + "grad_norm": 2577.515646094947, + "learning_rate": 1.7986712180029448e-05, + "loss": 393.1514, + "step": 11100 + }, + { + "epoch": 0.2136733643938629, + "grad_norm": 2362.7725035869066, + "learning_rate": 1.798303801911458e-05, + "loss": 398.3662, + "step": 11110 + }, + { + "epoch": 0.21386568965434344, + "grad_norm": 2163.3205259141787, + "learning_rate": 1.7979360884649894e-05, + "loss": 395.3951, + "step": 11120 + }, + { + "epoch": 0.21405801491482396, + "grad_norm": 2235.8150793368454, + "learning_rate": 1.7975680778005058e-05, + "loss": 392.3371, + "step": 11130 + }, + { + "epoch": 0.21425034017530448, + "grad_norm": 2299.195976343412, + "learning_rate": 1.7971997700550856e-05, + "loss": 402.759, + "step": 11140 + }, + { + "epoch": 0.214442665435785, + "grad_norm": 2592.0103304619306, + "learning_rate": 1.7968311653659177e-05, + "loss": 403.5507, + "step": 11150 + }, + { + "epoch": 0.21463499069626552, + "grad_norm": 2377.799379865453, + "learning_rate": 1.7964622638703003e-05, + "loss": 407.2532, + "step": 11160 + }, + { + "epoch": 0.21482731595674606, + "grad_norm": 3131.9266768005905, + "learning_rate": 1.796093065705644e-05, + "loss": 394.6042, + "step": 11170 + }, + { + "epoch": 0.21501964121722658, + "grad_norm": 2221.285884736338, + "learning_rate": 1.7957235710094686e-05, + "loss": 381.3053, + "step": 11180 + }, + { + "epoch": 0.2152119664777071, + "grad_norm": 2451.7127486980853, + "learning_rate": 1.7953537799194042e-05, + "loss": 397.5707, + "step": 11190 + }, + { + "epoch": 0.21540429173818762, + "grad_norm": 2261.694226947071, + "learning_rate": 1.7949836925731934e-05, + "loss": 411.3794, + "step": 11200 + }, + { + "epoch": 0.21559661699866814, + "grad_norm": 2344.328939293039, + "learning_rate": 1.7946133091086858e-05, + "loss": 405.38, + "step": 11210 + }, + { + "epoch": 0.21578894225914869, + "grad_norm": 2744.2452677672222, + "learning_rate": 1.7942426296638447e-05, + "loss": 400.7316, + "step": 11220 + }, + { + "epoch": 0.2159812675196292, + "grad_norm": 2179.5410545432596, + "learning_rate": 1.7938716543767412e-05, + "loss": 395.5047, + "step": 11230 + }, + { + "epoch": 0.21617359278010972, + "grad_norm": 2932.745050424784, + "learning_rate": 1.7935003833855576e-05, + "loss": 403.2667, + "step": 11240 + }, + { + "epoch": 0.21636591804059024, + "grad_norm": 2328.6822430694315, + "learning_rate": 1.7931288168285863e-05, + "loss": 400.5162, + "step": 11250 + }, + { + "epoch": 0.21655824330107076, + "grad_norm": 2404.7254552353556, + "learning_rate": 1.79275695484423e-05, + "loss": 394.3482, + "step": 11260 + }, + { + "epoch": 0.21675056856155128, + "grad_norm": 2724.9999609740257, + "learning_rate": 1.7923847975710003e-05, + "loss": 400.9097, + "step": 11270 + }, + { + "epoch": 0.21694289382203183, + "grad_norm": 2676.729401971507, + "learning_rate": 1.7920123451475203e-05, + "loss": 408.8387, + "step": 11280 + }, + { + "epoch": 0.21713521908251235, + "grad_norm": 2608.657703456628, + "learning_rate": 1.7916395977125227e-05, + "loss": 388.8734, + "step": 11290 + }, + { + "epoch": 0.21732754434299287, + "grad_norm": 2554.1094913876814, + "learning_rate": 1.7912665554048486e-05, + "loss": 401.6312, + "step": 11300 + }, + { + "epoch": 0.2175198696034734, + "grad_norm": 2526.7331695023895, + "learning_rate": 1.7908932183634515e-05, + "loss": 393.5017, + "step": 11310 + }, + { + "epoch": 0.2177121948639539, + "grad_norm": 2248.5706584848404, + "learning_rate": 1.790519586727392e-05, + "loss": 402.7919, + "step": 11320 + }, + { + "epoch": 0.21790452012443445, + "grad_norm": 2479.6053722356187, + "learning_rate": 1.790145660635843e-05, + "loss": 395.3233, + "step": 11330 + }, + { + "epoch": 0.21809684538491497, + "grad_norm": 2431.351989522283, + "learning_rate": 1.7897714402280844e-05, + "loss": 399.1128, + "step": 11340 + }, + { + "epoch": 0.2182891706453955, + "grad_norm": 2321.3161115556927, + "learning_rate": 1.789396925643508e-05, + "loss": 414.4136, + "step": 11350 + }, + { + "epoch": 0.218481495905876, + "grad_norm": 2433.942334044243, + "learning_rate": 1.7890221170216144e-05, + "loss": 388.3796, + "step": 11360 + }, + { + "epoch": 0.21867382116635653, + "grad_norm": 2242.8210732726634, + "learning_rate": 1.788647014502013e-05, + "loss": 387.7581, + "step": 11370 + }, + { + "epoch": 0.21886614642683708, + "grad_norm": 2310.3153801960866, + "learning_rate": 1.7882716182244242e-05, + "loss": 386.1181, + "step": 11380 + }, + { + "epoch": 0.2190584716873176, + "grad_norm": 2381.915282265231, + "learning_rate": 1.7878959283286758e-05, + "loss": 388.257, + "step": 11390 + }, + { + "epoch": 0.21925079694779812, + "grad_norm": 2586.260447381824, + "learning_rate": 1.787519944954707e-05, + "loss": 388.8421, + "step": 11400 + }, + { + "epoch": 0.21944312220827863, + "grad_norm": 2199.8917309435546, + "learning_rate": 1.7871436682425645e-05, + "loss": 391.1387, + "step": 11410 + }, + { + "epoch": 0.21963544746875915, + "grad_norm": 4909.373677771122, + "learning_rate": 1.786767098332406e-05, + "loss": 404.8643, + "step": 11420 + }, + { + "epoch": 0.2198277727292397, + "grad_norm": 2265.699700181102, + "learning_rate": 1.7863902353644972e-05, + "loss": 380.425, + "step": 11430 + }, + { + "epoch": 0.22002009798972022, + "grad_norm": 2264.2999697652035, + "learning_rate": 1.7860130794792137e-05, + "loss": 389.3438, + "step": 11440 + }, + { + "epoch": 0.22021242325020074, + "grad_norm": 2600.7568210842096, + "learning_rate": 1.7856356308170394e-05, + "loss": 404.331, + "step": 11450 + }, + { + "epoch": 0.22040474851068126, + "grad_norm": 2253.3199406105978, + "learning_rate": 1.7852578895185675e-05, + "loss": 409.1266, + "step": 11460 + }, + { + "epoch": 0.22059707377116178, + "grad_norm": 2961.2445056928277, + "learning_rate": 1.7848798557245008e-05, + "loss": 398.4247, + "step": 11470 + }, + { + "epoch": 0.22078939903164232, + "grad_norm": 2488.328905763648, + "learning_rate": 1.7845015295756506e-05, + "loss": 402.7198, + "step": 11480 + }, + { + "epoch": 0.22098172429212284, + "grad_norm": 2115.781398309855, + "learning_rate": 1.784122911212937e-05, + "loss": 388.8867, + "step": 11490 + }, + { + "epoch": 0.22117404955260336, + "grad_norm": 2136.812712503691, + "learning_rate": 1.7837440007773895e-05, + "loss": 386.4281, + "step": 11500 + }, + { + "epoch": 0.22136637481308388, + "grad_norm": 2347.8785529560864, + "learning_rate": 1.783364798410146e-05, + "loss": 387.8281, + "step": 11510 + }, + { + "epoch": 0.2215587000735644, + "grad_norm": 2228.3889903200293, + "learning_rate": 1.782985304252452e-05, + "loss": 390.3264, + "step": 11520 + }, + { + "epoch": 0.22175102533404495, + "grad_norm": 2508.0085433196264, + "learning_rate": 1.7826055184456643e-05, + "loss": 400.6156, + "step": 11530 + }, + { + "epoch": 0.22194335059452547, + "grad_norm": 2729.6083957527267, + "learning_rate": 1.7822254411312455e-05, + "loss": 397.1274, + "step": 11540 + }, + { + "epoch": 0.222135675855006, + "grad_norm": 2183.676311189381, + "learning_rate": 1.781845072450769e-05, + "loss": 385.762, + "step": 11550 + }, + { + "epoch": 0.2223280011154865, + "grad_norm": 2987.6666025107056, + "learning_rate": 1.7814644125459157e-05, + "loss": 399.8271, + "step": 11560 + }, + { + "epoch": 0.22252032637596703, + "grad_norm": 2403.9241230398407, + "learning_rate": 1.781083461558475e-05, + "loss": 390.9242, + "step": 11570 + }, + { + "epoch": 0.22271265163644757, + "grad_norm": 2532.620143806683, + "learning_rate": 1.7807022196303447e-05, + "loss": 395.1546, + "step": 11580 + }, + { + "epoch": 0.2229049768969281, + "grad_norm": 2304.3143606905132, + "learning_rate": 1.7803206869035318e-05, + "loss": 390.9656, + "step": 11590 + }, + { + "epoch": 0.2230973021574086, + "grad_norm": 2497.0977539681685, + "learning_rate": 1.7799388635201498e-05, + "loss": 396.0081, + "step": 11600 + }, + { + "epoch": 0.22328962741788913, + "grad_norm": 2137.3276684673474, + "learning_rate": 1.7795567496224226e-05, + "loss": 390.5375, + "step": 11610 + }, + { + "epoch": 0.22348195267836965, + "grad_norm": 2475.2489591013623, + "learning_rate": 1.7791743453526812e-05, + "loss": 399.8304, + "step": 11620 + }, + { + "epoch": 0.2236742779388502, + "grad_norm": 2302.8248429456294, + "learning_rate": 1.778791650853364e-05, + "loss": 388.7546, + "step": 11630 + }, + { + "epoch": 0.22386660319933072, + "grad_norm": 2541.3533131710237, + "learning_rate": 1.77840866626702e-05, + "loss": 385.4423, + "step": 11640 + }, + { + "epoch": 0.22405892845981124, + "grad_norm": 2256.7805083032003, + "learning_rate": 1.7780253917363026e-05, + "loss": 388.9178, + "step": 11650 + }, + { + "epoch": 0.22425125372029175, + "grad_norm": 2453.797965663399, + "learning_rate": 1.7776418274039767e-05, + "loss": 387.315, + "step": 11660 + }, + { + "epoch": 0.22444357898077227, + "grad_norm": 2676.6923687295252, + "learning_rate": 1.7772579734129136e-05, + "loss": 396.885, + "step": 11670 + }, + { + "epoch": 0.2246359042412528, + "grad_norm": 3086.89156146947, + "learning_rate": 1.776873829906092e-05, + "loss": 389.7317, + "step": 11680 + }, + { + "epoch": 0.22482822950173334, + "grad_norm": 2424.621620486519, + "learning_rate": 1.776489397026599e-05, + "loss": 406.4202, + "step": 11690 + }, + { + "epoch": 0.22502055476221386, + "grad_norm": 2276.7448812268194, + "learning_rate": 1.7761046749176302e-05, + "loss": 383.7106, + "step": 11700 + }, + { + "epoch": 0.22521288002269438, + "grad_norm": 2313.6311991836037, + "learning_rate": 1.7757196637224874e-05, + "loss": 391.2429, + "step": 11710 + }, + { + "epoch": 0.2254052052831749, + "grad_norm": 2341.3697884828307, + "learning_rate": 1.7753343635845817e-05, + "loss": 386.1084, + "step": 11720 + }, + { + "epoch": 0.22559753054365542, + "grad_norm": 2376.0300132140237, + "learning_rate": 1.7749487746474305e-05, + "loss": 385.0678, + "step": 11730 + }, + { + "epoch": 0.22578985580413596, + "grad_norm": 2347.218519597106, + "learning_rate": 1.7745628970546592e-05, + "loss": 389.4445, + "step": 11740 + }, + { + "epoch": 0.22598218106461648, + "grad_norm": 2687.338690227089, + "learning_rate": 1.774176730950001e-05, + "loss": 373.979, + "step": 11750 + }, + { + "epoch": 0.226174506325097, + "grad_norm": 2307.7770863294495, + "learning_rate": 1.7737902764772967e-05, + "loss": 386.6169, + "step": 11760 + }, + { + "epoch": 0.22636683158557752, + "grad_norm": 3200.131284017193, + "learning_rate": 1.773403533780494e-05, + "loss": 387.5548, + "step": 11770 + }, + { + "epoch": 0.22655915684605804, + "grad_norm": 2325.677583423592, + "learning_rate": 1.7730165030036482e-05, + "loss": 383.3013, + "step": 11780 + }, + { + "epoch": 0.2267514821065386, + "grad_norm": 2438.4287045483115, + "learning_rate": 1.772629184290922e-05, + "loss": 385.3212, + "step": 11790 + }, + { + "epoch": 0.2269438073670191, + "grad_norm": 2324.824326296251, + "learning_rate": 1.7722415777865845e-05, + "loss": 375.5994, + "step": 11800 + }, + { + "epoch": 0.22713613262749963, + "grad_norm": 2443.6408456885065, + "learning_rate": 1.771853683635014e-05, + "loss": 386.032, + "step": 11810 + }, + { + "epoch": 0.22732845788798015, + "grad_norm": 2335.801762020265, + "learning_rate": 1.7714655019806932e-05, + "loss": 393.7155, + "step": 11820 + }, + { + "epoch": 0.22752078314846066, + "grad_norm": 2395.537989034479, + "learning_rate": 1.7710770329682145e-05, + "loss": 400.2536, + "step": 11830 + }, + { + "epoch": 0.2277131084089412, + "grad_norm": 2787.2395348230343, + "learning_rate": 1.770688276742276e-05, + "loss": 390.3201, + "step": 11840 + }, + { + "epoch": 0.22790543366942173, + "grad_norm": 2710.4724119666625, + "learning_rate": 1.770299233447682e-05, + "loss": 387.9175, + "step": 11850 + }, + { + "epoch": 0.22809775892990225, + "grad_norm": 2323.435954751655, + "learning_rate": 1.769909903229346e-05, + "loss": 389.8935, + "step": 11860 + }, + { + "epoch": 0.22829008419038277, + "grad_norm": 2622.2302563890967, + "learning_rate": 1.7695202862322863e-05, + "loss": 386.7145, + "step": 11870 + }, + { + "epoch": 0.2284824094508633, + "grad_norm": 2343.815966484445, + "learning_rate": 1.769130382601629e-05, + "loss": 394.5964, + "step": 11880 + }, + { + "epoch": 0.22867473471134384, + "grad_norm": 2363.7595061816423, + "learning_rate": 1.768740192482607e-05, + "loss": 388.6387, + "step": 11890 + }, + { + "epoch": 0.22886705997182435, + "grad_norm": 2269.353120654822, + "learning_rate": 1.7683497160205595e-05, + "loss": 393.3567, + "step": 11900 + }, + { + "epoch": 0.22905938523230487, + "grad_norm": 2472.494570261814, + "learning_rate": 1.7679589533609323e-05, + "loss": 392.5933, + "step": 11910 + }, + { + "epoch": 0.2292517104927854, + "grad_norm": 2223.128949708475, + "learning_rate": 1.767567904649278e-05, + "loss": 400.179, + "step": 11920 + }, + { + "epoch": 0.2294440357532659, + "grad_norm": 2589.5018549123547, + "learning_rate": 1.7671765700312567e-05, + "loss": 392.6122, + "step": 11930 + }, + { + "epoch": 0.22963636101374646, + "grad_norm": 2111.446795298321, + "learning_rate": 1.7667849496526327e-05, + "loss": 379.3064, + "step": 11940 + }, + { + "epoch": 0.22982868627422698, + "grad_norm": 2378.717511793757, + "learning_rate": 1.7663930436592793e-05, + "loss": 389.1514, + "step": 11950 + }, + { + "epoch": 0.2300210115347075, + "grad_norm": 2346.2808680197413, + "learning_rate": 1.7660008521971744e-05, + "loss": 386.1958, + "step": 11960 + }, + { + "epoch": 0.23021333679518802, + "grad_norm": 2541.89480853922, + "learning_rate": 1.765608375412403e-05, + "loss": 383.4571, + "step": 11970 + }, + { + "epoch": 0.23040566205566854, + "grad_norm": 2160.62311131137, + "learning_rate": 1.7652156134511563e-05, + "loss": 387.4947, + "step": 11980 + }, + { + "epoch": 0.23059798731614908, + "grad_norm": 2233.5666004308027, + "learning_rate": 1.7648225664597314e-05, + "loss": 390.8314, + "step": 11990 + }, + { + "epoch": 0.2307903125766296, + "grad_norm": 2263.0223621099176, + "learning_rate": 1.764429234584532e-05, + "loss": 385.8524, + "step": 12000 + }, + { + "epoch": 0.23098263783711012, + "grad_norm": 2330.289995225945, + "learning_rate": 1.7640356179720674e-05, + "loss": 365.5703, + "step": 12010 + }, + { + "epoch": 0.23117496309759064, + "grad_norm": 2282.907061035763, + "learning_rate": 1.7636417167689538e-05, + "loss": 392.4571, + "step": 12020 + }, + { + "epoch": 0.23136728835807116, + "grad_norm": 2230.4093858725714, + "learning_rate": 1.7632475311219125e-05, + "loss": 377.4121, + "step": 12030 + }, + { + "epoch": 0.2315596136185517, + "grad_norm": 2317.6649136602114, + "learning_rate": 1.7628530611777716e-05, + "loss": 384.7892, + "step": 12040 + }, + { + "epoch": 0.23175193887903223, + "grad_norm": 2202.679921921386, + "learning_rate": 1.7624583070834646e-05, + "loss": 389.4707, + "step": 12050 + }, + { + "epoch": 0.23194426413951275, + "grad_norm": 2417.2271221250494, + "learning_rate": 1.7620632689860298e-05, + "loss": 393.0368, + "step": 12060 + }, + { + "epoch": 0.23213658939999327, + "grad_norm": 2560.741612554429, + "learning_rate": 1.761667947032614e-05, + "loss": 397.0752, + "step": 12070 + }, + { + "epoch": 0.23232891466047378, + "grad_norm": 2994.689721313683, + "learning_rate": 1.761272341370467e-05, + "loss": 382.2997, + "step": 12080 + }, + { + "epoch": 0.23252123992095433, + "grad_norm": 2260.7791320488623, + "learning_rate": 1.7608764521469456e-05, + "loss": 373.9792, + "step": 12090 + }, + { + "epoch": 0.23271356518143485, + "grad_norm": 2302.393193266844, + "learning_rate": 1.760480279509512e-05, + "loss": 382.4867, + "step": 12100 + }, + { + "epoch": 0.23290589044191537, + "grad_norm": 2327.0769193689284, + "learning_rate": 1.7600838236057342e-05, + "loss": 384.5145, + "step": 12110 + }, + { + "epoch": 0.2330982157023959, + "grad_norm": 2150.98676300647, + "learning_rate": 1.759687084583285e-05, + "loss": 378.755, + "step": 12120 + }, + { + "epoch": 0.2332905409628764, + "grad_norm": 2256.504732823715, + "learning_rate": 1.7592900625899437e-05, + "loss": 389.5562, + "step": 12130 + }, + { + "epoch": 0.23348286622335693, + "grad_norm": 2274.3014291975965, + "learning_rate": 1.7588927577735943e-05, + "loss": 375.5534, + "step": 12140 + }, + { + "epoch": 0.23367519148383747, + "grad_norm": 2114.0374629396065, + "learning_rate": 1.758495170282226e-05, + "loss": 380.2994, + "step": 12150 + }, + { + "epoch": 0.233867516744318, + "grad_norm": 2357.7964137669774, + "learning_rate": 1.7580973002639337e-05, + "loss": 388.1062, + "step": 12160 + }, + { + "epoch": 0.2340598420047985, + "grad_norm": 2272.161426274605, + "learning_rate": 1.7576991478669174e-05, + "loss": 384.5954, + "step": 12170 + }, + { + "epoch": 0.23425216726527903, + "grad_norm": 2127.431549340675, + "learning_rate": 1.7573007132394823e-05, + "loss": 381.7495, + "step": 12180 + }, + { + "epoch": 0.23444449252575955, + "grad_norm": 2494.721838509276, + "learning_rate": 1.756901996530039e-05, + "loss": 384.8348, + "step": 12190 + }, + { + "epoch": 0.2346368177862401, + "grad_norm": 2877.4404041265866, + "learning_rate": 1.7565029978871025e-05, + "loss": 400.2325, + "step": 12200 + }, + { + "epoch": 0.23482914304672062, + "grad_norm": 2557.969871364385, + "learning_rate": 1.7561037174592933e-05, + "loss": 381.2346, + "step": 12210 + }, + { + "epoch": 0.23502146830720114, + "grad_norm": 2565.029485905372, + "learning_rate": 1.7557041553953368e-05, + "loss": 374.5849, + "step": 12220 + }, + { + "epoch": 0.23521379356768166, + "grad_norm": 2581.207375088013, + "learning_rate": 1.7553043118440634e-05, + "loss": 375.8843, + "step": 12230 + }, + { + "epoch": 0.23540611882816218, + "grad_norm": 2579.923372208383, + "learning_rate": 1.7549041869544077e-05, + "loss": 378.0457, + "step": 12240 + }, + { + "epoch": 0.23559844408864272, + "grad_norm": 2827.048997734147, + "learning_rate": 1.7545037808754105e-05, + "loss": 375.2411, + "step": 12250 + }, + { + "epoch": 0.23579076934912324, + "grad_norm": 2208.4241563716687, + "learning_rate": 1.754103093756216e-05, + "loss": 364.0572, + "step": 12260 + }, + { + "epoch": 0.23598309460960376, + "grad_norm": 2012.2013360667677, + "learning_rate": 1.7537021257460732e-05, + "loss": 373.4937, + "step": 12270 + }, + { + "epoch": 0.23617541987008428, + "grad_norm": 2337.7945293318235, + "learning_rate": 1.7533008769943366e-05, + "loss": 371.4375, + "step": 12280 + }, + { + "epoch": 0.2363677451305648, + "grad_norm": 2017.197630179005, + "learning_rate": 1.7528993476504644e-05, + "loss": 375.1286, + "step": 12290 + }, + { + "epoch": 0.23656007039104535, + "grad_norm": 2208.9437121421356, + "learning_rate": 1.7524975378640198e-05, + "loss": 377.4877, + "step": 12300 + }, + { + "epoch": 0.23675239565152587, + "grad_norm": 2021.4147609550564, + "learning_rate": 1.75209544778467e-05, + "loss": 375.5848, + "step": 12310 + }, + { + "epoch": 0.23694472091200638, + "grad_norm": 2605.063531196968, + "learning_rate": 1.7516930775621873e-05, + "loss": 395.9945, + "step": 12320 + }, + { + "epoch": 0.2371370461724869, + "grad_norm": 2604.6354111838214, + "learning_rate": 1.751290427346448e-05, + "loss": 380.5949, + "step": 12330 + }, + { + "epoch": 0.23732937143296742, + "grad_norm": 2134.523800512569, + "learning_rate": 1.7508874972874325e-05, + "loss": 385.1871, + "step": 12340 + }, + { + "epoch": 0.23752169669344797, + "grad_norm": 2557.6205213871312, + "learning_rate": 1.7504842875352254e-05, + "loss": 380.2885, + "step": 12350 + }, + { + "epoch": 0.2377140219539285, + "grad_norm": 2402.018914592033, + "learning_rate": 1.7500807982400155e-05, + "loss": 376.2682, + "step": 12360 + }, + { + "epoch": 0.237906347214409, + "grad_norm": 2165.764987759702, + "learning_rate": 1.749677029552097e-05, + "loss": 371.1269, + "step": 12370 + }, + { + "epoch": 0.23809867247488953, + "grad_norm": 2072.7914361726002, + "learning_rate": 1.7492729816218656e-05, + "loss": 378.911, + "step": 12380 + }, + { + "epoch": 0.23829099773537005, + "grad_norm": 2397.20874623808, + "learning_rate": 1.7488686545998237e-05, + "loss": 369.1646, + "step": 12390 + }, + { + "epoch": 0.2384833229958506, + "grad_norm": 2393.3178214051827, + "learning_rate": 1.7484640486365757e-05, + "loss": 377.2179, + "step": 12400 + }, + { + "epoch": 0.2386756482563311, + "grad_norm": 2623.916253511651, + "learning_rate": 1.748059163882831e-05, + "loss": 383.2641, + "step": 12410 + }, + { + "epoch": 0.23886797351681163, + "grad_norm": 2152.0668589588495, + "learning_rate": 1.747654000489402e-05, + "loss": 373.3912, + "step": 12420 + }, + { + "epoch": 0.23906029877729215, + "grad_norm": 2530.02619281479, + "learning_rate": 1.7472485586072062e-05, + "loss": 373.3627, + "step": 12430 + }, + { + "epoch": 0.23925262403777267, + "grad_norm": 2508.9927435530876, + "learning_rate": 1.746842838387264e-05, + "loss": 369.6259, + "step": 12440 + }, + { + "epoch": 0.23944494929825322, + "grad_norm": 3179.2708015154562, + "learning_rate": 1.746436839980698e-05, + "loss": 384.0702, + "step": 12450 + }, + { + "epoch": 0.23963727455873374, + "grad_norm": 2410.888904472343, + "learning_rate": 1.7460305635387372e-05, + "loss": 382.8646, + "step": 12460 + }, + { + "epoch": 0.23982959981921426, + "grad_norm": 2469.982232789963, + "learning_rate": 1.745624009212713e-05, + "loss": 381.0362, + "step": 12470 + }, + { + "epoch": 0.24002192507969478, + "grad_norm": 2569.9909175390508, + "learning_rate": 1.7452171771540593e-05, + "loss": 383.5919, + "step": 12480 + }, + { + "epoch": 0.2402142503401753, + "grad_norm": 2249.847929516714, + "learning_rate": 1.744810067514315e-05, + "loss": 376.9287, + "step": 12490 + }, + { + "epoch": 0.24040657560065584, + "grad_norm": 2038.242923459335, + "learning_rate": 1.7444026804451214e-05, + "loss": 373.5053, + "step": 12500 + }, + { + "epoch": 0.24059890086113636, + "grad_norm": 2250.1997887785706, + "learning_rate": 1.7439950160982236e-05, + "loss": 375.1043, + "step": 12510 + }, + { + "epoch": 0.24079122612161688, + "grad_norm": 2219.2837718383635, + "learning_rate": 1.7435870746254697e-05, + "loss": 376.5237, + "step": 12520 + }, + { + "epoch": 0.2409835513820974, + "grad_norm": 2281.0283481339156, + "learning_rate": 1.7431788561788116e-05, + "loss": 372.6687, + "step": 12530 + }, + { + "epoch": 0.24117587664257792, + "grad_norm": 2078.6147727217976, + "learning_rate": 1.742770360910303e-05, + "loss": 384.8385, + "step": 12540 + }, + { + "epoch": 0.24136820190305847, + "grad_norm": 2162.7392171748047, + "learning_rate": 1.7423615889721027e-05, + "loss": 372.5577, + "step": 12550 + }, + { + "epoch": 0.24156052716353899, + "grad_norm": 2386.920379603306, + "learning_rate": 1.7419525405164705e-05, + "loss": 389.3319, + "step": 12560 + }, + { + "epoch": 0.2417528524240195, + "grad_norm": 2756.8727323904436, + "learning_rate": 1.7415432156957713e-05, + "loss": 379.5574, + "step": 12570 + }, + { + "epoch": 0.24194517768450002, + "grad_norm": 2099.2502349035385, + "learning_rate": 1.741133614662471e-05, + "loss": 378.9946, + "step": 12580 + }, + { + "epoch": 0.24213750294498054, + "grad_norm": 2380.329571350227, + "learning_rate": 1.7407237375691394e-05, + "loss": 390.7249, + "step": 12590 + }, + { + "epoch": 0.24232982820546106, + "grad_norm": 2095.2206685250503, + "learning_rate": 1.740313584568449e-05, + "loss": 372.3252, + "step": 12600 + }, + { + "epoch": 0.2425221534659416, + "grad_norm": 2217.826751681227, + "learning_rate": 1.739903155813175e-05, + "loss": 385.4555, + "step": 12610 + }, + { + "epoch": 0.24271447872642213, + "grad_norm": 2386.987243566276, + "learning_rate": 1.7394924514561955e-05, + "loss": 377.4909, + "step": 12620 + }, + { + "epoch": 0.24290680398690265, + "grad_norm": 2204.0162900759146, + "learning_rate": 1.739081471650491e-05, + "loss": 369.1897, + "step": 12630 + }, + { + "epoch": 0.24309912924738317, + "grad_norm": 2073.5013741947837, + "learning_rate": 1.7386702165491443e-05, + "loss": 383.1417, + "step": 12640 + }, + { + "epoch": 0.24329145450786369, + "grad_norm": 2362.364142856857, + "learning_rate": 1.738258686305342e-05, + "loss": 371.7277, + "step": 12650 + }, + { + "epoch": 0.24348377976834423, + "grad_norm": 2643.743376662587, + "learning_rate": 1.7378468810723713e-05, + "loss": 385.6666, + "step": 12660 + }, + { + "epoch": 0.24367610502882475, + "grad_norm": 2286.0604513845683, + "learning_rate": 1.7374348010036235e-05, + "loss": 378.0156, + "step": 12670 + }, + { + "epoch": 0.24386843028930527, + "grad_norm": 2273.380518491826, + "learning_rate": 1.7370224462525916e-05, + "loss": 376.7685, + "step": 12680 + }, + { + "epoch": 0.2440607555497858, + "grad_norm": 2583.2678746445936, + "learning_rate": 1.736609816972871e-05, + "loss": 381.597, + "step": 12690 + }, + { + "epoch": 0.2442530808102663, + "grad_norm": 2175.324993896793, + "learning_rate": 1.7361969133181585e-05, + "loss": 361.4785, + "step": 12700 + }, + { + "epoch": 0.24444540607074686, + "grad_norm": 2113.7376989641184, + "learning_rate": 1.735783735442255e-05, + "loss": 377.5192, + "step": 12710 + }, + { + "epoch": 0.24463773133122738, + "grad_norm": 2136.1028577925226, + "learning_rate": 1.7353702834990617e-05, + "loss": 397.3817, + "step": 12720 + }, + { + "epoch": 0.2448300565917079, + "grad_norm": 2353.5458717950337, + "learning_rate": 1.7349565576425828e-05, + "loss": 360.6521, + "step": 12730 + }, + { + "epoch": 0.24502238185218841, + "grad_norm": 2289.400301786185, + "learning_rate": 1.7345425580269245e-05, + "loss": 378.4829, + "step": 12740 + }, + { + "epoch": 0.24521470711266893, + "grad_norm": 2301.400052972841, + "learning_rate": 1.734128284806295e-05, + "loss": 372.5589, + "step": 12750 + }, + { + "epoch": 0.24540703237314948, + "grad_norm": 2392.7181586747943, + "learning_rate": 1.7337137381350033e-05, + "loss": 373.6964, + "step": 12760 + }, + { + "epoch": 0.24559935763363, + "grad_norm": 2587.9513884335347, + "learning_rate": 1.7332989181674623e-05, + "loss": 363.099, + "step": 12770 + }, + { + "epoch": 0.24579168289411052, + "grad_norm": 2304.8713947992815, + "learning_rate": 1.7328838250581846e-05, + "loss": 370.9114, + "step": 12780 + }, + { + "epoch": 0.24598400815459104, + "grad_norm": 2353.5580929841517, + "learning_rate": 1.7324684589617862e-05, + "loss": 367.49, + "step": 12790 + }, + { + "epoch": 0.24617633341507156, + "grad_norm": 2552.8687113219935, + "learning_rate": 1.7320528200329846e-05, + "loss": 377.0836, + "step": 12800 + }, + { + "epoch": 0.2463686586755521, + "grad_norm": 2277.3914450397547, + "learning_rate": 1.7316369084265973e-05, + "loss": 363.0377, + "step": 12810 + }, + { + "epoch": 0.24656098393603262, + "grad_norm": 2229.9469580267078, + "learning_rate": 1.731220724297545e-05, + "loss": 361.0781, + "step": 12820 + }, + { + "epoch": 0.24675330919651314, + "grad_norm": 2290.8165063255447, + "learning_rate": 1.730804267800849e-05, + "loss": 373.4446, + "step": 12830 + }, + { + "epoch": 0.24694563445699366, + "grad_norm": 2501.9933224803535, + "learning_rate": 1.7303875390916338e-05, + "loss": 362.5407, + "step": 12840 + }, + { + "epoch": 0.24713795971747418, + "grad_norm": 2295.3147239736013, + "learning_rate": 1.729970538325122e-05, + "loss": 362.7664, + "step": 12850 + }, + { + "epoch": 0.24733028497795473, + "grad_norm": 2400.941128732704, + "learning_rate": 1.7295532656566413e-05, + "loss": 378.3463, + "step": 12860 + }, + { + "epoch": 0.24752261023843525, + "grad_norm": 2241.701808611237, + "learning_rate": 1.729135721241618e-05, + "loss": 366.0954, + "step": 12870 + }, + { + "epoch": 0.24771493549891577, + "grad_norm": 2342.4637468850588, + "learning_rate": 1.7287179052355803e-05, + "loss": 378.5448, + "step": 12880 + }, + { + "epoch": 0.2479072607593963, + "grad_norm": 2288.097396796908, + "learning_rate": 1.7282998177941586e-05, + "loss": 383.0611, + "step": 12890 + }, + { + "epoch": 0.2480995860198768, + "grad_norm": 2432.4276696859324, + "learning_rate": 1.7278814590730826e-05, + "loss": 365.5417, + "step": 12900 + }, + { + "epoch": 0.24829191128035735, + "grad_norm": 2339.524064330787, + "learning_rate": 1.7274628292281846e-05, + "loss": 363.7857, + "step": 12910 + }, + { + "epoch": 0.24848423654083787, + "grad_norm": 2257.424850624551, + "learning_rate": 1.727043928415397e-05, + "loss": 367.3295, + "step": 12920 + }, + { + "epoch": 0.2486765618013184, + "grad_norm": 2297.566011024205, + "learning_rate": 1.726624756790754e-05, + "loss": 374.8469, + "step": 12930 + }, + { + "epoch": 0.2488688870617989, + "grad_norm": 2268.6771212686576, + "learning_rate": 1.7262053145103893e-05, + "loss": 378.9152, + "step": 12940 + }, + { + "epoch": 0.24906121232227943, + "grad_norm": 2326.5117844320807, + "learning_rate": 1.7257856017305387e-05, + "loss": 373.2381, + "step": 12950 + }, + { + "epoch": 0.24925353758275998, + "grad_norm": 2166.3961909250083, + "learning_rate": 1.725365618607538e-05, + "loss": 363.4617, + "step": 12960 + }, + { + "epoch": 0.2494458628432405, + "grad_norm": 2123.8157116986677, + "learning_rate": 1.7249453652978244e-05, + "loss": 366.4645, + "step": 12970 + }, + { + "epoch": 0.24963818810372101, + "grad_norm": 1954.5908539168058, + "learning_rate": 1.7245248419579353e-05, + "loss": 375.104, + "step": 12980 + }, + { + "epoch": 0.24983051336420153, + "grad_norm": 2311.455331979595, + "learning_rate": 1.7241040487445082e-05, + "loss": 366.1221, + "step": 12990 + }, + { + "epoch": 0.25002283862468205, + "grad_norm": 1952.5267212095614, + "learning_rate": 1.723682985814282e-05, + "loss": 366.4078, + "step": 13000 + }, + { + "epoch": 0.2502151638851626, + "grad_norm": 2494.0820832215604, + "learning_rate": 1.7232616533240958e-05, + "loss": 370.3326, + "step": 13010 + }, + { + "epoch": 0.2504074891456431, + "grad_norm": 2318.699171466557, + "learning_rate": 1.7228400514308884e-05, + "loss": 375.1649, + "step": 13020 + }, + { + "epoch": 0.2505998144061236, + "grad_norm": 2923.064596255426, + "learning_rate": 1.7224181802917003e-05, + "loss": 365.9617, + "step": 13030 + }, + { + "epoch": 0.2507921396666042, + "grad_norm": 2235.8162398618665, + "learning_rate": 1.721996040063671e-05, + "loss": 371.2755, + "step": 13040 + }, + { + "epoch": 0.2509844649270847, + "grad_norm": 3402.491641507786, + "learning_rate": 1.7215736309040408e-05, + "loss": 364.7011, + "step": 13050 + }, + { + "epoch": 0.2511767901875652, + "grad_norm": 2190.4111841868817, + "learning_rate": 1.7211509529701507e-05, + "loss": 364.3978, + "step": 13060 + }, + { + "epoch": 0.25136911544804574, + "grad_norm": 2182.905249871719, + "learning_rate": 1.7207280064194403e-05, + "loss": 370.612, + "step": 13070 + }, + { + "epoch": 0.25156144070852626, + "grad_norm": 2412.7129913277554, + "learning_rate": 1.7203047914094514e-05, + "loss": 363.9965, + "step": 13080 + }, + { + "epoch": 0.2517537659690068, + "grad_norm": 2262.078866173598, + "learning_rate": 1.7198813080978235e-05, + "loss": 368.0875, + "step": 13090 + }, + { + "epoch": 0.2519460912294873, + "grad_norm": 2141.2010465395024, + "learning_rate": 1.7194575566422975e-05, + "loss": 372.1631, + "step": 13100 + }, + { + "epoch": 0.2521384164899678, + "grad_norm": 2236.876871069789, + "learning_rate": 1.719033537200714e-05, + "loss": 357.9208, + "step": 13110 + }, + { + "epoch": 0.25233074175044834, + "grad_norm": 2368.896756757804, + "learning_rate": 1.7186092499310133e-05, + "loss": 370.6848, + "step": 13120 + }, + { + "epoch": 0.25252306701092886, + "grad_norm": 2034.295919620777, + "learning_rate": 1.7181846949912347e-05, + "loss": 365.5694, + "step": 13130 + }, + { + "epoch": 0.25271539227140943, + "grad_norm": 2112.365699259346, + "learning_rate": 1.717759872539519e-05, + "loss": 357.8564, + "step": 13140 + }, + { + "epoch": 0.25290771753188995, + "grad_norm": 2236.7844958234014, + "learning_rate": 1.7173347827341046e-05, + "loss": 363.274, + "step": 13150 + }, + { + "epoch": 0.25310004279237047, + "grad_norm": 2131.5027472197603, + "learning_rate": 1.7169094257333307e-05, + "loss": 377.5089, + "step": 13160 + }, + { + "epoch": 0.253292368052851, + "grad_norm": 2201.601387732003, + "learning_rate": 1.716483801695636e-05, + "loss": 372.2224, + "step": 13170 + }, + { + "epoch": 0.2534846933133315, + "grad_norm": 2270.5089754773767, + "learning_rate": 1.7160579107795587e-05, + "loss": 371.2687, + "step": 13180 + }, + { + "epoch": 0.25367701857381203, + "grad_norm": 2087.7274790080764, + "learning_rate": 1.715631753143735e-05, + "loss": 361.2863, + "step": 13190 + }, + { + "epoch": 0.25386934383429255, + "grad_norm": 2203.274867124545, + "learning_rate": 1.715205328946903e-05, + "loss": 375.0043, + "step": 13200 + }, + { + "epoch": 0.25406166909477307, + "grad_norm": 2183.0265198187403, + "learning_rate": 1.7147786383478978e-05, + "loss": 365.3814, + "step": 13210 + }, + { + "epoch": 0.2542539943552536, + "grad_norm": 2071.064381032265, + "learning_rate": 1.7143516815056545e-05, + "loss": 373.6266, + "step": 13220 + }, + { + "epoch": 0.2544463196157341, + "grad_norm": 2120.748161443223, + "learning_rate": 1.713924458579208e-05, + "loss": 381.5468, + "step": 13230 + }, + { + "epoch": 0.2546386448762147, + "grad_norm": 2381.4264809998035, + "learning_rate": 1.7134969697276912e-05, + "loss": 361.3351, + "step": 13240 + }, + { + "epoch": 0.2548309701366952, + "grad_norm": 2204.218987668362, + "learning_rate": 1.7130692151103373e-05, + "loss": 360.3402, + "step": 13250 + }, + { + "epoch": 0.2550232953971757, + "grad_norm": 2287.9938308956316, + "learning_rate": 1.7126411948864776e-05, + "loss": 361.9453, + "step": 13260 + }, + { + "epoch": 0.25521562065765624, + "grad_norm": 2352.708402483653, + "learning_rate": 1.7122129092155422e-05, + "loss": 372.9736, + "step": 13270 + }, + { + "epoch": 0.25540794591813676, + "grad_norm": 2365.0151964818424, + "learning_rate": 1.7117843582570608e-05, + "loss": 363.5916, + "step": 13280 + }, + { + "epoch": 0.2556002711786173, + "grad_norm": 2428.924702841836, + "learning_rate": 1.711355542170661e-05, + "loss": 354.8736, + "step": 13290 + }, + { + "epoch": 0.2557925964390978, + "grad_norm": 2373.2535106694277, + "learning_rate": 1.710926461116071e-05, + "loss": 354.0335, + "step": 13300 + }, + { + "epoch": 0.2559849216995783, + "grad_norm": 2174.4421393852217, + "learning_rate": 1.710497115253115e-05, + "loss": 367.6362, + "step": 13310 + }, + { + "epoch": 0.25617724696005884, + "grad_norm": 2100.664711178483, + "learning_rate": 1.7100675047417178e-05, + "loss": 362.7153, + "step": 13320 + }, + { + "epoch": 0.25636957222053935, + "grad_norm": 2265.959941281571, + "learning_rate": 1.7096376297419027e-05, + "loss": 364.1679, + "step": 13330 + }, + { + "epoch": 0.2565618974810199, + "grad_norm": 2303.4915717391596, + "learning_rate": 1.70920749041379e-05, + "loss": 368.2602, + "step": 13340 + }, + { + "epoch": 0.25675422274150045, + "grad_norm": 2253.541679059977, + "learning_rate": 1.7087770869176005e-05, + "loss": 363.7776, + "step": 13350 + }, + { + "epoch": 0.25694654800198097, + "grad_norm": 2619.359812239953, + "learning_rate": 1.7083464194136517e-05, + "loss": 364.5733, + "step": 13360 + }, + { + "epoch": 0.2571388732624615, + "grad_norm": 2327.6661959592793, + "learning_rate": 1.707915488062361e-05, + "loss": 367.0563, + "step": 13370 + }, + { + "epoch": 0.257331198522942, + "grad_norm": 2210.701600130927, + "learning_rate": 1.7074842930242418e-05, + "loss": 361.3447, + "step": 13380 + }, + { + "epoch": 0.2575235237834225, + "grad_norm": 2255.424614088036, + "learning_rate": 1.7070528344599083e-05, + "loss": 366.5962, + "step": 13390 + }, + { + "epoch": 0.25771584904390304, + "grad_norm": 1939.1075943547062, + "learning_rate": 1.7066211125300713e-05, + "loss": 355.21, + "step": 13400 + }, + { + "epoch": 0.25790817430438356, + "grad_norm": 2218.0890782546844, + "learning_rate": 1.70618912739554e-05, + "loss": 354.0135, + "step": 13410 + }, + { + "epoch": 0.2581004995648641, + "grad_norm": 2228.9430765714455, + "learning_rate": 1.705756879217222e-05, + "loss": 360.8117, + "step": 13420 + }, + { + "epoch": 0.2582928248253446, + "grad_norm": 2339.181835304501, + "learning_rate": 1.7053243681561225e-05, + "loss": 391.1912, + "step": 13430 + }, + { + "epoch": 0.2584851500858251, + "grad_norm": 2942.846542274916, + "learning_rate": 1.7048915943733444e-05, + "loss": 374.7205, + "step": 13440 + }, + { + "epoch": 0.2586774753463057, + "grad_norm": 2108.8569405750263, + "learning_rate": 1.704458558030089e-05, + "loss": 361.5591, + "step": 13450 + }, + { + "epoch": 0.2588698006067862, + "grad_norm": 2173.377418923527, + "learning_rate": 1.704025259287656e-05, + "loss": 349.6717, + "step": 13460 + }, + { + "epoch": 0.25906212586726673, + "grad_norm": 2270.1040677140495, + "learning_rate": 1.7035916983074405e-05, + "loss": 365.4547, + "step": 13470 + }, + { + "epoch": 0.25925445112774725, + "grad_norm": 2191.429983903243, + "learning_rate": 1.7031578752509377e-05, + "loss": 376.328, + "step": 13480 + }, + { + "epoch": 0.2594467763882278, + "grad_norm": 2341.776015951565, + "learning_rate": 1.70272379027974e-05, + "loss": 353.0333, + "step": 13490 + }, + { + "epoch": 0.2596391016487083, + "grad_norm": 2991.992840272398, + "learning_rate": 1.7022894435555356e-05, + "loss": 364.1902, + "step": 13500 + }, + { + "epoch": 0.2598314269091888, + "grad_norm": 2251.311098855147, + "learning_rate": 1.7018548352401123e-05, + "loss": 360.6715, + "step": 13510 + }, + { + "epoch": 0.26002375216966933, + "grad_norm": 2069.2219256224384, + "learning_rate": 1.7014199654953543e-05, + "loss": 354.686, + "step": 13520 + }, + { + "epoch": 0.26021607743014985, + "grad_norm": 2339.4828322112303, + "learning_rate": 1.700984834483244e-05, + "loss": 357.82, + "step": 13530 + }, + { + "epoch": 0.26040840269063037, + "grad_norm": 2275.0942544119675, + "learning_rate": 1.7005494423658598e-05, + "loss": 361.6596, + "step": 13540 + }, + { + "epoch": 0.26060072795111094, + "grad_norm": 2423.1712189884956, + "learning_rate": 1.7001137893053782e-05, + "loss": 352.1861, + "step": 13550 + }, + { + "epoch": 0.26079305321159146, + "grad_norm": 1952.630341986935, + "learning_rate": 1.6996778754640727e-05, + "loss": 355.9204, + "step": 13560 + }, + { + "epoch": 0.260985378472072, + "grad_norm": 2803.1007945938454, + "learning_rate": 1.6992417010043144e-05, + "loss": 362.2839, + "step": 13570 + }, + { + "epoch": 0.2611777037325525, + "grad_norm": 2491.0123601699406, + "learning_rate": 1.6988052660885707e-05, + "loss": 356.3197, + "step": 13580 + }, + { + "epoch": 0.261370028993033, + "grad_norm": 2249.6902256937415, + "learning_rate": 1.6983685708794064e-05, + "loss": 351.8635, + "step": 13590 + }, + { + "epoch": 0.26156235425351354, + "grad_norm": 2154.262615559068, + "learning_rate": 1.6979316155394834e-05, + "loss": 360.5741, + "step": 13600 + }, + { + "epoch": 0.26175467951399406, + "grad_norm": 2290.60966799732, + "learning_rate": 1.6974944002315605e-05, + "loss": 352.6973, + "step": 13610 + }, + { + "epoch": 0.2619470047744746, + "grad_norm": 2141.0607880001735, + "learning_rate": 1.697056925118493e-05, + "loss": 355.3396, + "step": 13620 + }, + { + "epoch": 0.2621393300349551, + "grad_norm": 2161.2153868605674, + "learning_rate": 1.696619190363233e-05, + "loss": 372.7554, + "step": 13630 + }, + { + "epoch": 0.2623316552954356, + "grad_norm": 2306.6074489399607, + "learning_rate": 1.69618119612883e-05, + "loss": 352.0928, + "step": 13640 + }, + { + "epoch": 0.2625239805559162, + "grad_norm": 2372.277682089138, + "learning_rate": 1.695742942578429e-05, + "loss": 360.1005, + "step": 13650 + }, + { + "epoch": 0.2627163058163967, + "grad_norm": 2148.801092905007, + "learning_rate": 1.6953044298752724e-05, + "loss": 354.6568, + "step": 13660 + }, + { + "epoch": 0.26290863107687723, + "grad_norm": 2107.752353383053, + "learning_rate": 1.694865658182699e-05, + "loss": 376.3746, + "step": 13670 + }, + { + "epoch": 0.26310095633735775, + "grad_norm": 2251.879910727671, + "learning_rate": 1.6944266276641442e-05, + "loss": 359.1013, + "step": 13680 + }, + { + "epoch": 0.26329328159783827, + "grad_norm": 2280.915123621908, + "learning_rate": 1.6939873384831394e-05, + "loss": 352.8126, + "step": 13690 + }, + { + "epoch": 0.2634856068583188, + "grad_norm": 2151.763211719724, + "learning_rate": 1.6935477908033124e-05, + "loss": 365.3831, + "step": 13700 + }, + { + "epoch": 0.2636779321187993, + "grad_norm": 2219.681130064892, + "learning_rate": 1.6931079847883877e-05, + "loss": 360.9801, + "step": 13710 + }, + { + "epoch": 0.2638702573792798, + "grad_norm": 2148.437330167889, + "learning_rate": 1.692667920602186e-05, + "loss": 360.1392, + "step": 13720 + }, + { + "epoch": 0.26406258263976035, + "grad_norm": 2311.2112790663277, + "learning_rate": 1.6922275984086233e-05, + "loss": 368.1768, + "step": 13730 + }, + { + "epoch": 0.26425490790024087, + "grad_norm": 3347.3742915926946, + "learning_rate": 1.691787018371713e-05, + "loss": 359.7262, + "step": 13740 + }, + { + "epoch": 0.2644472331607214, + "grad_norm": 1958.7131328256862, + "learning_rate": 1.6913461806555635e-05, + "loss": 366.6636, + "step": 13750 + }, + { + "epoch": 0.26463955842120196, + "grad_norm": 2124.205274840122, + "learning_rate": 1.6909050854243797e-05, + "loss": 361.5742, + "step": 13760 + }, + { + "epoch": 0.2648318836816825, + "grad_norm": 2196.083347899615, + "learning_rate": 1.690463732842462e-05, + "loss": 358.3661, + "step": 13770 + }, + { + "epoch": 0.265024208942163, + "grad_norm": 2099.8495510558323, + "learning_rate": 1.6900221230742073e-05, + "loss": 363.142, + "step": 13780 + }, + { + "epoch": 0.2652165342026435, + "grad_norm": 2287.4135454319503, + "learning_rate": 1.689580256284108e-05, + "loss": 371.7677, + "step": 13790 + }, + { + "epoch": 0.26540885946312404, + "grad_norm": 2159.738428224536, + "learning_rate": 1.689138132636752e-05, + "loss": 355.8717, + "step": 13800 + }, + { + "epoch": 0.26560118472360456, + "grad_norm": 2417.10382081598, + "learning_rate": 1.688695752296823e-05, + "loss": 353.2436, + "step": 13810 + }, + { + "epoch": 0.2657935099840851, + "grad_norm": 2175.4065633322334, + "learning_rate": 1.6882531154291007e-05, + "loss": 357.5786, + "step": 13820 + }, + { + "epoch": 0.2659858352445656, + "grad_norm": 2448.4463448035663, + "learning_rate": 1.6878102221984593e-05, + "loss": 366.5969, + "step": 13830 + }, + { + "epoch": 0.2661781605050461, + "grad_norm": 2311.1519905753667, + "learning_rate": 1.6873670727698702e-05, + "loss": 370.8688, + "step": 13840 + }, + { + "epoch": 0.26637048576552663, + "grad_norm": 1833.2445852137944, + "learning_rate": 1.686923667308398e-05, + "loss": 359.431, + "step": 13850 + }, + { + "epoch": 0.2665628110260072, + "grad_norm": 2311.8042821976733, + "learning_rate": 1.6864800059792057e-05, + "loss": 358.09, + "step": 13860 + }, + { + "epoch": 0.2667551362864877, + "grad_norm": 2290.26105451503, + "learning_rate": 1.686036088947548e-05, + "loss": 378.6407, + "step": 13870 + }, + { + "epoch": 0.26694746154696825, + "grad_norm": 2308.049201900373, + "learning_rate": 1.6855919163787777e-05, + "loss": 368.1997, + "step": 13880 + }, + { + "epoch": 0.26713978680744876, + "grad_norm": 2402.727721477322, + "learning_rate": 1.6851474884383416e-05, + "loss": 352.0517, + "step": 13890 + }, + { + "epoch": 0.2673321120679293, + "grad_norm": 2126.4202684865063, + "learning_rate": 1.6847028052917814e-05, + "loss": 356.8757, + "step": 13900 + }, + { + "epoch": 0.2675244373284098, + "grad_norm": 2219.704431840823, + "learning_rate": 1.6842578671047345e-05, + "loss": 351.6863, + "step": 13910 + }, + { + "epoch": 0.2677167625888903, + "grad_norm": 2165.459820766716, + "learning_rate": 1.683812674042933e-05, + "loss": 343.2167, + "step": 13920 + }, + { + "epoch": 0.26790908784937084, + "grad_norm": 1973.858326095211, + "learning_rate": 1.683367226272204e-05, + "loss": 354.5079, + "step": 13930 + }, + { + "epoch": 0.26810141310985136, + "grad_norm": 2009.8594355693149, + "learning_rate": 1.6829215239584695e-05, + "loss": 359.1503, + "step": 13940 + }, + { + "epoch": 0.2682937383703319, + "grad_norm": 2476.9372914391574, + "learning_rate": 1.6824755672677458e-05, + "loss": 359.3496, + "step": 13950 + }, + { + "epoch": 0.26848606363081245, + "grad_norm": 2533.3646696574024, + "learning_rate": 1.682029356366145e-05, + "loss": 357.923, + "step": 13960 + }, + { + "epoch": 0.268678388891293, + "grad_norm": 1972.045103103336, + "learning_rate": 1.6815828914198732e-05, + "loss": 343.1023, + "step": 13970 + }, + { + "epoch": 0.2688707141517735, + "grad_norm": 2168.6922610639886, + "learning_rate": 1.6811361725952308e-05, + "loss": 369.1714, + "step": 13980 + }, + { + "epoch": 0.269063039412254, + "grad_norm": 2238.709758857822, + "learning_rate": 1.6806892000586135e-05, + "loss": 356.6833, + "step": 13990 + }, + { + "epoch": 0.26925536467273453, + "grad_norm": 2216.9579415444555, + "learning_rate": 1.6802419739765114e-05, + "loss": 356.4481, + "step": 14000 + }, + { + "epoch": 0.26944768993321505, + "grad_norm": 2411.948012072841, + "learning_rate": 1.679794494515508e-05, + "loss": 357.2391, + "step": 14010 + }, + { + "epoch": 0.26964001519369557, + "grad_norm": 2333.3106707951483, + "learning_rate": 1.6793467618422828e-05, + "loss": 351.3738, + "step": 14020 + }, + { + "epoch": 0.2698323404541761, + "grad_norm": 2344.896906114102, + "learning_rate": 1.6788987761236088e-05, + "loss": 359.5713, + "step": 14030 + }, + { + "epoch": 0.2700246657146566, + "grad_norm": 2047.8087364894836, + "learning_rate": 1.6784505375263533e-05, + "loss": 351.5925, + "step": 14040 + }, + { + "epoch": 0.27021699097513713, + "grad_norm": 2289.9871298987428, + "learning_rate": 1.678002046217477e-05, + "loss": 354.071, + "step": 14050 + }, + { + "epoch": 0.2704093162356177, + "grad_norm": 2297.079878289623, + "learning_rate": 1.6775533023640363e-05, + "loss": 382.4293, + "step": 14060 + }, + { + "epoch": 0.2706016414960982, + "grad_norm": 2707.1323362802527, + "learning_rate": 1.6771043061331806e-05, + "loss": 379.08, + "step": 14070 + }, + { + "epoch": 0.27079396675657874, + "grad_norm": 2140.7356612219182, + "learning_rate": 1.6766550576921533e-05, + "loss": 350.7611, + "step": 14080 + }, + { + "epoch": 0.27098629201705926, + "grad_norm": 2148.0099854408904, + "learning_rate": 1.676205557208293e-05, + "loss": 361.6202, + "step": 14090 + }, + { + "epoch": 0.2711786172775398, + "grad_norm": 2315.1241871264983, + "learning_rate": 1.67575580484903e-05, + "loss": 341.0929, + "step": 14100 + }, + { + "epoch": 0.2713709425380203, + "grad_norm": 2316.589255783513, + "learning_rate": 1.6753058007818906e-05, + "loss": 360.6659, + "step": 14110 + }, + { + "epoch": 0.2715632677985008, + "grad_norm": 1899.0355660504529, + "learning_rate": 1.674855545174493e-05, + "loss": 351.1972, + "step": 14120 + }, + { + "epoch": 0.27175559305898134, + "grad_norm": 2222.084557494, + "learning_rate": 1.6744050381945507e-05, + "loss": 361.3624, + "step": 14130 + }, + { + "epoch": 0.27194791831946186, + "grad_norm": 2264.881700045289, + "learning_rate": 1.67395428000987e-05, + "loss": 352.5764, + "step": 14140 + }, + { + "epoch": 0.2721402435799424, + "grad_norm": 2189.680143711977, + "learning_rate": 1.6735032707883502e-05, + "loss": 355.9011, + "step": 14150 + }, + { + "epoch": 0.27233256884042295, + "grad_norm": 1972.6086135796374, + "learning_rate": 1.6730520106979855e-05, + "loss": 341.7274, + "step": 14160 + }, + { + "epoch": 0.27252489410090347, + "grad_norm": 2110.300745632065, + "learning_rate": 1.672600499906863e-05, + "loss": 353.6648, + "step": 14170 + }, + { + "epoch": 0.272717219361384, + "grad_norm": 2204.052763725582, + "learning_rate": 1.6721487385831622e-05, + "loss": 357.5317, + "step": 14180 + }, + { + "epoch": 0.2729095446218645, + "grad_norm": 2041.8102406745647, + "learning_rate": 1.6716967268951574e-05, + "loss": 358.4835, + "step": 14190 + }, + { + "epoch": 0.273101869882345, + "grad_norm": 2005.8114284987464, + "learning_rate": 1.6712444650112152e-05, + "loss": 362.9404, + "step": 14200 + }, + { + "epoch": 0.27329419514282555, + "grad_norm": 2412.752289840888, + "learning_rate": 1.6707919530997956e-05, + "loss": 363.9804, + "step": 14210 + }, + { + "epoch": 0.27348652040330607, + "grad_norm": 2471.0214667614428, + "learning_rate": 1.6703391913294524e-05, + "loss": 354.7934, + "step": 14220 + }, + { + "epoch": 0.2736788456637866, + "grad_norm": 2675.282545326385, + "learning_rate": 1.6698861798688312e-05, + "loss": 360.2001, + "step": 14230 + }, + { + "epoch": 0.2738711709242671, + "grad_norm": 2087.417377825284, + "learning_rate": 1.6694329188866717e-05, + "loss": 358.0862, + "step": 14240 + }, + { + "epoch": 0.2740634961847476, + "grad_norm": 2181.953738189635, + "learning_rate": 1.6689794085518057e-05, + "loss": 345.6908, + "step": 14250 + }, + { + "epoch": 0.27425582144522814, + "grad_norm": 2214.301271829383, + "learning_rate": 1.668525649033159e-05, + "loss": 360.2512, + "step": 14260 + }, + { + "epoch": 0.2744481467057087, + "grad_norm": 2070.4968198587185, + "learning_rate": 1.6680716404997482e-05, + "loss": 351.3011, + "step": 14270 + }, + { + "epoch": 0.27464047196618924, + "grad_norm": 1970.425874835042, + "learning_rate": 1.667617383120686e-05, + "loss": 359.1019, + "step": 14280 + }, + { + "epoch": 0.27483279722666976, + "grad_norm": 1979.2190282830913, + "learning_rate": 1.667162877065174e-05, + "loss": 352.6497, + "step": 14290 + }, + { + "epoch": 0.2750251224871503, + "grad_norm": 2053.884922474694, + "learning_rate": 1.6667081225025087e-05, + "loss": 347.5574, + "step": 14300 + }, + { + "epoch": 0.2752174477476308, + "grad_norm": 2823.2874066492827, + "learning_rate": 1.666253119602079e-05, + "loss": 361.0676, + "step": 14310 + }, + { + "epoch": 0.2754097730081113, + "grad_norm": 2429.729872293584, + "learning_rate": 1.665797868533366e-05, + "loss": 345.4754, + "step": 14320 + }, + { + "epoch": 0.27560209826859183, + "grad_norm": 2564.8506157499537, + "learning_rate": 1.6653423694659433e-05, + "loss": 366.7636, + "step": 14330 + }, + { + "epoch": 0.27579442352907235, + "grad_norm": 2126.2268195384677, + "learning_rate": 1.6648866225694757e-05, + "loss": 354.3418, + "step": 14340 + }, + { + "epoch": 0.27598674878955287, + "grad_norm": 2174.4487955254194, + "learning_rate": 1.6644306280137227e-05, + "loss": 350.8759, + "step": 14350 + }, + { + "epoch": 0.2761790740500334, + "grad_norm": 2121.795515758063, + "learning_rate": 1.6639743859685336e-05, + "loss": 358.6837, + "step": 14360 + }, + { + "epoch": 0.27637139931051397, + "grad_norm": 2235.7630797497222, + "learning_rate": 1.663517896603852e-05, + "loss": 353.1709, + "step": 14370 + }, + { + "epoch": 0.2765637245709945, + "grad_norm": 2119.0977257896884, + "learning_rate": 1.6630611600897126e-05, + "loss": 352.6597, + "step": 14380 + }, + { + "epoch": 0.276756049831475, + "grad_norm": 2501.0019998363905, + "learning_rate": 1.6626041765962413e-05, + "loss": 364.0342, + "step": 14390 + }, + { + "epoch": 0.2769483750919555, + "grad_norm": 2166.2492933080384, + "learning_rate": 1.662146946293658e-05, + "loss": 354.1955, + "step": 14400 + }, + { + "epoch": 0.27714070035243604, + "grad_norm": 2126.9277228427763, + "learning_rate": 1.6616894693522727e-05, + "loss": 346.3621, + "step": 14410 + }, + { + "epoch": 0.27733302561291656, + "grad_norm": 2257.7783824903768, + "learning_rate": 1.6612317459424884e-05, + "loss": 345.0017, + "step": 14420 + }, + { + "epoch": 0.2775253508733971, + "grad_norm": 2179.224609595095, + "learning_rate": 1.6607737762347987e-05, + "loss": 352.2429, + "step": 14430 + }, + { + "epoch": 0.2777176761338776, + "grad_norm": 2067.1268379003523, + "learning_rate": 1.6603155603997908e-05, + "loss": 350.3075, + "step": 14440 + }, + { + "epoch": 0.2779100013943581, + "grad_norm": 2304.950825416585, + "learning_rate": 1.6598570986081424e-05, + "loss": 353.2081, + "step": 14450 + }, + { + "epoch": 0.27810232665483864, + "grad_norm": 14988.110072366713, + "learning_rate": 1.6593983910306225e-05, + "loss": 361.7833, + "step": 14460 + }, + { + "epoch": 0.2782946519153192, + "grad_norm": 2311.952332545013, + "learning_rate": 1.658939437838092e-05, + "loss": 345.4867, + "step": 14470 + }, + { + "epoch": 0.27848697717579973, + "grad_norm": 2068.3808973281393, + "learning_rate": 1.658480239201504e-05, + "loss": 348.6313, + "step": 14480 + }, + { + "epoch": 0.27867930243628025, + "grad_norm": 2170.969441943965, + "learning_rate": 1.6580207952919018e-05, + "loss": 342.5731, + "step": 14490 + }, + { + "epoch": 0.27887162769676077, + "grad_norm": 2021.5175783147224, + "learning_rate": 1.657561106280421e-05, + "loss": 346.9506, + "step": 14500 + }, + { + "epoch": 0.2790639529572413, + "grad_norm": 2028.6282206966307, + "learning_rate": 1.6571011723382882e-05, + "loss": 346.9975, + "step": 14510 + }, + { + "epoch": 0.2792562782177218, + "grad_norm": 2187.477258236756, + "learning_rate": 1.6566409936368207e-05, + "loss": 356.4622, + "step": 14520 + }, + { + "epoch": 0.27944860347820233, + "grad_norm": 2137.1956438430307, + "learning_rate": 1.6561805703474285e-05, + "loss": 342.7526, + "step": 14530 + }, + { + "epoch": 0.27964092873868285, + "grad_norm": 2218.8060348649406, + "learning_rate": 1.655719902641611e-05, + "loss": 352.7918, + "step": 14540 + }, + { + "epoch": 0.27983325399916337, + "grad_norm": 2144.055837550634, + "learning_rate": 1.6552589906909586e-05, + "loss": 348.8337, + "step": 14550 + }, + { + "epoch": 0.2800255792596439, + "grad_norm": 2253.7484428585017, + "learning_rate": 1.654797834667155e-05, + "loss": 336.3861, + "step": 14560 + }, + { + "epoch": 0.28021790452012446, + "grad_norm": 2236.6492524430246, + "learning_rate": 1.6543364347419714e-05, + "loss": 354.0255, + "step": 14570 + }, + { + "epoch": 0.280410229780605, + "grad_norm": 2160.2015474686873, + "learning_rate": 1.6538747910872733e-05, + "loss": 350.5711, + "step": 14580 + }, + { + "epoch": 0.2806025550410855, + "grad_norm": 2135.825692923429, + "learning_rate": 1.6534129038750145e-05, + "loss": 351.2991, + "step": 14590 + }, + { + "epoch": 0.280794880301566, + "grad_norm": 2102.410992561339, + "learning_rate": 1.65295077327724e-05, + "loss": 347.3173, + "step": 14600 + }, + { + "epoch": 0.28098720556204654, + "grad_norm": 1960.491867519005, + "learning_rate": 1.652488399466087e-05, + "loss": 338.7215, + "step": 14610 + }, + { + "epoch": 0.28117953082252706, + "grad_norm": 2216.935971908477, + "learning_rate": 1.6520257826137807e-05, + "loss": 355.5081, + "step": 14620 + }, + { + "epoch": 0.2813718560830076, + "grad_norm": 2545.0622226305786, + "learning_rate": 1.6515629228926396e-05, + "loss": 357.2568, + "step": 14630 + }, + { + "epoch": 0.2815641813434881, + "grad_norm": 2415.123601846704, + "learning_rate": 1.6510998204750702e-05, + "loss": 364.0826, + "step": 14640 + }, + { + "epoch": 0.2817565066039686, + "grad_norm": 2157.644372041305, + "learning_rate": 1.650636475533571e-05, + "loss": 349.2333, + "step": 14650 + }, + { + "epoch": 0.28194883186444913, + "grad_norm": 2045.6376577360497, + "learning_rate": 1.6501728882407305e-05, + "loss": 350.3113, + "step": 14660 + }, + { + "epoch": 0.28214115712492965, + "grad_norm": 2119.425827077217, + "learning_rate": 1.649709058769227e-05, + "loss": 338.2437, + "step": 14670 + }, + { + "epoch": 0.28233348238541023, + "grad_norm": 2559.4017979471387, + "learning_rate": 1.6492449872918293e-05, + "loss": 337.4597, + "step": 14680 + }, + { + "epoch": 0.28252580764589075, + "grad_norm": 2034.6266755131946, + "learning_rate": 1.6487806739813966e-05, + "loss": 343.1722, + "step": 14690 + }, + { + "epoch": 0.28271813290637127, + "grad_norm": 2027.6246653423716, + "learning_rate": 1.6483161190108778e-05, + "loss": 348.8538, + "step": 14700 + }, + { + "epoch": 0.2829104581668518, + "grad_norm": 2391.058467009256, + "learning_rate": 1.6478513225533117e-05, + "loss": 354.9481, + "step": 14710 + }, + { + "epoch": 0.2831027834273323, + "grad_norm": 1935.4540753060578, + "learning_rate": 1.647386284781828e-05, + "loss": 353.477, + "step": 14720 + }, + { + "epoch": 0.2832951086878128, + "grad_norm": 1900.7454901121991, + "learning_rate": 1.6469210058696448e-05, + "loss": 356.1835, + "step": 14730 + }, + { + "epoch": 0.28348743394829334, + "grad_norm": 2178.9793520560766, + "learning_rate": 1.646455485990071e-05, + "loss": 344.4227, + "step": 14740 + }, + { + "epoch": 0.28367975920877386, + "grad_norm": 2317.573864365449, + "learning_rate": 1.645989725316506e-05, + "loss": 342.7161, + "step": 14750 + }, + { + "epoch": 0.2838720844692544, + "grad_norm": 2148.3270520104556, + "learning_rate": 1.6455237240224364e-05, + "loss": 341.3651, + "step": 14760 + }, + { + "epoch": 0.2840644097297349, + "grad_norm": 1890.0661872101743, + "learning_rate": 1.6450574822814412e-05, + "loss": 348.7713, + "step": 14770 + }, + { + "epoch": 0.2842567349902155, + "grad_norm": 2111.166658494511, + "learning_rate": 1.6445910002671872e-05, + "loss": 346.0004, + "step": 14780 + }, + { + "epoch": 0.284449060250696, + "grad_norm": 2182.8152442404066, + "learning_rate": 1.644124278153431e-05, + "loss": 349.5256, + "step": 14790 + }, + { + "epoch": 0.2846413855111765, + "grad_norm": 2174.55236921231, + "learning_rate": 1.64365731611402e-05, + "loss": 351.512, + "step": 14800 + }, + { + "epoch": 0.28483371077165703, + "grad_norm": 1925.2918978791486, + "learning_rate": 1.6431901143228888e-05, + "loss": 341.6268, + "step": 14810 + }, + { + "epoch": 0.28502603603213755, + "grad_norm": 2174.7874966646423, + "learning_rate": 1.6427226729540623e-05, + "loss": 340.9609, + "step": 14820 + }, + { + "epoch": 0.2852183612926181, + "grad_norm": 2134.1172658835803, + "learning_rate": 1.6422549921816556e-05, + "loss": 351.6038, + "step": 14830 + }, + { + "epoch": 0.2854106865530986, + "grad_norm": 2217.3516934749564, + "learning_rate": 1.641787072179871e-05, + "loss": 341.5353, + "step": 14840 + }, + { + "epoch": 0.2856030118135791, + "grad_norm": 2223.8972668867937, + "learning_rate": 1.6413189131230022e-05, + "loss": 341.5969, + "step": 14850 + }, + { + "epoch": 0.28579533707405963, + "grad_norm": 2054.9513926731647, + "learning_rate": 1.6408505151854292e-05, + "loss": 342.8476, + "step": 14860 + }, + { + "epoch": 0.28598766233454015, + "grad_norm": 2009.6768638747901, + "learning_rate": 1.6403818785416236e-05, + "loss": 339.1113, + "step": 14870 + }, + { + "epoch": 0.2861799875950207, + "grad_norm": 2053.080049113835, + "learning_rate": 1.6399130033661444e-05, + "loss": 357.4447, + "step": 14880 + }, + { + "epoch": 0.28637231285550124, + "grad_norm": 2127.5793168709356, + "learning_rate": 1.6394438898336402e-05, + "loss": 347.4203, + "step": 14890 + }, + { + "epoch": 0.28656463811598176, + "grad_norm": 2410.0591550511567, + "learning_rate": 1.6389745381188475e-05, + "loss": 353.4183, + "step": 14900 + }, + { + "epoch": 0.2867569633764623, + "grad_norm": 1984.7275396999785, + "learning_rate": 1.6385049483965926e-05, + "loss": 339.8347, + "step": 14910 + }, + { + "epoch": 0.2869492886369428, + "grad_norm": 2272.169987853921, + "learning_rate": 1.6380351208417897e-05, + "loss": 345.1773, + "step": 14920 + }, + { + "epoch": 0.2871416138974233, + "grad_norm": 2174.804837594622, + "learning_rate": 1.6375650556294417e-05, + "loss": 341.4035, + "step": 14930 + }, + { + "epoch": 0.28733393915790384, + "grad_norm": 2274.165185256845, + "learning_rate": 1.6370947529346404e-05, + "loss": 346.0246, + "step": 14940 + }, + { + "epoch": 0.28752626441838436, + "grad_norm": 2070.155956987362, + "learning_rate": 1.6366242129325652e-05, + "loss": 347.7661, + "step": 14950 + }, + { + "epoch": 0.2877185896788649, + "grad_norm": 1993.8221844616796, + "learning_rate": 1.636153435798485e-05, + "loss": 352.0007, + "step": 14960 + }, + { + "epoch": 0.2879109149393454, + "grad_norm": 2197.798183522224, + "learning_rate": 1.6356824217077564e-05, + "loss": 333.8231, + "step": 14970 + }, + { + "epoch": 0.28810324019982597, + "grad_norm": 2411.3087500087086, + "learning_rate": 1.6352111708358243e-05, + "loss": 343.7047, + "step": 14980 + }, + { + "epoch": 0.2882955654603065, + "grad_norm": 2028.8896675886378, + "learning_rate": 1.6347396833582224e-05, + "loss": 348.8022, + "step": 14990 + }, + { + "epoch": 0.288487890720787, + "grad_norm": 2053.5154042317085, + "learning_rate": 1.634267959450571e-05, + "loss": 339.8348, + "step": 15000 + }, + { + "epoch": 0.28868021598126753, + "grad_norm": 2449.9333994756007, + "learning_rate": 1.63379599928858e-05, + "loss": 354.8625, + "step": 15010 + }, + { + "epoch": 0.28887254124174805, + "grad_norm": 1991.0620960043839, + "learning_rate": 1.6333238030480473e-05, + "loss": 349.4698, + "step": 15020 + }, + { + "epoch": 0.28906486650222857, + "grad_norm": 2156.6215816389918, + "learning_rate": 1.6328513709048573e-05, + "loss": 336.3499, + "step": 15030 + }, + { + "epoch": 0.2892571917627091, + "grad_norm": 2159.9912011033466, + "learning_rate": 1.6323787030349833e-05, + "loss": 343.4849, + "step": 15040 + }, + { + "epoch": 0.2894495170231896, + "grad_norm": 2466.2459075722577, + "learning_rate": 1.6319057996144868e-05, + "loss": 348.7814, + "step": 15050 + }, + { + "epoch": 0.2896418422836701, + "grad_norm": 2103.788500815738, + "learning_rate": 1.631432660819516e-05, + "loss": 335.7118, + "step": 15060 + }, + { + "epoch": 0.28983416754415064, + "grad_norm": 2249.819119280314, + "learning_rate": 1.6309592868263075e-05, + "loss": 344.7883, + "step": 15070 + }, + { + "epoch": 0.2900264928046312, + "grad_norm": 2113.4292951546504, + "learning_rate": 1.630485677811185e-05, + "loss": 349.2526, + "step": 15080 + }, + { + "epoch": 0.29021881806511174, + "grad_norm": 2016.642538768806, + "learning_rate": 1.630011833950561e-05, + "loss": 343.9141, + "step": 15090 + }, + { + "epoch": 0.29041114332559226, + "grad_norm": 2323.251189811375, + "learning_rate": 1.6295377554209338e-05, + "loss": 357.7049, + "step": 15100 + }, + { + "epoch": 0.2906034685860728, + "grad_norm": 2313.839921820489, + "learning_rate": 1.6290634423988897e-05, + "loss": 347.8679, + "step": 15110 + }, + { + "epoch": 0.2907957938465533, + "grad_norm": 1922.0937291669225, + "learning_rate": 1.6285888950611023e-05, + "loss": 350.3313, + "step": 15120 + }, + { + "epoch": 0.2909881191070338, + "grad_norm": 2172.165611825818, + "learning_rate": 1.6281141135843334e-05, + "loss": 339.3288, + "step": 15130 + }, + { + "epoch": 0.29118044436751434, + "grad_norm": 1902.3087888659554, + "learning_rate": 1.6276390981454306e-05, + "loss": 345.3844, + "step": 15140 + }, + { + "epoch": 0.29137276962799485, + "grad_norm": 2484.71928775544, + "learning_rate": 1.6271638489213297e-05, + "loss": 354.9697, + "step": 15150 + }, + { + "epoch": 0.2915650948884754, + "grad_norm": 2314.1098835200887, + "learning_rate": 1.6266883660890527e-05, + "loss": 357.2189, + "step": 15160 + }, + { + "epoch": 0.2917574201489559, + "grad_norm": 2079.488309143478, + "learning_rate": 1.6262126498257098e-05, + "loss": 342.9789, + "step": 15170 + }, + { + "epoch": 0.2919497454094364, + "grad_norm": 2159.5246672841668, + "learning_rate": 1.625736700308497e-05, + "loss": 345.0607, + "step": 15180 + }, + { + "epoch": 0.292142070669917, + "grad_norm": 2039.1375020606483, + "learning_rate": 1.6252605177146978e-05, + "loss": 343.0139, + "step": 15190 + }, + { + "epoch": 0.2923343959303975, + "grad_norm": 2201.209101113219, + "learning_rate": 1.624784102221682e-05, + "loss": 369.8984, + "step": 15200 + }, + { + "epoch": 0.292526721190878, + "grad_norm": 2235.822789944362, + "learning_rate": 1.6243074540069067e-05, + "loss": 341.1845, + "step": 15210 + }, + { + "epoch": 0.29271904645135854, + "grad_norm": 2071.1335185407997, + "learning_rate": 1.6238305732479158e-05, + "loss": 349.0349, + "step": 15220 + }, + { + "epoch": 0.29291137171183906, + "grad_norm": 2392.5512339717075, + "learning_rate": 1.6233534601223396e-05, + "loss": 345.4016, + "step": 15230 + }, + { + "epoch": 0.2931036969723196, + "grad_norm": 2054.6089100408317, + "learning_rate": 1.6228761148078943e-05, + "loss": 346.1329, + "step": 15240 + }, + { + "epoch": 0.2932960222328001, + "grad_norm": 1999.651223568873, + "learning_rate": 1.622398537482383e-05, + "loss": 350.039, + "step": 15250 + }, + { + "epoch": 0.2934883474932806, + "grad_norm": 2452.492930003192, + "learning_rate": 1.621920728323696e-05, + "loss": 350.0799, + "step": 15260 + }, + { + "epoch": 0.29368067275376114, + "grad_norm": 2051.588893775578, + "learning_rate": 1.621442687509809e-05, + "loss": 341.6577, + "step": 15270 + }, + { + "epoch": 0.29387299801424166, + "grad_norm": 2056.5558340694497, + "learning_rate": 1.6209644152187848e-05, + "loss": 351.3471, + "step": 15280 + }, + { + "epoch": 0.29406532327472223, + "grad_norm": 2147.400731848501, + "learning_rate": 1.620485911628771e-05, + "loss": 350.3942, + "step": 15290 + }, + { + "epoch": 0.29425764853520275, + "grad_norm": 2146.700466555457, + "learning_rate": 1.6200071769180026e-05, + "loss": 328.6214, + "step": 15300 + }, + { + "epoch": 0.2944499737956833, + "grad_norm": 2055.8986477886388, + "learning_rate": 1.6195282112648007e-05, + "loss": 344.675, + "step": 15310 + }, + { + "epoch": 0.2946422990561638, + "grad_norm": 2392.27956202332, + "learning_rate": 1.6190490148475724e-05, + "loss": 337.4436, + "step": 15320 + }, + { + "epoch": 0.2948346243166443, + "grad_norm": 2050.8781501790213, + "learning_rate": 1.6185695878448094e-05, + "loss": 343.9708, + "step": 15330 + }, + { + "epoch": 0.29502694957712483, + "grad_norm": 2032.4167013905073, + "learning_rate": 1.6180899304350915e-05, + "loss": 329.2228, + "step": 15340 + }, + { + "epoch": 0.29521927483760535, + "grad_norm": 1959.2517802295768, + "learning_rate": 1.6176100427970826e-05, + "loss": 335.7417, + "step": 15350 + }, + { + "epoch": 0.29541160009808587, + "grad_norm": 2342.0114212060766, + "learning_rate": 1.6171299251095324e-05, + "loss": 341.7732, + "step": 15360 + }, + { + "epoch": 0.2956039253585664, + "grad_norm": 2192.902401093125, + "learning_rate": 1.6166495775512777e-05, + "loss": 351.5493, + "step": 15370 + }, + { + "epoch": 0.2957962506190469, + "grad_norm": 2263.4632326344313, + "learning_rate": 1.6161690003012392e-05, + "loss": 347.1629, + "step": 15380 + }, + { + "epoch": 0.2959885758795275, + "grad_norm": 2219.263283930718, + "learning_rate": 1.615688193538425e-05, + "loss": 334.7667, + "step": 15390 + }, + { + "epoch": 0.296180901140008, + "grad_norm": 2480.6173716009744, + "learning_rate": 1.615207157441927e-05, + "loss": 346.333, + "step": 15400 + }, + { + "epoch": 0.2963732264004885, + "grad_norm": 2028.1183731241908, + "learning_rate": 1.6147258921909236e-05, + "loss": 335.6766, + "step": 15410 + }, + { + "epoch": 0.29656555166096904, + "grad_norm": 2486.808921182866, + "learning_rate": 1.6142443979646774e-05, + "loss": 353.8469, + "step": 15420 + }, + { + "epoch": 0.29675787692144956, + "grad_norm": 2116.3810332652265, + "learning_rate": 1.6137626749425377e-05, + "loss": 335.7973, + "step": 15430 + }, + { + "epoch": 0.2969502021819301, + "grad_norm": 2025.8910088704392, + "learning_rate": 1.6132807233039382e-05, + "loss": 340.9344, + "step": 15440 + }, + { + "epoch": 0.2971425274424106, + "grad_norm": 2012.3189811695688, + "learning_rate": 1.612798543228398e-05, + "loss": 337.8212, + "step": 15450 + }, + { + "epoch": 0.2973348527028911, + "grad_norm": 2030.5678851637938, + "learning_rate": 1.612316134895521e-05, + "loss": 343.1717, + "step": 15460 + }, + { + "epoch": 0.29752717796337164, + "grad_norm": 2066.6076610157006, + "learning_rate": 1.611833498484997e-05, + "loss": 342.1694, + "step": 15470 + }, + { + "epoch": 0.29771950322385216, + "grad_norm": 2040.0285872280083, + "learning_rate": 1.611350634176599e-05, + "loss": 344.1515, + "step": 15480 + }, + { + "epoch": 0.29791182848433273, + "grad_norm": 1885.0376331820423, + "learning_rate": 1.6108675421501865e-05, + "loss": 346.8648, + "step": 15490 + }, + { + "epoch": 0.29810415374481325, + "grad_norm": 2378.5574491418756, + "learning_rate": 1.610384222585704e-05, + "loss": 348.066, + "step": 15500 + }, + { + "epoch": 0.29829647900529377, + "grad_norm": 2142.635413293242, + "learning_rate": 1.609900675663179e-05, + "loss": 335.4527, + "step": 15510 + }, + { + "epoch": 0.2984888042657743, + "grad_norm": 2174.693092624955, + "learning_rate": 1.609416901562725e-05, + "loss": 329.3521, + "step": 15520 + }, + { + "epoch": 0.2986811295262548, + "grad_norm": 2082.419491545453, + "learning_rate": 1.60893290046454e-05, + "loss": 338.2611, + "step": 15530 + }, + { + "epoch": 0.2988734547867353, + "grad_norm": 1990.4989218204114, + "learning_rate": 1.608448672548907e-05, + "loss": 343.3369, + "step": 15540 + }, + { + "epoch": 0.29906578004721585, + "grad_norm": 2058.5567291446737, + "learning_rate": 1.6079642179961917e-05, + "loss": 328.6461, + "step": 15550 + }, + { + "epoch": 0.29925810530769636, + "grad_norm": 2059.5328825023216, + "learning_rate": 1.6074795369868463e-05, + "loss": 344.672, + "step": 15560 + }, + { + "epoch": 0.2994504305681769, + "grad_norm": 2042.0033589316513, + "learning_rate": 1.6069946297014064e-05, + "loss": 337.5279, + "step": 15570 + }, + { + "epoch": 0.2996427558286574, + "grad_norm": 2113.1761985993344, + "learning_rate": 1.6065094963204915e-05, + "loss": 335.2345, + "step": 15580 + }, + { + "epoch": 0.2998350810891379, + "grad_norm": 2028.4594574437606, + "learning_rate": 1.6060241370248064e-05, + "loss": 340.8168, + "step": 15590 + }, + { + "epoch": 0.3000274063496185, + "grad_norm": 2071.6367684556894, + "learning_rate": 1.605538551995139e-05, + "loss": 347.7569, + "step": 15600 + }, + { + "epoch": 0.300219731610099, + "grad_norm": 2022.8654457668224, + "learning_rate": 1.6050527414123616e-05, + "loss": 337.1365, + "step": 15610 + }, + { + "epoch": 0.30041205687057954, + "grad_norm": 1984.6657992908138, + "learning_rate": 1.604566705457431e-05, + "loss": 326.2922, + "step": 15620 + }, + { + "epoch": 0.30060438213106006, + "grad_norm": 2108.2426764165125, + "learning_rate": 1.604080444311387e-05, + "loss": 332.3005, + "step": 15630 + }, + { + "epoch": 0.3007967073915406, + "grad_norm": 2417.668494699437, + "learning_rate": 1.6035939581553543e-05, + "loss": 343.7115, + "step": 15640 + }, + { + "epoch": 0.3009890326520211, + "grad_norm": 2098.9251251981427, + "learning_rate": 1.603107247170541e-05, + "loss": 350.848, + "step": 15650 + }, + { + "epoch": 0.3011813579125016, + "grad_norm": 2187.8632945932964, + "learning_rate": 1.6026203115382392e-05, + "loss": 350.0126, + "step": 15660 + }, + { + "epoch": 0.30137368317298213, + "grad_norm": 2081.12109471647, + "learning_rate": 1.6021331514398233e-05, + "loss": 331.2589, + "step": 15670 + }, + { + "epoch": 0.30156600843346265, + "grad_norm": 2026.1622593795485, + "learning_rate": 1.6016457670567535e-05, + "loss": 350.4996, + "step": 15680 + }, + { + "epoch": 0.30175833369394317, + "grad_norm": 1947.1323797000855, + "learning_rate": 1.6011581585705715e-05, + "loss": 338.5748, + "step": 15690 + }, + { + "epoch": 0.30195065895442375, + "grad_norm": 1978.9177872431721, + "learning_rate": 1.600670326162904e-05, + "loss": 345.5526, + "step": 15700 + }, + { + "epoch": 0.30214298421490426, + "grad_norm": 2065.192048660437, + "learning_rate": 1.600182270015461e-05, + "loss": 332.2462, + "step": 15710 + }, + { + "epoch": 0.3023353094753848, + "grad_norm": 2122.800141350639, + "learning_rate": 1.5996939903100338e-05, + "loss": 330.6463, + "step": 15720 + }, + { + "epoch": 0.3025276347358653, + "grad_norm": 2216.346427216386, + "learning_rate": 1.5992054872285005e-05, + "loss": 333.9136, + "step": 15730 + }, + { + "epoch": 0.3027199599963458, + "grad_norm": 1781.8287120360487, + "learning_rate": 1.5987167609528187e-05, + "loss": 341.6092, + "step": 15740 + }, + { + "epoch": 0.30291228525682634, + "grad_norm": 2240.8663635102666, + "learning_rate": 1.598227811665032e-05, + "loss": 350.1469, + "step": 15750 + }, + { + "epoch": 0.30310461051730686, + "grad_norm": 2311.8158310427875, + "learning_rate": 1.597738639547265e-05, + "loss": 329.6627, + "step": 15760 + }, + { + "epoch": 0.3032969357777874, + "grad_norm": 1823.629312620248, + "learning_rate": 1.597249244781727e-05, + "loss": 341.2837, + "step": 15770 + }, + { + "epoch": 0.3034892610382679, + "grad_norm": 2163.3408733522306, + "learning_rate": 1.5967596275507094e-05, + "loss": 344.6266, + "step": 15780 + }, + { + "epoch": 0.3036815862987484, + "grad_norm": 1933.7887376312376, + "learning_rate": 1.5962697880365863e-05, + "loss": 330.588, + "step": 15790 + }, + { + "epoch": 0.303873911559229, + "grad_norm": 1908.057850733314, + "learning_rate": 1.5957797264218145e-05, + "loss": 328.8406, + "step": 15800 + }, + { + "epoch": 0.3040662368197095, + "grad_norm": 2161.8909836142493, + "learning_rate": 1.5952894428889347e-05, + "loss": 354.0483, + "step": 15810 + }, + { + "epoch": 0.30425856208019003, + "grad_norm": 1892.9751899058763, + "learning_rate": 1.594798937620569e-05, + "loss": 329.8559, + "step": 15820 + }, + { + "epoch": 0.30445088734067055, + "grad_norm": 2106.3950913699387, + "learning_rate": 1.594308210799422e-05, + "loss": 336.1968, + "step": 15830 + }, + { + "epoch": 0.30464321260115107, + "grad_norm": 2247.289635998585, + "learning_rate": 1.5938172626082823e-05, + "loss": 342.8016, + "step": 15840 + }, + { + "epoch": 0.3048355378616316, + "grad_norm": 2085.1920447215507, + "learning_rate": 1.5933260932300192e-05, + "loss": 337.2578, + "step": 15850 + }, + { + "epoch": 0.3050278631221121, + "grad_norm": 2044.9909918871356, + "learning_rate": 1.5928347028475855e-05, + "loss": 338.7811, + "step": 15860 + }, + { + "epoch": 0.3052201883825926, + "grad_norm": 2056.4942800640515, + "learning_rate": 1.592343091644016e-05, + "loss": 334.6297, + "step": 15870 + }, + { + "epoch": 0.30541251364307315, + "grad_norm": 2279.2176455707217, + "learning_rate": 1.5918512598024275e-05, + "loss": 324.1793, + "step": 15880 + }, + { + "epoch": 0.30560483890355367, + "grad_norm": 2421.7687068141227, + "learning_rate": 1.59135920750602e-05, + "loss": 348.8277, + "step": 15890 + }, + { + "epoch": 0.30579716416403424, + "grad_norm": 2122.412821515118, + "learning_rate": 1.590866934938074e-05, + "loss": 340.6207, + "step": 15900 + }, + { + "epoch": 0.30598948942451476, + "grad_norm": 1895.899102182779, + "learning_rate": 1.590374442281953e-05, + "loss": 330.9981, + "step": 15910 + }, + { + "epoch": 0.3061818146849953, + "grad_norm": 2101.911142862094, + "learning_rate": 1.5898817297211028e-05, + "loss": 331.7635, + "step": 15920 + }, + { + "epoch": 0.3063741399454758, + "grad_norm": 2185.9010198046053, + "learning_rate": 1.589388797439051e-05, + "loss": 343.5025, + "step": 15930 + }, + { + "epoch": 0.3065664652059563, + "grad_norm": 2039.0075617015264, + "learning_rate": 1.5888956456194056e-05, + "loss": 336.8323, + "step": 15940 + }, + { + "epoch": 0.30675879046643684, + "grad_norm": 2004.7136092241692, + "learning_rate": 1.588402274445858e-05, + "loss": 351.7526, + "step": 15950 + }, + { + "epoch": 0.30695111572691736, + "grad_norm": 2021.5746304781997, + "learning_rate": 1.5879086841021815e-05, + "loss": 344.0881, + "step": 15960 + }, + { + "epoch": 0.3071434409873979, + "grad_norm": 2081.0173856942433, + "learning_rate": 1.5874148747722294e-05, + "loss": 339.0723, + "step": 15970 + }, + { + "epoch": 0.3073357662478784, + "grad_norm": 2130.4156752434787, + "learning_rate": 1.5869208466399382e-05, + "loss": 335.951, + "step": 15980 + }, + { + "epoch": 0.3075280915083589, + "grad_norm": 2292.130303399673, + "learning_rate": 1.586426599889325e-05, + "loss": 339.7939, + "step": 15990 + }, + { + "epoch": 0.3077204167688395, + "grad_norm": 2493.4941406807498, + "learning_rate": 1.5859321347044882e-05, + "loss": 347.21, + "step": 16000 + }, + { + "epoch": 0.30791274202932, + "grad_norm": 2053.740886710575, + "learning_rate": 1.5854374512696084e-05, + "loss": 342.1399, + "step": 16010 + }, + { + "epoch": 0.3081050672898005, + "grad_norm": 2447.624340403277, + "learning_rate": 1.584942549768947e-05, + "loss": 336.8734, + "step": 16020 + }, + { + "epoch": 0.30829739255028105, + "grad_norm": 2086.5396459126378, + "learning_rate": 1.584447430386846e-05, + "loss": 349.1976, + "step": 16030 + }, + { + "epoch": 0.30848971781076157, + "grad_norm": 2075.0906560218377, + "learning_rate": 1.58395209330773e-05, + "loss": 322.2554, + "step": 16040 + }, + { + "epoch": 0.3086820430712421, + "grad_norm": 2372.653710833521, + "learning_rate": 1.5834565387161034e-05, + "loss": 341.3229, + "step": 16050 + }, + { + "epoch": 0.3088743683317226, + "grad_norm": 2211.589477666567, + "learning_rate": 1.5829607667965524e-05, + "loss": 333.5114, + "step": 16060 + }, + { + "epoch": 0.3090666935922031, + "grad_norm": 2570.1561750505557, + "learning_rate": 1.5824647777337433e-05, + "loss": 358.9544, + "step": 16070 + }, + { + "epoch": 0.30925901885268364, + "grad_norm": 2180.9552364678066, + "learning_rate": 1.5819685717124245e-05, + "loss": 331.2237, + "step": 16080 + }, + { + "epoch": 0.30945134411316416, + "grad_norm": 2085.2442998980287, + "learning_rate": 1.5814721489174246e-05, + "loss": 333.0478, + "step": 16090 + }, + { + "epoch": 0.3096436693736447, + "grad_norm": 2065.1309089259485, + "learning_rate": 1.580975509533652e-05, + "loss": 339.2547, + "step": 16100 + }, + { + "epoch": 0.30983599463412526, + "grad_norm": 2234.1041916736517, + "learning_rate": 1.5804786537460972e-05, + "loss": 328.4009, + "step": 16110 + }, + { + "epoch": 0.3100283198946058, + "grad_norm": 2002.5156719897268, + "learning_rate": 1.5799815817398312e-05, + "loss": 334.0761, + "step": 16120 + }, + { + "epoch": 0.3102206451550863, + "grad_norm": 2989.9680736875594, + "learning_rate": 1.579484293700004e-05, + "loss": 339.9199, + "step": 16130 + }, + { + "epoch": 0.3104129704155668, + "grad_norm": 2140.2577985832277, + "learning_rate": 1.578986789811849e-05, + "loss": 348.1439, + "step": 16140 + }, + { + "epoch": 0.31060529567604733, + "grad_norm": 2037.7938563831885, + "learning_rate": 1.5784890702606763e-05, + "loss": 330.0218, + "step": 16150 + }, + { + "epoch": 0.31079762093652785, + "grad_norm": 2273.0544599516516, + "learning_rate": 1.5779911352318792e-05, + "loss": 335.1149, + "step": 16160 + }, + { + "epoch": 0.31098994619700837, + "grad_norm": 2158.9584996291364, + "learning_rate": 1.5774929849109303e-05, + "loss": 334.0113, + "step": 16170 + }, + { + "epoch": 0.3111822714574889, + "grad_norm": 1986.953982832818, + "learning_rate": 1.5769946194833816e-05, + "loss": 350.6712, + "step": 16180 + }, + { + "epoch": 0.3113745967179694, + "grad_norm": 2680.7694975063, + "learning_rate": 1.5764960391348666e-05, + "loss": 344.6785, + "step": 16190 + }, + { + "epoch": 0.31156692197844993, + "grad_norm": 1836.2954392767801, + "learning_rate": 1.5759972440510985e-05, + "loss": 323.0379, + "step": 16200 + }, + { + "epoch": 0.3117592472389305, + "grad_norm": 1891.9186317164206, + "learning_rate": 1.5754982344178697e-05, + "loss": 324.2129, + "step": 16210 + }, + { + "epoch": 0.311951572499411, + "grad_norm": 1858.3685193919584, + "learning_rate": 1.5749990104210534e-05, + "loss": 338.1616, + "step": 16220 + }, + { + "epoch": 0.31214389775989154, + "grad_norm": 1942.769362270575, + "learning_rate": 1.574499572246602e-05, + "loss": 331.0952, + "step": 16230 + }, + { + "epoch": 0.31233622302037206, + "grad_norm": 2391.8832959326223, + "learning_rate": 1.5739999200805483e-05, + "loss": 349.1809, + "step": 16240 + }, + { + "epoch": 0.3125285482808526, + "grad_norm": 1896.4588090836314, + "learning_rate": 1.573500054109004e-05, + "loss": 336.1771, + "step": 16250 + }, + { + "epoch": 0.3127208735413331, + "grad_norm": 1933.2231685213192, + "learning_rate": 1.5729999745181617e-05, + "loss": 333.6326, + "step": 16260 + }, + { + "epoch": 0.3129131988018136, + "grad_norm": 1957.8009081451296, + "learning_rate": 1.572499681494292e-05, + "loss": 333.6753, + "step": 16270 + }, + { + "epoch": 0.31310552406229414, + "grad_norm": 1930.2970190906028, + "learning_rate": 1.571999175223746e-05, + "loss": 329.0752, + "step": 16280 + }, + { + "epoch": 0.31329784932277466, + "grad_norm": 2199.0292486815474, + "learning_rate": 1.571498455892954e-05, + "loss": 341.7442, + "step": 16290 + }, + { + "epoch": 0.3134901745832552, + "grad_norm": 1986.7851098666854, + "learning_rate": 1.570997523688426e-05, + "loss": 340.3837, + "step": 16300 + }, + { + "epoch": 0.31368249984373575, + "grad_norm": 1964.379709960503, + "learning_rate": 1.570496378796751e-05, + "loss": 330.2182, + "step": 16310 + }, + { + "epoch": 0.31387482510421627, + "grad_norm": 2158.3643877281206, + "learning_rate": 1.5699950214045966e-05, + "loss": 342.3301, + "step": 16320 + }, + { + "epoch": 0.3140671503646968, + "grad_norm": 2159.7701323799, + "learning_rate": 1.5694934516987102e-05, + "loss": 340.2655, + "step": 16330 + }, + { + "epoch": 0.3142594756251773, + "grad_norm": 2126.4845282418337, + "learning_rate": 1.5689916698659193e-05, + "loss": 342.693, + "step": 16340 + }, + { + "epoch": 0.31445180088565783, + "grad_norm": 1911.873268297658, + "learning_rate": 1.568489676093128e-05, + "loss": 322.9534, + "step": 16350 + }, + { + "epoch": 0.31464412614613835, + "grad_norm": 1942.7507027055012, + "learning_rate": 1.5679874705673215e-05, + "loss": 336.9441, + "step": 16360 + }, + { + "epoch": 0.31483645140661887, + "grad_norm": 2114.4984494087657, + "learning_rate": 1.5674850534755628e-05, + "loss": 340.4918, + "step": 16370 + }, + { + "epoch": 0.3150287766670994, + "grad_norm": 2074.3587412918373, + "learning_rate": 1.566982425004994e-05, + "loss": 334.4541, + "step": 16380 + }, + { + "epoch": 0.3152211019275799, + "grad_norm": 2020.809113927804, + "learning_rate": 1.5664795853428357e-05, + "loss": 333.6528, + "step": 16390 + }, + { + "epoch": 0.3154134271880604, + "grad_norm": 1933.6723048394228, + "learning_rate": 1.565976534676388e-05, + "loss": 330.6611, + "step": 16400 + }, + { + "epoch": 0.315605752448541, + "grad_norm": 2222.27062645372, + "learning_rate": 1.5654732731930286e-05, + "loss": 334.5393, + "step": 16410 + }, + { + "epoch": 0.3157980777090215, + "grad_norm": 2212.9341483115013, + "learning_rate": 1.5649698010802138e-05, + "loss": 352.7397, + "step": 16420 + }, + { + "epoch": 0.31599040296950204, + "grad_norm": 1878.5701680525785, + "learning_rate": 1.564466118525479e-05, + "loss": 330.0373, + "step": 16430 + }, + { + "epoch": 0.31618272822998256, + "grad_norm": 2013.2482855645796, + "learning_rate": 1.5639622257164372e-05, + "loss": 334.0851, + "step": 16440 + }, + { + "epoch": 0.3163750534904631, + "grad_norm": 2027.427984102896, + "learning_rate": 1.5634581228407807e-05, + "loss": 321.8146, + "step": 16450 + }, + { + "epoch": 0.3165673787509436, + "grad_norm": 2079.9571648910664, + "learning_rate": 1.562953810086279e-05, + "loss": 335.0436, + "step": 16460 + }, + { + "epoch": 0.3167597040114241, + "grad_norm": 2090.528441509643, + "learning_rate": 1.562449287640781e-05, + "loss": 326.4796, + "step": 16470 + }, + { + "epoch": 0.31695202927190463, + "grad_norm": 1981.8539034822998, + "learning_rate": 1.5619445556922118e-05, + "loss": 335.8691, + "step": 16480 + }, + { + "epoch": 0.31714435453238515, + "grad_norm": 1966.4134138299942, + "learning_rate": 1.561439614428577e-05, + "loss": 339.6275, + "step": 16490 + }, + { + "epoch": 0.3173366797928657, + "grad_norm": 1907.2904507812912, + "learning_rate": 1.5609344640379585e-05, + "loss": 335.7618, + "step": 16500 + }, + { + "epoch": 0.3175290050533462, + "grad_norm": 2095.980158379129, + "learning_rate": 1.560429104708516e-05, + "loss": 326.9014, + "step": 16510 + }, + { + "epoch": 0.31772133031382677, + "grad_norm": 1960.8340623964773, + "learning_rate": 1.5599235366284874e-05, + "loss": 329.9941, + "step": 16520 + }, + { + "epoch": 0.3179136555743073, + "grad_norm": 2122.109266842947, + "learning_rate": 1.5594177599861894e-05, + "loss": 337.7616, + "step": 16530 + }, + { + "epoch": 0.3181059808347878, + "grad_norm": 2447.161113783872, + "learning_rate": 1.5589117749700147e-05, + "loss": 335.4703, + "step": 16540 + }, + { + "epoch": 0.3182983060952683, + "grad_norm": 1930.1834990275436, + "learning_rate": 1.5584055817684346e-05, + "loss": 325.2849, + "step": 16550 + }, + { + "epoch": 0.31849063135574884, + "grad_norm": 2203.4758860547086, + "learning_rate": 1.5578991805699975e-05, + "loss": 333.9773, + "step": 16560 + }, + { + "epoch": 0.31868295661622936, + "grad_norm": 2133.660235062438, + "learning_rate": 1.5573925715633297e-05, + "loss": 332.5323, + "step": 16570 + }, + { + "epoch": 0.3188752818767099, + "grad_norm": 1922.932702918146, + "learning_rate": 1.5568857549371348e-05, + "loss": 337.5257, + "step": 16580 + }, + { + "epoch": 0.3190676071371904, + "grad_norm": 2138.1516124156665, + "learning_rate": 1.5563787308801934e-05, + "loss": 335.7361, + "step": 16590 + }, + { + "epoch": 0.3192599323976709, + "grad_norm": 2088.3037741374383, + "learning_rate": 1.5558714995813636e-05, + "loss": 330.6313, + "step": 16600 + }, + { + "epoch": 0.31945225765815144, + "grad_norm": 1949.1565451246443, + "learning_rate": 1.5553640612295807e-05, + "loss": 330.558, + "step": 16610 + }, + { + "epoch": 0.319644582918632, + "grad_norm": 2117.9288843146896, + "learning_rate": 1.5548564160138572e-05, + "loss": 338.077, + "step": 16620 + }, + { + "epoch": 0.31983690817911253, + "grad_norm": 2041.70053444773, + "learning_rate": 1.5543485641232825e-05, + "loss": 325.9343, + "step": 16630 + }, + { + "epoch": 0.32002923343959305, + "grad_norm": 2192.4520615181723, + "learning_rate": 1.553840505747023e-05, + "loss": 322.8957, + "step": 16640 + }, + { + "epoch": 0.32022155870007357, + "grad_norm": 1936.7697945460645, + "learning_rate": 1.5533322410743223e-05, + "loss": 332.07, + "step": 16650 + }, + { + "epoch": 0.3204138839605541, + "grad_norm": 2078.863803346763, + "learning_rate": 1.5528237702945e-05, + "loss": 330.5714, + "step": 16660 + }, + { + "epoch": 0.3206062092210346, + "grad_norm": 23951.936369128667, + "learning_rate": 1.5523150935969534e-05, + "loss": 327.5922, + "step": 16670 + }, + { + "epoch": 0.32079853448151513, + "grad_norm": 2094.1039223778803, + "learning_rate": 1.5518062111711566e-05, + "loss": 328.2249, + "step": 16680 + }, + { + "epoch": 0.32099085974199565, + "grad_norm": 2203.447765173993, + "learning_rate": 1.5512971232066593e-05, + "loss": 326.7832, + "step": 16690 + }, + { + "epoch": 0.32118318500247617, + "grad_norm": 2267.712583511857, + "learning_rate": 1.5507878298930888e-05, + "loss": 329.2262, + "step": 16700 + }, + { + "epoch": 0.3213755102629567, + "grad_norm": 1825.498688551304, + "learning_rate": 1.5502783314201478e-05, + "loss": 333.0049, + "step": 16710 + }, + { + "epoch": 0.32156783552343726, + "grad_norm": 2131.370407651775, + "learning_rate": 1.549768627977617e-05, + "loss": 333.8423, + "step": 16720 + }, + { + "epoch": 0.3217601607839178, + "grad_norm": 2237.4984306172946, + "learning_rate": 1.5492587197553517e-05, + "loss": 339.3328, + "step": 16730 + }, + { + "epoch": 0.3219524860443983, + "grad_norm": 2015.8269770930942, + "learning_rate": 1.5487486069432848e-05, + "loss": 328.2787, + "step": 16740 + }, + { + "epoch": 0.3221448113048788, + "grad_norm": 1950.4103937897498, + "learning_rate": 1.5482382897314243e-05, + "loss": 327.7073, + "step": 16750 + }, + { + "epoch": 0.32233713656535934, + "grad_norm": 2134.0277555161397, + "learning_rate": 1.5477277683098555e-05, + "loss": 341.0341, + "step": 16760 + }, + { + "epoch": 0.32252946182583986, + "grad_norm": 2112.634876932766, + "learning_rate": 1.547217042868739e-05, + "loss": 323.5574, + "step": 16770 + }, + { + "epoch": 0.3227217870863204, + "grad_norm": 1942.7595504479032, + "learning_rate": 1.546706113598312e-05, + "loss": 322.569, + "step": 16780 + }, + { + "epoch": 0.3229141123468009, + "grad_norm": 2122.0691961567495, + "learning_rate": 1.5461949806888867e-05, + "loss": 323.3565, + "step": 16790 + }, + { + "epoch": 0.3231064376072814, + "grad_norm": 2072.3440021732094, + "learning_rate": 1.5456836443308512e-05, + "loss": 329.8078, + "step": 16800 + }, + { + "epoch": 0.32329876286776194, + "grad_norm": 2163.2838632362523, + "learning_rate": 1.545172104714671e-05, + "loss": 329.1739, + "step": 16810 + }, + { + "epoch": 0.3234910881282425, + "grad_norm": 1971.8664793007088, + "learning_rate": 1.544660362030886e-05, + "loss": 326.3791, + "step": 16820 + }, + { + "epoch": 0.32368341338872303, + "grad_norm": 2179.2026442975875, + "learning_rate": 1.544148416470111e-05, + "loss": 332.5055, + "step": 16830 + }, + { + "epoch": 0.32387573864920355, + "grad_norm": 2206.3075491996556, + "learning_rate": 1.5436362682230378e-05, + "loss": 336.3999, + "step": 16840 + }, + { + "epoch": 0.32406806390968407, + "grad_norm": 1935.5689654090077, + "learning_rate": 1.543123917480433e-05, + "loss": 332.8678, + "step": 16850 + }, + { + "epoch": 0.3242603891701646, + "grad_norm": 2012.1462183908066, + "learning_rate": 1.542611364433139e-05, + "loss": 335.3134, + "step": 16860 + }, + { + "epoch": 0.3244527144306451, + "grad_norm": 2007.0735892387465, + "learning_rate": 1.5420986092720735e-05, + "loss": 323.2146, + "step": 16870 + }, + { + "epoch": 0.3246450396911256, + "grad_norm": 1911.6239401173323, + "learning_rate": 1.541585652188229e-05, + "loss": 324.8375, + "step": 16880 + }, + { + "epoch": 0.32483736495160614, + "grad_norm": 2069.4784281287316, + "learning_rate": 1.5410724933726732e-05, + "loss": 336.6477, + "step": 16890 + }, + { + "epoch": 0.32502969021208666, + "grad_norm": 1926.7697556794687, + "learning_rate": 1.5405591330165503e-05, + "loss": 318.2225, + "step": 16900 + }, + { + "epoch": 0.3252220154725672, + "grad_norm": 2011.9796932076379, + "learning_rate": 1.5400455713110777e-05, + "loss": 332.4606, + "step": 16910 + }, + { + "epoch": 0.32541434073304776, + "grad_norm": 2013.159557686021, + "learning_rate": 1.539531808447549e-05, + "loss": 336.5361, + "step": 16920 + }, + { + "epoch": 0.3256066659935283, + "grad_norm": 2070.4194640024884, + "learning_rate": 1.5390178446173325e-05, + "loss": 333.021, + "step": 16930 + }, + { + "epoch": 0.3257989912540088, + "grad_norm": 2017.7817211832491, + "learning_rate": 1.538503680011871e-05, + "loss": 328.0961, + "step": 16940 + }, + { + "epoch": 0.3259913165144893, + "grad_norm": 1908.0387238264814, + "learning_rate": 1.537989314822682e-05, + "loss": 326.2898, + "step": 16950 + }, + { + "epoch": 0.32618364177496983, + "grad_norm": 2089.0363119673184, + "learning_rate": 1.5374747492413587e-05, + "loss": 330.2057, + "step": 16960 + }, + { + "epoch": 0.32637596703545035, + "grad_norm": 2111.0971296183175, + "learning_rate": 1.536959983459568e-05, + "loss": 335.5717, + "step": 16970 + }, + { + "epoch": 0.3265682922959309, + "grad_norm": 1928.90596373864, + "learning_rate": 1.536445017669052e-05, + "loss": 324.4423, + "step": 16980 + }, + { + "epoch": 0.3267606175564114, + "grad_norm": 1950.6647464743678, + "learning_rate": 1.535929852061626e-05, + "loss": 320.3136, + "step": 16990 + }, + { + "epoch": 0.3269529428168919, + "grad_norm": 2002.1189101190598, + "learning_rate": 1.5354144868291817e-05, + "loss": 337.2886, + "step": 17000 + }, + { + "epoch": 0.32714526807737243, + "grad_norm": 2063.863592766914, + "learning_rate": 1.5348989221636835e-05, + "loss": 325.4392, + "step": 17010 + }, + { + "epoch": 0.32733759333785295, + "grad_norm": 1989.6880282565426, + "learning_rate": 1.5343831582571706e-05, + "loss": 344.3409, + "step": 17020 + }, + { + "epoch": 0.3275299185983335, + "grad_norm": 2126.617739771563, + "learning_rate": 1.5338671953017576e-05, + "loss": 323.6003, + "step": 17030 + }, + { + "epoch": 0.32772224385881404, + "grad_norm": 1980.7580303789032, + "learning_rate": 1.5333510334896308e-05, + "loss": 323.6241, + "step": 17040 + }, + { + "epoch": 0.32791456911929456, + "grad_norm": 7989.5692703841, + "learning_rate": 1.532834673013053e-05, + "loss": 338.0232, + "step": 17050 + }, + { + "epoch": 0.3281068943797751, + "grad_norm": 2105.4461893366692, + "learning_rate": 1.5323181140643598e-05, + "loss": 324.2593, + "step": 17060 + }, + { + "epoch": 0.3282992196402556, + "grad_norm": 2233.1294595468275, + "learning_rate": 1.5318013568359603e-05, + "loss": 325.8482, + "step": 17070 + }, + { + "epoch": 0.3284915449007361, + "grad_norm": 2053.89517412598, + "learning_rate": 1.531284401520338e-05, + "loss": 320.8966, + "step": 17080 + }, + { + "epoch": 0.32868387016121664, + "grad_norm": 1804.1815399037755, + "learning_rate": 1.530767248310051e-05, + "loss": 324.8647, + "step": 17090 + }, + { + "epoch": 0.32887619542169716, + "grad_norm": 1984.4429863739215, + "learning_rate": 1.53024989739773e-05, + "loss": 326.1241, + "step": 17100 + }, + { + "epoch": 0.3290685206821777, + "grad_norm": 2119.3650834321793, + "learning_rate": 1.5297323489760792e-05, + "loss": 325.1836, + "step": 17110 + }, + { + "epoch": 0.3292608459426582, + "grad_norm": 2011.8162230472658, + "learning_rate": 1.5292146032378778e-05, + "loss": 331.957, + "step": 17120 + }, + { + "epoch": 0.3294531712031388, + "grad_norm": 2208.5290778087833, + "learning_rate": 1.5286966603759767e-05, + "loss": 329.0348, + "step": 17130 + }, + { + "epoch": 0.3296454964636193, + "grad_norm": 1983.1830730633092, + "learning_rate": 1.5281785205833013e-05, + "loss": 324.2075, + "step": 17140 + }, + { + "epoch": 0.3298378217240998, + "grad_norm": 2177.5842019402803, + "learning_rate": 1.52766018405285e-05, + "loss": 321.3308, + "step": 17150 + }, + { + "epoch": 0.33003014698458033, + "grad_norm": 2017.432680715218, + "learning_rate": 1.5271416509776948e-05, + "loss": 321.9001, + "step": 17160 + }, + { + "epoch": 0.33022247224506085, + "grad_norm": 1924.177275402273, + "learning_rate": 1.5266229215509806e-05, + "loss": 323.9805, + "step": 17170 + }, + { + "epoch": 0.33041479750554137, + "grad_norm": 2325.5354430363386, + "learning_rate": 1.5261039959659257e-05, + "loss": 330.6658, + "step": 17180 + }, + { + "epoch": 0.3306071227660219, + "grad_norm": 1991.2666160434483, + "learning_rate": 1.5255848744158214e-05, + "loss": 313.3101, + "step": 17190 + }, + { + "epoch": 0.3307994480265024, + "grad_norm": 2004.8450498962638, + "learning_rate": 1.5250655570940317e-05, + "loss": 339.1153, + "step": 17200 + }, + { + "epoch": 0.3309917732869829, + "grad_norm": 2047.0739413470224, + "learning_rate": 1.5245460441939934e-05, + "loss": 330.8931, + "step": 17210 + }, + { + "epoch": 0.33118409854746345, + "grad_norm": 2156.6426925055903, + "learning_rate": 1.5240263359092167e-05, + "loss": 328.6984, + "step": 17220 + }, + { + "epoch": 0.331376423807944, + "grad_norm": 2027.7322002174321, + "learning_rate": 1.5235064324332846e-05, + "loss": 335.8807, + "step": 17230 + }, + { + "epoch": 0.33156874906842454, + "grad_norm": 1998.791675253118, + "learning_rate": 1.5229863339598528e-05, + "loss": 321.5385, + "step": 17240 + }, + { + "epoch": 0.33176107432890506, + "grad_norm": 1957.281613309926, + "learning_rate": 1.5224660406826486e-05, + "loss": 318.0045, + "step": 17250 + }, + { + "epoch": 0.3319533995893856, + "grad_norm": 2137.653876681757, + "learning_rate": 1.5219455527954732e-05, + "loss": 336.5705, + "step": 17260 + }, + { + "epoch": 0.3321457248498661, + "grad_norm": 1958.2701604944473, + "learning_rate": 1.5214248704921995e-05, + "loss": 323.9953, + "step": 17270 + }, + { + "epoch": 0.3323380501103466, + "grad_norm": 2307.58974962407, + "learning_rate": 1.5209039939667731e-05, + "loss": 325.2405, + "step": 17280 + }, + { + "epoch": 0.33253037537082714, + "grad_norm": 2035.1142977745817, + "learning_rate": 1.5203829234132118e-05, + "loss": 325.5495, + "step": 17290 + }, + { + "epoch": 0.33272270063130766, + "grad_norm": 1946.6222467425596, + "learning_rate": 1.5198616590256064e-05, + "loss": 330.5086, + "step": 17300 + }, + { + "epoch": 0.3329150258917882, + "grad_norm": 2005.549179065666, + "learning_rate": 1.5193402009981187e-05, + "loss": 314.0701, + "step": 17310 + }, + { + "epoch": 0.3331073511522687, + "grad_norm": 2482.5043269857088, + "learning_rate": 1.5188185495249832e-05, + "loss": 330.9168, + "step": 17320 + }, + { + "epoch": 0.33329967641274927, + "grad_norm": 1789.6741231107126, + "learning_rate": 1.5182967048005069e-05, + "loss": 326.3284, + "step": 17330 + }, + { + "epoch": 0.3334920016732298, + "grad_norm": 1999.3730211320772, + "learning_rate": 1.5177746670190674e-05, + "loss": 320.489, + "step": 17340 + }, + { + "epoch": 0.3336843269337103, + "grad_norm": 1835.1780236735635, + "learning_rate": 1.5172524363751162e-05, + "loss": 318.593, + "step": 17350 + }, + { + "epoch": 0.3338766521941908, + "grad_norm": 2175.5909436717416, + "learning_rate": 1.5167300130631748e-05, + "loss": 324.3285, + "step": 17360 + }, + { + "epoch": 0.33406897745467135, + "grad_norm": 2004.7006577466032, + "learning_rate": 1.516207397277838e-05, + "loss": 322.8537, + "step": 17370 + }, + { + "epoch": 0.33426130271515186, + "grad_norm": 1998.9144930847553, + "learning_rate": 1.5156845892137711e-05, + "loss": 311.381, + "step": 17380 + }, + { + "epoch": 0.3344536279756324, + "grad_norm": 2198.4087850985256, + "learning_rate": 1.5151615890657113e-05, + "loss": 328.1361, + "step": 17390 + }, + { + "epoch": 0.3346459532361129, + "grad_norm": 2002.820027979471, + "learning_rate": 1.5146383970284679e-05, + "loss": 324.4853, + "step": 17400 + }, + { + "epoch": 0.3348382784965934, + "grad_norm": 1805.7839113516109, + "learning_rate": 1.514115013296921e-05, + "loss": 323.149, + "step": 17410 + }, + { + "epoch": 0.33503060375707394, + "grad_norm": 2183.193993180048, + "learning_rate": 1.513591438066023e-05, + "loss": 328.4162, + "step": 17420 + }, + { + "epoch": 0.33522292901755446, + "grad_norm": 2182.4459137606827, + "learning_rate": 1.5130676715307962e-05, + "loss": 337.7823, + "step": 17430 + }, + { + "epoch": 0.33541525427803504, + "grad_norm": 2017.3594994786833, + "learning_rate": 1.5125437138863353e-05, + "loss": 336.6125, + "step": 17440 + }, + { + "epoch": 0.33560757953851555, + "grad_norm": 1980.9282396432104, + "learning_rate": 1.512019565327806e-05, + "loss": 329.9132, + "step": 17450 + }, + { + "epoch": 0.3357999047989961, + "grad_norm": 2265.1804667663673, + "learning_rate": 1.5114952260504448e-05, + "loss": 336.1914, + "step": 17460 + }, + { + "epoch": 0.3359922300594766, + "grad_norm": 1906.9452157750625, + "learning_rate": 1.5109706962495596e-05, + "loss": 327.1075, + "step": 17470 + }, + { + "epoch": 0.3361845553199571, + "grad_norm": 2493.3960080989086, + "learning_rate": 1.510445976120529e-05, + "loss": 320.5026, + "step": 17480 + }, + { + "epoch": 0.33637688058043763, + "grad_norm": 2204.2267700891744, + "learning_rate": 1.5099210658588029e-05, + "loss": 326.1179, + "step": 17490 + }, + { + "epoch": 0.33656920584091815, + "grad_norm": 2051.901544526736, + "learning_rate": 1.5093959656599008e-05, + "loss": 327.8618, + "step": 17500 + }, + { + "epoch": 0.33676153110139867, + "grad_norm": 1917.0879883303546, + "learning_rate": 1.5088706757194147e-05, + "loss": 316.0583, + "step": 17510 + }, + { + "epoch": 0.3369538563618792, + "grad_norm": 1878.027348792607, + "learning_rate": 1.5083451962330062e-05, + "loss": 319.3782, + "step": 17520 + }, + { + "epoch": 0.3371461816223597, + "grad_norm": 2319.7363964115143, + "learning_rate": 1.5078195273964081e-05, + "loss": 320.0514, + "step": 17530 + }, + { + "epoch": 0.3373385068828403, + "grad_norm": 2373.517712199625, + "learning_rate": 1.5072936694054222e-05, + "loss": 319.1762, + "step": 17540 + }, + { + "epoch": 0.3375308321433208, + "grad_norm": 2096.8316519822724, + "learning_rate": 1.5067676224559231e-05, + "loss": 324.5782, + "step": 17550 + }, + { + "epoch": 0.3377231574038013, + "grad_norm": 1921.033773791416, + "learning_rate": 1.506241386743854e-05, + "loss": 325.5894, + "step": 17560 + }, + { + "epoch": 0.33791548266428184, + "grad_norm": 2054.43677617778, + "learning_rate": 1.5057149624652297e-05, + "loss": 313.1354, + "step": 17570 + }, + { + "epoch": 0.33810780792476236, + "grad_norm": 1862.8960805804957, + "learning_rate": 1.5051883498161334e-05, + "loss": 320.0346, + "step": 17580 + }, + { + "epoch": 0.3383001331852429, + "grad_norm": 1971.0422353913254, + "learning_rate": 1.5046615489927206e-05, + "loss": 319.7538, + "step": 17590 + }, + { + "epoch": 0.3384924584457234, + "grad_norm": 2030.5111586380638, + "learning_rate": 1.5041345601912154e-05, + "loss": 320.2064, + "step": 17600 + }, + { + "epoch": 0.3386847837062039, + "grad_norm": 1959.3174767968349, + "learning_rate": 1.5036073836079128e-05, + "loss": 328.8023, + "step": 17610 + }, + { + "epoch": 0.33887710896668444, + "grad_norm": 1934.1969978271761, + "learning_rate": 1.5030800194391773e-05, + "loss": 314.1831, + "step": 17620 + }, + { + "epoch": 0.33906943422716496, + "grad_norm": 2169.611251447064, + "learning_rate": 1.5025524678814428e-05, + "loss": 326.154, + "step": 17630 + }, + { + "epoch": 0.33926175948764553, + "grad_norm": 1922.8586206121417, + "learning_rate": 1.5020247291312138e-05, + "loss": 325.2667, + "step": 17640 + }, + { + "epoch": 0.33945408474812605, + "grad_norm": 2085.36074181669, + "learning_rate": 1.5014968033850647e-05, + "loss": 338.1495, + "step": 17650 + }, + { + "epoch": 0.33964641000860657, + "grad_norm": 2373.3831354809463, + "learning_rate": 1.5009686908396388e-05, + "loss": 325.9303, + "step": 17660 + }, + { + "epoch": 0.3398387352690871, + "grad_norm": 1884.2010035743306, + "learning_rate": 1.5004403916916494e-05, + "loss": 325.0139, + "step": 17670 + }, + { + "epoch": 0.3400310605295676, + "grad_norm": 2169.9003744987435, + "learning_rate": 1.4999119061378791e-05, + "loss": 346.9868, + "step": 17680 + }, + { + "epoch": 0.3402233857900481, + "grad_norm": 2292.426596440604, + "learning_rate": 1.4993832343751802e-05, + "loss": 338.4296, + "step": 17690 + }, + { + "epoch": 0.34041571105052865, + "grad_norm": 2114.140782796333, + "learning_rate": 1.4988543766004737e-05, + "loss": 316.1823, + "step": 17700 + }, + { + "epoch": 0.34060803631100917, + "grad_norm": 1888.9179425005343, + "learning_rate": 1.498325333010751e-05, + "loss": 309.8166, + "step": 17710 + }, + { + "epoch": 0.3408003615714897, + "grad_norm": 1951.0823971475256, + "learning_rate": 1.497796103803072e-05, + "loss": 325.782, + "step": 17720 + }, + { + "epoch": 0.3409926868319702, + "grad_norm": 2088.9968188153207, + "learning_rate": 1.4972666891745655e-05, + "loss": 323.7715, + "step": 17730 + }, + { + "epoch": 0.3411850120924508, + "grad_norm": 2036.82448764238, + "learning_rate": 1.49673708932243e-05, + "loss": 310.4458, + "step": 17740 + }, + { + "epoch": 0.3413773373529313, + "grad_norm": 1970.2248684614185, + "learning_rate": 1.4962073044439328e-05, + "loss": 326.2854, + "step": 17750 + }, + { + "epoch": 0.3415696626134118, + "grad_norm": 1924.3678663716555, + "learning_rate": 1.4956773347364095e-05, + "loss": 327.319, + "step": 17760 + }, + { + "epoch": 0.34176198787389234, + "grad_norm": 1907.118918461094, + "learning_rate": 1.4951471803972657e-05, + "loss": 323.0438, + "step": 17770 + }, + { + "epoch": 0.34195431313437286, + "grad_norm": 1893.8868023094, + "learning_rate": 1.4946168416239746e-05, + "loss": 320.827, + "step": 17780 + }, + { + "epoch": 0.3421466383948534, + "grad_norm": 2047.7456871641295, + "learning_rate": 1.4940863186140788e-05, + "loss": 334.5147, + "step": 17790 + }, + { + "epoch": 0.3423389636553339, + "grad_norm": 1935.984066118262, + "learning_rate": 1.4935556115651898e-05, + "loss": 311.2683, + "step": 17800 + }, + { + "epoch": 0.3425312889158144, + "grad_norm": 2157.4059313849093, + "learning_rate": 1.4930247206749863e-05, + "loss": 316.3749, + "step": 17810 + }, + { + "epoch": 0.34272361417629493, + "grad_norm": 2154.6794877591947, + "learning_rate": 1.4924936461412172e-05, + "loss": 327.4048, + "step": 17820 + }, + { + "epoch": 0.34291593943677545, + "grad_norm": 2129.092134332189, + "learning_rate": 1.4919623881616989e-05, + "loss": 315.438, + "step": 17830 + }, + { + "epoch": 0.343108264697256, + "grad_norm": 1769.4656200328968, + "learning_rate": 1.4914309469343158e-05, + "loss": 308.637, + "step": 17840 + }, + { + "epoch": 0.34330058995773655, + "grad_norm": 1921.0105116747557, + "learning_rate": 1.4908993226570214e-05, + "loss": 335.8036, + "step": 17850 + }, + { + "epoch": 0.34349291521821707, + "grad_norm": 2489.4731367913478, + "learning_rate": 1.4903675155278365e-05, + "loss": 316.3473, + "step": 17860 + }, + { + "epoch": 0.3436852404786976, + "grad_norm": 1754.5021339822188, + "learning_rate": 1.4898355257448508e-05, + "loss": 326.5426, + "step": 17870 + }, + { + "epoch": 0.3438775657391781, + "grad_norm": 1747.207452600057, + "learning_rate": 1.4893033535062219e-05, + "loss": 315.3213, + "step": 17880 + }, + { + "epoch": 0.3440698909996586, + "grad_norm": 1905.074965064672, + "learning_rate": 1.4887709990101748e-05, + "loss": 321.391, + "step": 17890 + }, + { + "epoch": 0.34426221626013914, + "grad_norm": 2023.926765494719, + "learning_rate": 1.4882384624550028e-05, + "loss": 326.0344, + "step": 17900 + }, + { + "epoch": 0.34445454152061966, + "grad_norm": 2048.243543418818, + "learning_rate": 1.4877057440390672e-05, + "loss": 312.2275, + "step": 17910 + }, + { + "epoch": 0.3446468667811002, + "grad_norm": 2440.3189371072713, + "learning_rate": 1.4871728439607967e-05, + "loss": 322.354, + "step": 17920 + }, + { + "epoch": 0.3448391920415807, + "grad_norm": 2049.14963222684, + "learning_rate": 1.4866397624186877e-05, + "loss": 319.812, + "step": 17930 + }, + { + "epoch": 0.3450315173020612, + "grad_norm": 1764.1111453288904, + "learning_rate": 1.4861064996113042e-05, + "loss": 314.0869, + "step": 17940 + }, + { + "epoch": 0.3452238425625418, + "grad_norm": 2046.198191920155, + "learning_rate": 1.485573055737278e-05, + "loss": 326.3094, + "step": 17950 + }, + { + "epoch": 0.3454161678230223, + "grad_norm": 2385.1610373708513, + "learning_rate": 1.485039430995308e-05, + "loss": 320.4093, + "step": 17960 + }, + { + "epoch": 0.34560849308350283, + "grad_norm": 2310.3930441294906, + "learning_rate": 1.4845056255841608e-05, + "loss": 324.5415, + "step": 17970 + }, + { + "epoch": 0.34580081834398335, + "grad_norm": 2326.4672852586045, + "learning_rate": 1.48397163970267e-05, + "loss": 332.3383, + "step": 17980 + }, + { + "epoch": 0.34599314360446387, + "grad_norm": 2074.804843858858, + "learning_rate": 1.4834374735497362e-05, + "loss": 319.3209, + "step": 17990 + }, + { + "epoch": 0.3461854688649444, + "grad_norm": 2094.009201572699, + "learning_rate": 1.4829031273243277e-05, + "loss": 327.2733, + "step": 18000 + }, + { + "epoch": 0.3463777941254249, + "grad_norm": 1859.8269819007644, + "learning_rate": 1.4823686012254798e-05, + "loss": 314.172, + "step": 18010 + }, + { + "epoch": 0.34657011938590543, + "grad_norm": 1765.6420807287334, + "learning_rate": 1.4818338954522943e-05, + "loss": 311.1994, + "step": 18020 + }, + { + "epoch": 0.34676244464638595, + "grad_norm": 2018.194964187766, + "learning_rate": 1.4812990102039411e-05, + "loss": 320.7778, + "step": 18030 + }, + { + "epoch": 0.34695476990686647, + "grad_norm": 1875.9220214495574, + "learning_rate": 1.480763945679655e-05, + "loss": 314.9387, + "step": 18040 + }, + { + "epoch": 0.34714709516734704, + "grad_norm": 2084.257906742831, + "learning_rate": 1.4802287020787396e-05, + "loss": 316.8192, + "step": 18050 + }, + { + "epoch": 0.34733942042782756, + "grad_norm": 2012.1965208451954, + "learning_rate": 1.4796932796005634e-05, + "loss": 322.3936, + "step": 18060 + }, + { + "epoch": 0.3475317456883081, + "grad_norm": 1865.3618847142252, + "learning_rate": 1.4791576784445632e-05, + "loss": 313.6889, + "step": 18070 + }, + { + "epoch": 0.3477240709487886, + "grad_norm": 1972.297950123333, + "learning_rate": 1.4786218988102414e-05, + "loss": 316.1889, + "step": 18080 + }, + { + "epoch": 0.3479163962092691, + "grad_norm": 1931.513425895944, + "learning_rate": 1.4780859408971668e-05, + "loss": 309.7523, + "step": 18090 + }, + { + "epoch": 0.34810872146974964, + "grad_norm": 2143.1011270207578, + "learning_rate": 1.4775498049049754e-05, + "loss": 321.8954, + "step": 18100 + }, + { + "epoch": 0.34830104673023016, + "grad_norm": 1931.306303255075, + "learning_rate": 1.4770134910333684e-05, + "loss": 325.3682, + "step": 18110 + }, + { + "epoch": 0.3484933719907107, + "grad_norm": 1838.3266126720682, + "learning_rate": 1.4764769994821145e-05, + "loss": 327.3196, + "step": 18120 + }, + { + "epoch": 0.3486856972511912, + "grad_norm": 2205.6414409446684, + "learning_rate": 1.4759403304510472e-05, + "loss": 325.9669, + "step": 18130 + }, + { + "epoch": 0.3488780225116717, + "grad_norm": 2158.1387264897116, + "learning_rate": 1.475403484140067e-05, + "loss": 328.6126, + "step": 18140 + }, + { + "epoch": 0.3490703477721523, + "grad_norm": 1933.6185484531904, + "learning_rate": 1.4748664607491408e-05, + "loss": 318.6722, + "step": 18150 + }, + { + "epoch": 0.3492626730326328, + "grad_norm": 1798.4376151061012, + "learning_rate": 1.4743292604783008e-05, + "loss": 322.1551, + "step": 18160 + }, + { + "epoch": 0.34945499829311333, + "grad_norm": 1880.9531443315814, + "learning_rate": 1.4737918835276451e-05, + "loss": 317.39, + "step": 18170 + }, + { + "epoch": 0.34964732355359385, + "grad_norm": 2041.1887987519235, + "learning_rate": 1.4732543300973374e-05, + "loss": 317.1812, + "step": 18180 + }, + { + "epoch": 0.34983964881407437, + "grad_norm": 1905.7255850185568, + "learning_rate": 1.472716600387608e-05, + "loss": 317.1819, + "step": 18190 + }, + { + "epoch": 0.3500319740745549, + "grad_norm": 1853.7110640473709, + "learning_rate": 1.4721786945987519e-05, + "loss": 324.7371, + "step": 18200 + }, + { + "epoch": 0.3502242993350354, + "grad_norm": 2109.624381698033, + "learning_rate": 1.4716406129311307e-05, + "loss": 322.3356, + "step": 18210 + }, + { + "epoch": 0.3504166245955159, + "grad_norm": 2084.676895145801, + "learning_rate": 1.4711023555851702e-05, + "loss": 324.9865, + "step": 18220 + }, + { + "epoch": 0.35060894985599644, + "grad_norm": 2066.4189840773197, + "learning_rate": 1.470563922761363e-05, + "loss": 307.3408, + "step": 18230 + }, + { + "epoch": 0.35080127511647696, + "grad_norm": 1869.6423144397857, + "learning_rate": 1.470025314660266e-05, + "loss": 319.2566, + "step": 18240 + }, + { + "epoch": 0.35099360037695754, + "grad_norm": 2095.261229177306, + "learning_rate": 1.4694865314825024e-05, + "loss": 322.8215, + "step": 18250 + }, + { + "epoch": 0.35118592563743806, + "grad_norm": 2143.808312555047, + "learning_rate": 1.4689475734287596e-05, + "loss": 321.3808, + "step": 18260 + }, + { + "epoch": 0.3513782508979186, + "grad_norm": 2027.4728901627552, + "learning_rate": 1.4684084406997903e-05, + "loss": 322.6761, + "step": 18270 + }, + { + "epoch": 0.3515705761583991, + "grad_norm": 2067.7195730213252, + "learning_rate": 1.467869133496413e-05, + "loss": 318.2469, + "step": 18280 + }, + { + "epoch": 0.3517629014188796, + "grad_norm": 2030.820218952797, + "learning_rate": 1.4673296520195105e-05, + "loss": 314.2379, + "step": 18290 + }, + { + "epoch": 0.35195522667936013, + "grad_norm": 1892.0949388087715, + "learning_rate": 1.4667899964700309e-05, + "loss": 316.7988, + "step": 18300 + }, + { + "epoch": 0.35214755193984065, + "grad_norm": 1967.7305646277794, + "learning_rate": 1.466250167048987e-05, + "loss": 317.0118, + "step": 18310 + }, + { + "epoch": 0.35233987720032117, + "grad_norm": 1784.1315707035938, + "learning_rate": 1.4657101639574563e-05, + "loss": 324.7445, + "step": 18320 + }, + { + "epoch": 0.3525322024608017, + "grad_norm": 1809.1691018959052, + "learning_rate": 1.4651699873965808e-05, + "loss": 319.2028, + "step": 18330 + }, + { + "epoch": 0.3527245277212822, + "grad_norm": 1938.2220324138657, + "learning_rate": 1.4646296375675676e-05, + "loss": 326.4758, + "step": 18340 + }, + { + "epoch": 0.35291685298176273, + "grad_norm": 1970.6963131640166, + "learning_rate": 1.464089114671688e-05, + "loss": 328.4086, + "step": 18350 + }, + { + "epoch": 0.3531091782422433, + "grad_norm": 1993.257745861049, + "learning_rate": 1.4635484189102776e-05, + "loss": 316.6254, + "step": 18360 + }, + { + "epoch": 0.3533015035027238, + "grad_norm": 2419.8754322232635, + "learning_rate": 1.4630075504847373e-05, + "loss": 318.0136, + "step": 18370 + }, + { + "epoch": 0.35349382876320434, + "grad_norm": 1734.813658359721, + "learning_rate": 1.4624665095965311e-05, + "loss": 307.9769, + "step": 18380 + }, + { + "epoch": 0.35368615402368486, + "grad_norm": 2101.34467235573, + "learning_rate": 1.4619252964471881e-05, + "loss": 313.932, + "step": 18390 + }, + { + "epoch": 0.3538784792841654, + "grad_norm": 2127.022887820255, + "learning_rate": 1.461383911238301e-05, + "loss": 329.91, + "step": 18400 + }, + { + "epoch": 0.3540708045446459, + "grad_norm": 2532.5380742352836, + "learning_rate": 1.4608423541715273e-05, + "loss": 317.393, + "step": 18410 + }, + { + "epoch": 0.3542631298051264, + "grad_norm": 2032.124816421271, + "learning_rate": 1.4603006254485874e-05, + "loss": 318.3746, + "step": 18420 + }, + { + "epoch": 0.35445545506560694, + "grad_norm": 1923.7326716813545, + "learning_rate": 1.4597587252712666e-05, + "loss": 318.1964, + "step": 18430 + }, + { + "epoch": 0.35464778032608746, + "grad_norm": 1959.1941719642653, + "learning_rate": 1.4592166538414136e-05, + "loss": 308.2598, + "step": 18440 + }, + { + "epoch": 0.354840105586568, + "grad_norm": 2058.6767455449412, + "learning_rate": 1.4586744113609416e-05, + "loss": 319.3535, + "step": 18450 + }, + { + "epoch": 0.35503243084704855, + "grad_norm": 1855.3898169160702, + "learning_rate": 1.4581319980318266e-05, + "loss": 323.4018, + "step": 18460 + }, + { + "epoch": 0.35522475610752907, + "grad_norm": 2078.0727114579167, + "learning_rate": 1.4575894140561086e-05, + "loss": 306.4881, + "step": 18470 + }, + { + "epoch": 0.3554170813680096, + "grad_norm": 2669.718554169241, + "learning_rate": 1.4570466596358914e-05, + "loss": 328.2359, + "step": 18480 + }, + { + "epoch": 0.3556094066284901, + "grad_norm": 1828.7174987731157, + "learning_rate": 1.4565037349733415e-05, + "loss": 313.8513, + "step": 18490 + }, + { + "epoch": 0.35580173188897063, + "grad_norm": 2146.5401200268298, + "learning_rate": 1.45596064027069e-05, + "loss": 316.1958, + "step": 18500 + }, + { + "epoch": 0.35599405714945115, + "grad_norm": 2058.8989907633168, + "learning_rate": 1.4554173757302303e-05, + "loss": 308.0151, + "step": 18510 + }, + { + "epoch": 0.35618638240993167, + "grad_norm": 2076.327720638844, + "learning_rate": 1.4548739415543197e-05, + "loss": 310.2676, + "step": 18520 + }, + { + "epoch": 0.3563787076704122, + "grad_norm": 1808.30379456728, + "learning_rate": 1.454330337945378e-05, + "loss": 311.1484, + "step": 18530 + }, + { + "epoch": 0.3565710329308927, + "grad_norm": 1867.0293368215587, + "learning_rate": 1.4537865651058893e-05, + "loss": 310.1113, + "step": 18540 + }, + { + "epoch": 0.3567633581913732, + "grad_norm": 1916.9749913680243, + "learning_rate": 1.4532426232383998e-05, + "loss": 315.0335, + "step": 18550 + }, + { + "epoch": 0.3569556834518538, + "grad_norm": 1888.0848784079424, + "learning_rate": 1.4526985125455184e-05, + "loss": 318.2653, + "step": 18560 + }, + { + "epoch": 0.3571480087123343, + "grad_norm": 2015.6795246459603, + "learning_rate": 1.4521542332299177e-05, + "loss": 320.7423, + "step": 18570 + }, + { + "epoch": 0.35734033397281484, + "grad_norm": 1837.266065593453, + "learning_rate": 1.4516097854943325e-05, + "loss": 311.4094, + "step": 18580 + }, + { + "epoch": 0.35753265923329536, + "grad_norm": 1981.0795432117868, + "learning_rate": 1.4510651695415612e-05, + "loss": 304.5394, + "step": 18590 + }, + { + "epoch": 0.3577249844937759, + "grad_norm": 2795.787894580473, + "learning_rate": 1.4505203855744637e-05, + "loss": 326.8149, + "step": 18600 + }, + { + "epoch": 0.3579173097542564, + "grad_norm": 2086.2310172510206, + "learning_rate": 1.4499754337959628e-05, + "loss": 317.7597, + "step": 18610 + }, + { + "epoch": 0.3581096350147369, + "grad_norm": 1921.11740535252, + "learning_rate": 1.449430314409045e-05, + "loss": 310.1388, + "step": 18620 + }, + { + "epoch": 0.35830196027521743, + "grad_norm": 2069.939321975512, + "learning_rate": 1.4488850276167572e-05, + "loss": 315.3359, + "step": 18630 + }, + { + "epoch": 0.35849428553569795, + "grad_norm": 1969.5201478033393, + "learning_rate": 1.44833957362221e-05, + "loss": 320.9641, + "step": 18640 + }, + { + "epoch": 0.3586866107961785, + "grad_norm": 2108.6232107583496, + "learning_rate": 1.4477939526285767e-05, + "loss": 322.8485, + "step": 18650 + }, + { + "epoch": 0.35887893605665905, + "grad_norm": 2163.322998180366, + "learning_rate": 1.4472481648390914e-05, + "loss": 329.6273, + "step": 18660 + }, + { + "epoch": 0.35907126131713957, + "grad_norm": 2242.9781783935637, + "learning_rate": 1.4467022104570514e-05, + "loss": 319.2365, + "step": 18670 + }, + { + "epoch": 0.3592635865776201, + "grad_norm": 2119.987093377786, + "learning_rate": 1.4461560896858156e-05, + "loss": 317.7456, + "step": 18680 + }, + { + "epoch": 0.3594559118381006, + "grad_norm": 1910.747115614292, + "learning_rate": 1.4456098027288046e-05, + "loss": 312.7495, + "step": 18690 + }, + { + "epoch": 0.3596482370985811, + "grad_norm": 1992.3958919806419, + "learning_rate": 1.4450633497895017e-05, + "loss": 312.3302, + "step": 18700 + }, + { + "epoch": 0.35984056235906164, + "grad_norm": 1867.6226183600268, + "learning_rate": 1.4445167310714514e-05, + "loss": 322.3002, + "step": 18710 + }, + { + "epoch": 0.36003288761954216, + "grad_norm": 2016.9168798952683, + "learning_rate": 1.4439699467782602e-05, + "loss": 311.2525, + "step": 18720 + }, + { + "epoch": 0.3602252128800227, + "grad_norm": 1879.312367571334, + "learning_rate": 1.4434229971135965e-05, + "loss": 318.1439, + "step": 18730 + }, + { + "epoch": 0.3604175381405032, + "grad_norm": 2128.9900564083687, + "learning_rate": 1.4428758822811894e-05, + "loss": 314.8298, + "step": 18740 + }, + { + "epoch": 0.3606098634009837, + "grad_norm": 2124.0174372540587, + "learning_rate": 1.442328602484831e-05, + "loss": 322.5594, + "step": 18750 + }, + { + "epoch": 0.3608021886614643, + "grad_norm": 1844.7012521631195, + "learning_rate": 1.441781157928373e-05, + "loss": 309.2597, + "step": 18760 + }, + { + "epoch": 0.3609945139219448, + "grad_norm": 1891.4001240238235, + "learning_rate": 1.44123354881573e-05, + "loss": 305.0057, + "step": 18770 + }, + { + "epoch": 0.36118683918242533, + "grad_norm": 1984.21550273332, + "learning_rate": 1.4406857753508772e-05, + "loss": 323.6916, + "step": 18780 + }, + { + "epoch": 0.36137916444290585, + "grad_norm": 2282.0088909131864, + "learning_rate": 1.4401378377378512e-05, + "loss": 316.7283, + "step": 18790 + }, + { + "epoch": 0.3615714897033864, + "grad_norm": 2158.795102463002, + "learning_rate": 1.43958973618075e-05, + "loss": 306.5132, + "step": 18800 + }, + { + "epoch": 0.3617638149638669, + "grad_norm": 1711.2221485116238, + "learning_rate": 1.4390414708837322e-05, + "loss": 308.3174, + "step": 18810 + }, + { + "epoch": 0.3619561402243474, + "grad_norm": 5177.026732398226, + "learning_rate": 1.4384930420510173e-05, + "loss": 309.7688, + "step": 18820 + }, + { + "epoch": 0.36214846548482793, + "grad_norm": 2371.9891617859353, + "learning_rate": 1.4379444498868864e-05, + "loss": 309.2038, + "step": 18830 + }, + { + "epoch": 0.36234079074530845, + "grad_norm": 1973.3877647265913, + "learning_rate": 1.4373956945956807e-05, + "loss": 328.1908, + "step": 18840 + }, + { + "epoch": 0.36253311600578897, + "grad_norm": 1985.5771462030311, + "learning_rate": 1.4368467763818026e-05, + "loss": 307.2351, + "step": 18850 + }, + { + "epoch": 0.3627254412662695, + "grad_norm": 1963.4280274222835, + "learning_rate": 1.436297695449715e-05, + "loss": 320.2213, + "step": 18860 + }, + { + "epoch": 0.36291776652675006, + "grad_norm": 1826.2839625130428, + "learning_rate": 1.4357484520039412e-05, + "loss": 310.4313, + "step": 18870 + }, + { + "epoch": 0.3631100917872306, + "grad_norm": 2162.835794138138, + "learning_rate": 1.4351990462490662e-05, + "loss": 312.1796, + "step": 18880 + }, + { + "epoch": 0.3633024170477111, + "grad_norm": 1685.3395633462474, + "learning_rate": 1.434649478389734e-05, + "loss": 306.479, + "step": 18890 + }, + { + "epoch": 0.3634947423081916, + "grad_norm": 1776.8404205917066, + "learning_rate": 1.4340997486306491e-05, + "loss": 325.0773, + "step": 18900 + }, + { + "epoch": 0.36368706756867214, + "grad_norm": 1697.9502374028443, + "learning_rate": 1.4335498571765777e-05, + "loss": 319.4545, + "step": 18910 + }, + { + "epoch": 0.36387939282915266, + "grad_norm": 2193.8686567811064, + "learning_rate": 1.4329998042323447e-05, + "loss": 303.4668, + "step": 18920 + }, + { + "epoch": 0.3640717180896332, + "grad_norm": 2084.9775769730727, + "learning_rate": 1.4324495900028358e-05, + "loss": 319.325, + "step": 18930 + }, + { + "epoch": 0.3642640433501137, + "grad_norm": 2012.3829496433511, + "learning_rate": 1.4318992146929967e-05, + "loss": 311.6177, + "step": 18940 + }, + { + "epoch": 0.3644563686105942, + "grad_norm": 1820.7741656741277, + "learning_rate": 1.4313486785078335e-05, + "loss": 310.1186, + "step": 18950 + }, + { + "epoch": 0.36464869387107474, + "grad_norm": 1762.4376145747967, + "learning_rate": 1.4307979816524111e-05, + "loss": 315.7654, + "step": 18960 + }, + { + "epoch": 0.3648410191315553, + "grad_norm": 2166.390370436035, + "learning_rate": 1.4302471243318554e-05, + "loss": 318.3813, + "step": 18970 + }, + { + "epoch": 0.36503334439203583, + "grad_norm": 1983.5687903411094, + "learning_rate": 1.4296961067513519e-05, + "loss": 312.6711, + "step": 18980 + }, + { + "epoch": 0.36522566965251635, + "grad_norm": 1926.7835146225914, + "learning_rate": 1.4291449291161452e-05, + "loss": 310.5246, + "step": 18990 + }, + { + "epoch": 0.36541799491299687, + "grad_norm": 1927.507952019359, + "learning_rate": 1.4285935916315401e-05, + "loss": 310.5273, + "step": 19000 + }, + { + "epoch": 0.3656103201734774, + "grad_norm": 2016.7856479857473, + "learning_rate": 1.4280420945029004e-05, + "loss": 321.7089, + "step": 19010 + }, + { + "epoch": 0.3658026454339579, + "grad_norm": 1786.1187781237063, + "learning_rate": 1.4274904379356498e-05, + "loss": 314.2794, + "step": 19020 + }, + { + "epoch": 0.3659949706944384, + "grad_norm": 2282.8631987246968, + "learning_rate": 1.4269386221352714e-05, + "loss": 314.0156, + "step": 19030 + }, + { + "epoch": 0.36618729595491895, + "grad_norm": 1928.6441685431853, + "learning_rate": 1.4263866473073076e-05, + "loss": 310.3671, + "step": 19040 + }, + { + "epoch": 0.36637962121539946, + "grad_norm": 1907.3715722787026, + "learning_rate": 1.4258345136573595e-05, + "loss": 303.5411, + "step": 19050 + }, + { + "epoch": 0.36657194647588, + "grad_norm": 2103.9555527095317, + "learning_rate": 1.4252822213910878e-05, + "loss": 313.5066, + "step": 19060 + }, + { + "epoch": 0.36676427173636056, + "grad_norm": 2010.3933330536374, + "learning_rate": 1.4247297707142126e-05, + "loss": 310.4386, + "step": 19070 + }, + { + "epoch": 0.3669565969968411, + "grad_norm": 2063.4211860022233, + "learning_rate": 1.4241771618325123e-05, + "loss": 309.1785, + "step": 19080 + }, + { + "epoch": 0.3671489222573216, + "grad_norm": 1956.2391677728288, + "learning_rate": 1.4236243949518249e-05, + "loss": 320.1582, + "step": 19090 + }, + { + "epoch": 0.3673412475178021, + "grad_norm": 1950.6993736481784, + "learning_rate": 1.4230714702780466e-05, + "loss": 310.1097, + "step": 19100 + }, + { + "epoch": 0.36753357277828264, + "grad_norm": 1904.7005026276433, + "learning_rate": 1.422518388017133e-05, + "loss": 317.9098, + "step": 19110 + }, + { + "epoch": 0.36772589803876315, + "grad_norm": 1915.5051774378703, + "learning_rate": 1.4219651483750978e-05, + "loss": 308.9524, + "step": 19120 + }, + { + "epoch": 0.3679182232992437, + "grad_norm": 1809.2140718054884, + "learning_rate": 1.4214117515580139e-05, + "loss": 313.6805, + "step": 19130 + }, + { + "epoch": 0.3681105485597242, + "grad_norm": 2092.510817558125, + "learning_rate": 1.4208581977720124e-05, + "loss": 317.2792, + "step": 19140 + }, + { + "epoch": 0.3683028738202047, + "grad_norm": 1933.8141245130055, + "learning_rate": 1.420304487223283e-05, + "loss": 305.4637, + "step": 19150 + }, + { + "epoch": 0.36849519908068523, + "grad_norm": 1952.8186864404734, + "learning_rate": 1.4197506201180737e-05, + "loss": 313.2031, + "step": 19160 + }, + { + "epoch": 0.3686875243411658, + "grad_norm": 2378.1138098497067, + "learning_rate": 1.4191965966626908e-05, + "loss": 301.6322, + "step": 19170 + }, + { + "epoch": 0.3688798496016463, + "grad_norm": 1934.6936088973725, + "learning_rate": 1.418642417063499e-05, + "loss": 303.935, + "step": 19180 + }, + { + "epoch": 0.36907217486212684, + "grad_norm": 1952.5898928401564, + "learning_rate": 1.4180880815269207e-05, + "loss": 301.3802, + "step": 19190 + }, + { + "epoch": 0.36926450012260736, + "grad_norm": 2078.106652840737, + "learning_rate": 1.4175335902594372e-05, + "loss": 307.9113, + "step": 19200 + }, + { + "epoch": 0.3694568253830879, + "grad_norm": 1852.4005499181155, + "learning_rate": 1.416978943467587e-05, + "loss": 314.8365, + "step": 19210 + }, + { + "epoch": 0.3696491506435684, + "grad_norm": 2110.3624721328297, + "learning_rate": 1.4164241413579669e-05, + "loss": 315.4488, + "step": 19220 + }, + { + "epoch": 0.3698414759040489, + "grad_norm": 2166.5131944429427, + "learning_rate": 1.4158691841372318e-05, + "loss": 316.4969, + "step": 19230 + }, + { + "epoch": 0.37003380116452944, + "grad_norm": 1998.1800692681916, + "learning_rate": 1.4153140720120936e-05, + "loss": 307.1049, + "step": 19240 + }, + { + "epoch": 0.37022612642500996, + "grad_norm": 2095.3634420311373, + "learning_rate": 1.4147588051893233e-05, + "loss": 307.5317, + "step": 19250 + }, + { + "epoch": 0.3704184516854905, + "grad_norm": 1965.7569767201355, + "learning_rate": 1.4142033838757476e-05, + "loss": 308.2359, + "step": 19260 + }, + { + "epoch": 0.370610776945971, + "grad_norm": 1809.7745368880326, + "learning_rate": 1.4136478082782525e-05, + "loss": 308.7504, + "step": 19270 + }, + { + "epoch": 0.3708031022064516, + "grad_norm": 1730.1898504855028, + "learning_rate": 1.4130920786037798e-05, + "loss": 309.5616, + "step": 19280 + }, + { + "epoch": 0.3709954274669321, + "grad_norm": 1896.750624329191, + "learning_rate": 1.412536195059331e-05, + "loss": 315.0295, + "step": 19290 + }, + { + "epoch": 0.3711877527274126, + "grad_norm": 1807.6157393935891, + "learning_rate": 1.4119801578519625e-05, + "loss": 316.3897, + "step": 19300 + }, + { + "epoch": 0.37138007798789313, + "grad_norm": 1857.1932913980472, + "learning_rate": 1.4114239671887892e-05, + "loss": 318.6112, + "step": 19310 + }, + { + "epoch": 0.37157240324837365, + "grad_norm": 1868.5201341392255, + "learning_rate": 1.4108676232769831e-05, + "loss": 311.7844, + "step": 19320 + }, + { + "epoch": 0.37176472850885417, + "grad_norm": 1856.0649615078016, + "learning_rate": 1.410311126323773e-05, + "loss": 309.1805, + "step": 19330 + }, + { + "epoch": 0.3719570537693347, + "grad_norm": 1996.4311550702541, + "learning_rate": 1.409754476536445e-05, + "loss": 303.6167, + "step": 19340 + }, + { + "epoch": 0.3721493790298152, + "grad_norm": 2106.725941757717, + "learning_rate": 1.4091976741223414e-05, + "loss": 304.3662, + "step": 19350 + }, + { + "epoch": 0.3723417042902957, + "grad_norm": 3005.4569479533657, + "learning_rate": 1.408640719288863e-05, + "loss": 325.7058, + "step": 19360 + }, + { + "epoch": 0.37253402955077625, + "grad_norm": 1718.3012210985255, + "learning_rate": 1.408083612243465e-05, + "loss": 316.0893, + "step": 19370 + }, + { + "epoch": 0.3727263548112568, + "grad_norm": 1824.6290617258912, + "learning_rate": 1.4075263531936614e-05, + "loss": 313.317, + "step": 19380 + }, + { + "epoch": 0.37291868007173734, + "grad_norm": 1905.9594764430149, + "learning_rate": 1.4069689423470219e-05, + "loss": 304.4165, + "step": 19390 + }, + { + "epoch": 0.37311100533221786, + "grad_norm": 1924.1121000284206, + "learning_rate": 1.4064113799111725e-05, + "loss": 312.3041, + "step": 19400 + }, + { + "epoch": 0.3733033305926984, + "grad_norm": 1846.1835277819953, + "learning_rate": 1.405853666093796e-05, + "loss": 307.0441, + "step": 19410 + }, + { + "epoch": 0.3734956558531789, + "grad_norm": 1973.9086806717667, + "learning_rate": 1.405295801102632e-05, + "loss": 309.8415, + "step": 19420 + }, + { + "epoch": 0.3736879811136594, + "grad_norm": 1915.473961806632, + "learning_rate": 1.4047377851454758e-05, + "loss": 317.7333, + "step": 19430 + }, + { + "epoch": 0.37388030637413994, + "grad_norm": 3382.5123701927755, + "learning_rate": 1.4041796184301788e-05, + "loss": 303.0723, + "step": 19440 + }, + { + "epoch": 0.37407263163462046, + "grad_norm": 1971.8609093903099, + "learning_rate": 1.4036213011646496e-05, + "loss": 316.7415, + "step": 19450 + }, + { + "epoch": 0.374264956895101, + "grad_norm": 2101.1370131171916, + "learning_rate": 1.4030628335568515e-05, + "loss": 317.4839, + "step": 19460 + }, + { + "epoch": 0.3744572821555815, + "grad_norm": 1850.3315744382107, + "learning_rate": 1.4025042158148048e-05, + "loss": 307.7922, + "step": 19470 + }, + { + "epoch": 0.37464960741606207, + "grad_norm": 2139.3139482021966, + "learning_rate": 1.4019454481465853e-05, + "loss": 311.2271, + "step": 19480 + }, + { + "epoch": 0.3748419326765426, + "grad_norm": 2051.129950819537, + "learning_rate": 1.4013865307603248e-05, + "loss": 304.0343, + "step": 19490 + }, + { + "epoch": 0.3750342579370231, + "grad_norm": 1986.422851101012, + "learning_rate": 1.4008274638642103e-05, + "loss": 316.6552, + "step": 19500 + }, + { + "epoch": 0.3752265831975036, + "grad_norm": 2141.2965455420413, + "learning_rate": 1.4002682476664857e-05, + "loss": 307.9586, + "step": 19510 + }, + { + "epoch": 0.37541890845798415, + "grad_norm": 1783.2098075083138, + "learning_rate": 1.3997088823754494e-05, + "loss": 301.2444, + "step": 19520 + }, + { + "epoch": 0.37561123371846467, + "grad_norm": 2356.57486767964, + "learning_rate": 1.399149368199456e-05, + "loss": 314.9871, + "step": 19530 + }, + { + "epoch": 0.3758035589789452, + "grad_norm": 1921.3499142703806, + "learning_rate": 1.398589705346915e-05, + "loss": 306.1402, + "step": 19540 + }, + { + "epoch": 0.3759958842394257, + "grad_norm": 1994.8936359819968, + "learning_rate": 1.3980298940262918e-05, + "loss": 308.3559, + "step": 19550 + }, + { + "epoch": 0.3761882094999062, + "grad_norm": 1916.4051343548426, + "learning_rate": 1.3974699344461065e-05, + "loss": 306.8087, + "step": 19560 + }, + { + "epoch": 0.37638053476038674, + "grad_norm": 1862.185972088084, + "learning_rate": 1.396909826814935e-05, + "loss": 314.2664, + "step": 19570 + }, + { + "epoch": 0.3765728600208673, + "grad_norm": 2152.1525009731063, + "learning_rate": 1.3963495713414085e-05, + "loss": 308.783, + "step": 19580 + }, + { + "epoch": 0.37676518528134784, + "grad_norm": 1719.5065081571609, + "learning_rate": 1.3957891682342127e-05, + "loss": 310.7855, + "step": 19590 + }, + { + "epoch": 0.37695751054182836, + "grad_norm": 2097.0431765370918, + "learning_rate": 1.3952286177020879e-05, + "loss": 309.2558, + "step": 19600 + }, + { + "epoch": 0.3771498358023089, + "grad_norm": 1944.0466706544819, + "learning_rate": 1.3946679199538308e-05, + "loss": 302.2271, + "step": 19610 + }, + { + "epoch": 0.3773421610627894, + "grad_norm": 1918.8782355396409, + "learning_rate": 1.3941070751982917e-05, + "loss": 302.6986, + "step": 19620 + }, + { + "epoch": 0.3775344863232699, + "grad_norm": 1861.0998667746328, + "learning_rate": 1.3935460836443758e-05, + "loss": 308.8521, + "step": 19630 + }, + { + "epoch": 0.37772681158375043, + "grad_norm": 1865.3471595427097, + "learning_rate": 1.3929849455010433e-05, + "loss": 314.5706, + "step": 19640 + }, + { + "epoch": 0.37791913684423095, + "grad_norm": 2021.8855646959091, + "learning_rate": 1.3924236609773094e-05, + "loss": 309.5404, + "step": 19650 + }, + { + "epoch": 0.37811146210471147, + "grad_norm": 2017.1147507819485, + "learning_rate": 1.3918622302822425e-05, + "loss": 306.997, + "step": 19660 + }, + { + "epoch": 0.378303787365192, + "grad_norm": 2030.0589372727097, + "learning_rate": 1.391300653624967e-05, + "loss": 316.3002, + "step": 19670 + }, + { + "epoch": 0.37849611262567256, + "grad_norm": 1872.5902010347115, + "learning_rate": 1.39073893121466e-05, + "loss": 308.783, + "step": 19680 + }, + { + "epoch": 0.3786884378861531, + "grad_norm": 1937.8186631922715, + "learning_rate": 1.3901770632605546e-05, + "loss": 306.2152, + "step": 19690 + }, + { + "epoch": 0.3788807631466336, + "grad_norm": 1725.4803130162502, + "learning_rate": 1.3896150499719372e-05, + "loss": 301.3231, + "step": 19700 + }, + { + "epoch": 0.3790730884071141, + "grad_norm": 1912.7927087868466, + "learning_rate": 1.3890528915581482e-05, + "loss": 308.968, + "step": 19710 + }, + { + "epoch": 0.37926541366759464, + "grad_norm": 1833.9923013133835, + "learning_rate": 1.3884905882285829e-05, + "loss": 308.7003, + "step": 19720 + }, + { + "epoch": 0.37945773892807516, + "grad_norm": 1628.4517013746747, + "learning_rate": 1.3879281401926894e-05, + "loss": 302.7566, + "step": 19730 + }, + { + "epoch": 0.3796500641885557, + "grad_norm": 1851.9579537275933, + "learning_rate": 1.3873655476599707e-05, + "loss": 302.5998, + "step": 19740 + }, + { + "epoch": 0.3798423894490362, + "grad_norm": 1871.898105679082, + "learning_rate": 1.3868028108399829e-05, + "loss": 304.5054, + "step": 19750 + }, + { + "epoch": 0.3800347147095167, + "grad_norm": 1777.4171810886094, + "learning_rate": 1.3862399299423364e-05, + "loss": 306.4808, + "step": 19760 + }, + { + "epoch": 0.38022703996999724, + "grad_norm": 1784.2512467844372, + "learning_rate": 1.3856769051766947e-05, + "loss": 307.1631, + "step": 19770 + }, + { + "epoch": 0.38041936523047776, + "grad_norm": 1792.7844449582904, + "learning_rate": 1.3851137367527757e-05, + "loss": 295.8958, + "step": 19780 + }, + { + "epoch": 0.38061169049095833, + "grad_norm": 1943.2985094579828, + "learning_rate": 1.3845504248803501e-05, + "loss": 309.6948, + "step": 19790 + }, + { + "epoch": 0.38080401575143885, + "grad_norm": 1795.7383849342336, + "learning_rate": 1.383986969769242e-05, + "loss": 318.3569, + "step": 19800 + }, + { + "epoch": 0.38099634101191937, + "grad_norm": 1790.1327686269296, + "learning_rate": 1.38342337162933e-05, + "loss": 311.1551, + "step": 19810 + }, + { + "epoch": 0.3811886662723999, + "grad_norm": 1814.746142774624, + "learning_rate": 1.3828596306705442e-05, + "loss": 303.1561, + "step": 19820 + }, + { + "epoch": 0.3813809915328804, + "grad_norm": 2045.3185718760863, + "learning_rate": 1.3822957471028693e-05, + "loss": 296.6726, + "step": 19830 + }, + { + "epoch": 0.38157331679336093, + "grad_norm": 1870.208715880873, + "learning_rate": 1.3817317211363422e-05, + "loss": 297.9434, + "step": 19840 + }, + { + "epoch": 0.38176564205384145, + "grad_norm": 2449.6777574980124, + "learning_rate": 1.3811675529810535e-05, + "loss": 303.1532, + "step": 19850 + }, + { + "epoch": 0.38195796731432197, + "grad_norm": 2092.790781881497, + "learning_rate": 1.3806032428471463e-05, + "loss": 306.2771, + "step": 19860 + }, + { + "epoch": 0.3821502925748025, + "grad_norm": 2165.8330746346837, + "learning_rate": 1.3800387909448171e-05, + "loss": 308.9358, + "step": 19870 + }, + { + "epoch": 0.382342617835283, + "grad_norm": 2036.3089414824317, + "learning_rate": 1.3794741974843154e-05, + "loss": 312.2573, + "step": 19880 + }, + { + "epoch": 0.3825349430957636, + "grad_norm": 1758.9101366434552, + "learning_rate": 1.3789094626759419e-05, + "loss": 301.9613, + "step": 19890 + }, + { + "epoch": 0.3827272683562441, + "grad_norm": 1721.577430651585, + "learning_rate": 1.3783445867300515e-05, + "loss": 311.2537, + "step": 19900 + }, + { + "epoch": 0.3829195936167246, + "grad_norm": 2152.709150467604, + "learning_rate": 1.3777795698570511e-05, + "loss": 312.099, + "step": 19910 + }, + { + "epoch": 0.38311191887720514, + "grad_norm": 1718.4757220843696, + "learning_rate": 1.3772144122674e-05, + "loss": 301.4711, + "step": 19920 + }, + { + "epoch": 0.38330424413768566, + "grad_norm": 1753.7455137870545, + "learning_rate": 1.3766491141716103e-05, + "loss": 303.6883, + "step": 19930 + }, + { + "epoch": 0.3834965693981662, + "grad_norm": 1921.620070446975, + "learning_rate": 1.3760836757802462e-05, + "loss": 308.2743, + "step": 19940 + }, + { + "epoch": 0.3836888946586467, + "grad_norm": 1820.7438708008497, + "learning_rate": 1.3755180973039241e-05, + "loss": 308.7924, + "step": 19950 + }, + { + "epoch": 0.3838812199191272, + "grad_norm": 2025.6634304031434, + "learning_rate": 1.3749523789533128e-05, + "loss": 304.7339, + "step": 19960 + }, + { + "epoch": 0.38407354517960773, + "grad_norm": 2104.3083708910913, + "learning_rate": 1.374386520939133e-05, + "loss": 299.9552, + "step": 19970 + }, + { + "epoch": 0.38426587044008825, + "grad_norm": 2070.0041175750175, + "learning_rate": 1.3738205234721568e-05, + "loss": 319.6469, + "step": 19980 + }, + { + "epoch": 0.3844581957005688, + "grad_norm": 1869.4763599286875, + "learning_rate": 1.3732543867632098e-05, + "loss": 306.2007, + "step": 19990 + }, + { + "epoch": 0.38465052096104935, + "grad_norm": 1892.170127635212, + "learning_rate": 1.3726881110231682e-05, + "loss": 302.9237, + "step": 20000 + }, + { + "epoch": 0.38484284622152987, + "grad_norm": 1968.1574306751745, + "learning_rate": 1.3721216964629605e-05, + "loss": 308.6295, + "step": 20010 + }, + { + "epoch": 0.3850351714820104, + "grad_norm": 1816.0456579383317, + "learning_rate": 1.3715551432935664e-05, + "loss": 317.6802, + "step": 20020 + }, + { + "epoch": 0.3852274967424909, + "grad_norm": 1864.5578866243363, + "learning_rate": 1.3709884517260178e-05, + "loss": 318.0706, + "step": 20030 + }, + { + "epoch": 0.3854198220029714, + "grad_norm": 1839.697498226589, + "learning_rate": 1.3704216219713984e-05, + "loss": 300.5747, + "step": 20040 + }, + { + "epoch": 0.38561214726345194, + "grad_norm": 2010.0250361594676, + "learning_rate": 1.3698546542408424e-05, + "loss": 307.6041, + "step": 20050 + }, + { + "epoch": 0.38580447252393246, + "grad_norm": 1887.5334117631078, + "learning_rate": 1.369287548745536e-05, + "loss": 304.0994, + "step": 20060 + }, + { + "epoch": 0.385996797784413, + "grad_norm": 3058.863678189641, + "learning_rate": 1.3687203056967165e-05, + "loss": 309.2307, + "step": 20070 + }, + { + "epoch": 0.3861891230448935, + "grad_norm": 1924.364075007915, + "learning_rate": 1.368152925305673e-05, + "loss": 312.305, + "step": 20080 + }, + { + "epoch": 0.3863814483053741, + "grad_norm": 1694.1889732630975, + "learning_rate": 1.3675854077837449e-05, + "loss": 307.0336, + "step": 20090 + }, + { + "epoch": 0.3865737735658546, + "grad_norm": 1735.3038576083945, + "learning_rate": 1.3670177533423234e-05, + "loss": 296.0693, + "step": 20100 + }, + { + "epoch": 0.3867660988263351, + "grad_norm": 1800.3112961267825, + "learning_rate": 1.3664499621928502e-05, + "loss": 302.2557, + "step": 20110 + }, + { + "epoch": 0.38695842408681563, + "grad_norm": 1886.0789788519592, + "learning_rate": 1.3658820345468183e-05, + "loss": 309.6297, + "step": 20120 + }, + { + "epoch": 0.38715074934729615, + "grad_norm": 1860.5563989674151, + "learning_rate": 1.365313970615771e-05, + "loss": 299.3849, + "step": 20130 + }, + { + "epoch": 0.38734307460777667, + "grad_norm": 1933.7444355192472, + "learning_rate": 1.3647457706113031e-05, + "loss": 310.0622, + "step": 20140 + }, + { + "epoch": 0.3875353998682572, + "grad_norm": 1923.294080177032, + "learning_rate": 1.36417743474506e-05, + "loss": 311.0535, + "step": 20150 + }, + { + "epoch": 0.3877277251287377, + "grad_norm": 1742.9458139277679, + "learning_rate": 1.3636089632287369e-05, + "loss": 315.1836, + "step": 20160 + }, + { + "epoch": 0.38792005038921823, + "grad_norm": 1699.483128600211, + "learning_rate": 1.36304035627408e-05, + "loss": 310.0409, + "step": 20170 + }, + { + "epoch": 0.38811237564969875, + "grad_norm": 1838.9495473511888, + "learning_rate": 1.3624716140928861e-05, + "loss": 316.7457, + "step": 20180 + }, + { + "epoch": 0.38830470091017927, + "grad_norm": 1872.3533495535078, + "learning_rate": 1.3619027368970025e-05, + "loss": 309.5434, + "step": 20190 + }, + { + "epoch": 0.38849702617065984, + "grad_norm": 1910.4574769206988, + "learning_rate": 1.3613337248983265e-05, + "loss": 302.2189, + "step": 20200 + }, + { + "epoch": 0.38868935143114036, + "grad_norm": 1999.723835217515, + "learning_rate": 1.3607645783088055e-05, + "loss": 308.666, + "step": 20210 + }, + { + "epoch": 0.3888816766916209, + "grad_norm": 1820.57260452842, + "learning_rate": 1.3601952973404376e-05, + "loss": 314.4062, + "step": 20220 + }, + { + "epoch": 0.3890740019521014, + "grad_norm": 1830.3909934952903, + "learning_rate": 1.35962588220527e-05, + "loss": 306.443, + "step": 20230 + }, + { + "epoch": 0.3892663272125819, + "grad_norm": 2191.5605644904786, + "learning_rate": 1.3590563331154008e-05, + "loss": 316.5143, + "step": 20240 + }, + { + "epoch": 0.38945865247306244, + "grad_norm": 1963.1167099716085, + "learning_rate": 1.3584866502829774e-05, + "loss": 309.769, + "step": 20250 + }, + { + "epoch": 0.38965097773354296, + "grad_norm": 1885.9349248379274, + "learning_rate": 1.3579168339201975e-05, + "loss": 307.6866, + "step": 20260 + }, + { + "epoch": 0.3898433029940235, + "grad_norm": 1982.2356328691396, + "learning_rate": 1.3573468842393077e-05, + "loss": 302.0568, + "step": 20270 + }, + { + "epoch": 0.390035628254504, + "grad_norm": 1787.9098675560642, + "learning_rate": 1.356776801452606e-05, + "loss": 305.8773, + "step": 20280 + }, + { + "epoch": 0.3902279535149845, + "grad_norm": 1900.9447895241024, + "learning_rate": 1.3562065857724378e-05, + "loss": 306.7134, + "step": 20290 + }, + { + "epoch": 0.3904202787754651, + "grad_norm": 2310.223757347156, + "learning_rate": 1.3556362374111993e-05, + "loss": 304.0658, + "step": 20300 + }, + { + "epoch": 0.3906126040359456, + "grad_norm": 2283.91585825219, + "learning_rate": 1.3550657565813362e-05, + "loss": 311.3708, + "step": 20310 + }, + { + "epoch": 0.39080492929642613, + "grad_norm": 1843.021944476782, + "learning_rate": 1.3544951434953423e-05, + "loss": 299.4825, + "step": 20320 + }, + { + "epoch": 0.39099725455690665, + "grad_norm": 1968.392839143164, + "learning_rate": 1.3539243983657627e-05, + "loss": 304.3815, + "step": 20330 + }, + { + "epoch": 0.39118957981738717, + "grad_norm": 1935.425234679553, + "learning_rate": 1.3533535214051896e-05, + "loss": 304.0946, + "step": 20340 + }, + { + "epoch": 0.3913819050778677, + "grad_norm": 2016.4306690262665, + "learning_rate": 1.3527825128262656e-05, + "loss": 305.2499, + "step": 20350 + }, + { + "epoch": 0.3915742303383482, + "grad_norm": 1738.8058022016662, + "learning_rate": 1.3522113728416821e-05, + "loss": 311.6364, + "step": 20360 + }, + { + "epoch": 0.3917665555988287, + "grad_norm": 1792.621179012536, + "learning_rate": 1.3516401016641793e-05, + "loss": 299.9791, + "step": 20370 + }, + { + "epoch": 0.39195888085930924, + "grad_norm": 1919.9712146204274, + "learning_rate": 1.351068699506546e-05, + "loss": 305.6713, + "step": 20380 + }, + { + "epoch": 0.39215120611978976, + "grad_norm": 1753.0533645800076, + "learning_rate": 1.3504971665816202e-05, + "loss": 301.0382, + "step": 20390 + }, + { + "epoch": 0.39234353138027034, + "grad_norm": 1790.4699338073806, + "learning_rate": 1.3499255031022887e-05, + "loss": 294.2801, + "step": 20400 + }, + { + "epoch": 0.39253585664075086, + "grad_norm": 1716.6242004302876, + "learning_rate": 1.3493537092814863e-05, + "loss": 307.2205, + "step": 20410 + }, + { + "epoch": 0.3927281819012314, + "grad_norm": 2143.055767097122, + "learning_rate": 1.348781785332197e-05, + "loss": 303.4742, + "step": 20420 + }, + { + "epoch": 0.3929205071617119, + "grad_norm": 1811.5101931208928, + "learning_rate": 1.3482097314674526e-05, + "loss": 300.5256, + "step": 20430 + }, + { + "epoch": 0.3931128324221924, + "grad_norm": 1908.5382972739787, + "learning_rate": 1.3476375479003347e-05, + "loss": 300.7552, + "step": 20440 + }, + { + "epoch": 0.39330515768267293, + "grad_norm": 1934.5156499404786, + "learning_rate": 1.3470652348439715e-05, + "loss": 302.4374, + "step": 20450 + }, + { + "epoch": 0.39349748294315345, + "grad_norm": 1944.0607000815519, + "learning_rate": 1.3464927925115405e-05, + "loss": 318.1395, + "step": 20460 + }, + { + "epoch": 0.393689808203634, + "grad_norm": 1816.2609733133675, + "learning_rate": 1.3459202211162663e-05, + "loss": 300.7997, + "step": 20470 + }, + { + "epoch": 0.3938821334641145, + "grad_norm": 2159.638351922511, + "learning_rate": 1.345347520871423e-05, + "loss": 300.4586, + "step": 20480 + }, + { + "epoch": 0.394074458724595, + "grad_norm": 1877.6505123313825, + "learning_rate": 1.3447746919903318e-05, + "loss": 297.6736, + "step": 20490 + }, + { + "epoch": 0.3942667839850756, + "grad_norm": 1693.4089025440492, + "learning_rate": 1.3442017346863618e-05, + "loss": 306.1064, + "step": 20500 + }, + { + "epoch": 0.3944591092455561, + "grad_norm": 1928.2149955214938, + "learning_rate": 1.3436286491729306e-05, + "loss": 312.0971, + "step": 20510 + }, + { + "epoch": 0.3946514345060366, + "grad_norm": 1944.4569307096185, + "learning_rate": 1.3430554356635029e-05, + "loss": 307.6115, + "step": 20520 + }, + { + "epoch": 0.39484375976651714, + "grad_norm": 1820.6749193770745, + "learning_rate": 1.342482094371591e-05, + "loss": 300.1809, + "step": 20530 + }, + { + "epoch": 0.39503608502699766, + "grad_norm": 1897.183396809748, + "learning_rate": 1.341908625510755e-05, + "loss": 311.0384, + "step": 20540 + }, + { + "epoch": 0.3952284102874782, + "grad_norm": 1738.8602129288627, + "learning_rate": 1.341335029294603e-05, + "loss": 298.289, + "step": 20550 + }, + { + "epoch": 0.3954207355479587, + "grad_norm": 1850.6868360886565, + "learning_rate": 1.3407613059367898e-05, + "loss": 301.0768, + "step": 20560 + }, + { + "epoch": 0.3956130608084392, + "grad_norm": 1897.2260623714128, + "learning_rate": 1.3401874556510182e-05, + "loss": 301.7463, + "step": 20570 + }, + { + "epoch": 0.39580538606891974, + "grad_norm": 1802.2790799115212, + "learning_rate": 1.3396134786510375e-05, + "loss": 308.0902, + "step": 20580 + }, + { + "epoch": 0.39599771132940026, + "grad_norm": 2107.27764567609, + "learning_rate": 1.3390393751506452e-05, + "loss": 304.4175, + "step": 20590 + }, + { + "epoch": 0.39619003658988083, + "grad_norm": 1924.5927188210346, + "learning_rate": 1.338465145363685e-05, + "loss": 294.9072, + "step": 20600 + }, + { + "epoch": 0.39638236185036135, + "grad_norm": 2123.723157473039, + "learning_rate": 1.3378907895040478e-05, + "loss": 306.6204, + "step": 20610 + }, + { + "epoch": 0.3965746871108419, + "grad_norm": 1978.9194895967685, + "learning_rate": 1.3373163077856724e-05, + "loss": 301.3621, + "step": 20620 + }, + { + "epoch": 0.3967670123713224, + "grad_norm": 2008.498190228556, + "learning_rate": 1.3367417004225429e-05, + "loss": 307.2951, + "step": 20630 + }, + { + "epoch": 0.3969593376318029, + "grad_norm": 1983.9487977245653, + "learning_rate": 1.3361669676286919e-05, + "loss": 306.8815, + "step": 20640 + }, + { + "epoch": 0.39715166289228343, + "grad_norm": 1885.2817705791942, + "learning_rate": 1.335592109618197e-05, + "loss": 291.2721, + "step": 20650 + }, + { + "epoch": 0.39734398815276395, + "grad_norm": 1978.2404699626795, + "learning_rate": 1.335017126605184e-05, + "loss": 307.5826, + "step": 20660 + }, + { + "epoch": 0.39753631341324447, + "grad_norm": 1711.536637295286, + "learning_rate": 1.3344420188038243e-05, + "loss": 295.397, + "step": 20670 + }, + { + "epoch": 0.397728638673725, + "grad_norm": 2072.1110820578065, + "learning_rate": 1.333866786428336e-05, + "loss": 305.0892, + "step": 20680 + }, + { + "epoch": 0.3979209639342055, + "grad_norm": 1986.9482166882167, + "learning_rate": 1.3332914296929838e-05, + "loss": 308.2779, + "step": 20690 + }, + { + "epoch": 0.398113289194686, + "grad_norm": 1979.9944757036844, + "learning_rate": 1.3327159488120784e-05, + "loss": 299.6597, + "step": 20700 + }, + { + "epoch": 0.3983056144551666, + "grad_norm": 1970.288215913745, + "learning_rate": 1.3321403439999775e-05, + "loss": 302.8106, + "step": 20710 + }, + { + "epoch": 0.3984979397156471, + "grad_norm": 2021.9350583544017, + "learning_rate": 1.3315646154710835e-05, + "loss": 309.0129, + "step": 20720 + }, + { + "epoch": 0.39869026497612764, + "grad_norm": 1902.0656693466192, + "learning_rate": 1.3309887634398466e-05, + "loss": 310.4726, + "step": 20730 + }, + { + "epoch": 0.39888259023660816, + "grad_norm": 1849.287582506347, + "learning_rate": 1.3304127881207614e-05, + "loss": 297.1906, + "step": 20740 + }, + { + "epoch": 0.3990749154970887, + "grad_norm": 2095.0836566519965, + "learning_rate": 1.3298366897283697e-05, + "loss": 308.3663, + "step": 20750 + }, + { + "epoch": 0.3992672407575692, + "grad_norm": 1842.6313532937406, + "learning_rate": 1.3292604684772585e-05, + "loss": 301.0415, + "step": 20760 + }, + { + "epoch": 0.3994595660180497, + "grad_norm": 1924.561840897817, + "learning_rate": 1.3286841245820605e-05, + "loss": 299.9735, + "step": 20770 + }, + { + "epoch": 0.39965189127853024, + "grad_norm": 1953.7262204634574, + "learning_rate": 1.3281076582574548e-05, + "loss": 299.2476, + "step": 20780 + }, + { + "epoch": 0.39984421653901075, + "grad_norm": 1771.9099335540877, + "learning_rate": 1.3275310697181652e-05, + "loss": 297.1938, + "step": 20790 + }, + { + "epoch": 0.4000365417994913, + "grad_norm": 1885.1043841854043, + "learning_rate": 1.3269543591789616e-05, + "loss": 294.3169, + "step": 20800 + }, + { + "epoch": 0.40022886705997185, + "grad_norm": 1833.7788057586558, + "learning_rate": 1.3263775268546588e-05, + "loss": 304.3086, + "step": 20810 + }, + { + "epoch": 0.40042119232045237, + "grad_norm": 1917.3282746074783, + "learning_rate": 1.3258005729601178e-05, + "loss": 306.5842, + "step": 20820 + }, + { + "epoch": 0.4006135175809329, + "grad_norm": 2121.8848780096496, + "learning_rate": 1.325223497710244e-05, + "loss": 307.2001, + "step": 20830 + }, + { + "epoch": 0.4008058428414134, + "grad_norm": 1960.7797063608668, + "learning_rate": 1.3246463013199882e-05, + "loss": 308.4385, + "step": 20840 + }, + { + "epoch": 0.4009981681018939, + "grad_norm": 1834.4042510846705, + "learning_rate": 1.3240689840043475e-05, + "loss": 302.6972, + "step": 20850 + }, + { + "epoch": 0.40119049336237445, + "grad_norm": 2089.5596117458117, + "learning_rate": 1.323491545978362e-05, + "loss": 297.695, + "step": 20860 + }, + { + "epoch": 0.40138281862285496, + "grad_norm": 1804.7775436660631, + "learning_rate": 1.3229139874571186e-05, + "loss": 299.2692, + "step": 20870 + }, + { + "epoch": 0.4015751438833355, + "grad_norm": 2222.277733768969, + "learning_rate": 1.3223363086557477e-05, + "loss": 296.3286, + "step": 20880 + }, + { + "epoch": 0.401767469143816, + "grad_norm": 1854.8714144108574, + "learning_rate": 1.3217585097894255e-05, + "loss": 290.2188, + "step": 20890 + }, + { + "epoch": 0.4019597944042965, + "grad_norm": 1811.285512528795, + "learning_rate": 1.3211805910733724e-05, + "loss": 308.2051, + "step": 20900 + }, + { + "epoch": 0.4021521196647771, + "grad_norm": 1660.2408208230754, + "learning_rate": 1.3206025527228535e-05, + "loss": 298.5014, + "step": 20910 + }, + { + "epoch": 0.4023444449252576, + "grad_norm": 1792.3885659954376, + "learning_rate": 1.3200243949531788e-05, + "loss": 306.2331, + "step": 20920 + }, + { + "epoch": 0.40253677018573814, + "grad_norm": 1845.8499639942563, + "learning_rate": 1.3194461179797023e-05, + "loss": 297.2011, + "step": 20930 + }, + { + "epoch": 0.40272909544621865, + "grad_norm": 1914.73668626949, + "learning_rate": 1.3188677220178225e-05, + "loss": 287.7009, + "step": 20940 + }, + { + "epoch": 0.4029214207066992, + "grad_norm": 2056.1501768728363, + "learning_rate": 1.3182892072829828e-05, + "loss": 287.964, + "step": 20950 + }, + { + "epoch": 0.4031137459671797, + "grad_norm": 2399.747600124868, + "learning_rate": 1.3177105739906702e-05, + "loss": 290.4619, + "step": 20960 + }, + { + "epoch": 0.4033060712276602, + "grad_norm": 1773.4294751260477, + "learning_rate": 1.3171318223564156e-05, + "loss": 297.6337, + "step": 20970 + }, + { + "epoch": 0.40349839648814073, + "grad_norm": 1786.5106947788709, + "learning_rate": 1.3165529525957947e-05, + "loss": 302.6508, + "step": 20980 + }, + { + "epoch": 0.40369072174862125, + "grad_norm": 1928.64473627544, + "learning_rate": 1.3159739649244271e-05, + "loss": 300.5086, + "step": 20990 + }, + { + "epoch": 0.40388304700910177, + "grad_norm": 1869.8660979602523, + "learning_rate": 1.3153948595579764e-05, + "loss": 303.0783, + "step": 21000 + }, + { + "epoch": 0.40407537226958234, + "grad_norm": 1920.4635089054786, + "learning_rate": 1.3148156367121491e-05, + "loss": 309.3283, + "step": 21010 + }, + { + "epoch": 0.40426769753006286, + "grad_norm": 2272.931885824946, + "learning_rate": 1.3142362966026967e-05, + "loss": 300.1064, + "step": 21020 + }, + { + "epoch": 0.4044600227905434, + "grad_norm": 1858.2356739937206, + "learning_rate": 1.3136568394454136e-05, + "loss": 300.9337, + "step": 21030 + }, + { + "epoch": 0.4046523480510239, + "grad_norm": 1615.2452610517569, + "learning_rate": 1.313077265456138e-05, + "loss": 294.8649, + "step": 21040 + }, + { + "epoch": 0.4048446733115044, + "grad_norm": 1877.643361455623, + "learning_rate": 1.3124975748507514e-05, + "loss": 303.0341, + "step": 21050 + }, + { + "epoch": 0.40503699857198494, + "grad_norm": 1802.6308984113073, + "learning_rate": 1.3119177678451793e-05, + "loss": 308.1125, + "step": 21060 + }, + { + "epoch": 0.40522932383246546, + "grad_norm": 1901.1710674314595, + "learning_rate": 1.3113378446553903e-05, + "loss": 307.1291, + "step": 21070 + }, + { + "epoch": 0.405421649092946, + "grad_norm": 1838.5408841054675, + "learning_rate": 1.3107578054973962e-05, + "loss": 304.0225, + "step": 21080 + }, + { + "epoch": 0.4056139743534265, + "grad_norm": 1714.6882619276512, + "learning_rate": 1.3101776505872516e-05, + "loss": 301.9726, + "step": 21090 + }, + { + "epoch": 0.405806299613907, + "grad_norm": 2140.1223557205826, + "learning_rate": 1.309597380141055e-05, + "loss": 298.8791, + "step": 21100 + }, + { + "epoch": 0.40599862487438754, + "grad_norm": 1714.7941183356802, + "learning_rate": 1.3090169943749475e-05, + "loss": 297.6602, + "step": 21110 + }, + { + "epoch": 0.4061909501348681, + "grad_norm": 1948.562309954345, + "learning_rate": 1.3084364935051132e-05, + "loss": 298.1119, + "step": 21120 + }, + { + "epoch": 0.40638327539534863, + "grad_norm": 1972.0588281432786, + "learning_rate": 1.307855877747779e-05, + "loss": 308.4491, + "step": 21130 + }, + { + "epoch": 0.40657560065582915, + "grad_norm": 1708.7027356324083, + "learning_rate": 1.307275147319215e-05, + "loss": 289.4312, + "step": 21140 + }, + { + "epoch": 0.40676792591630967, + "grad_norm": 1920.7208460277584, + "learning_rate": 1.3066943024357333e-05, + "loss": 290.8358, + "step": 21150 + }, + { + "epoch": 0.4069602511767902, + "grad_norm": 2134.1834226023575, + "learning_rate": 1.306113343313689e-05, + "loss": 301.6934, + "step": 21160 + }, + { + "epoch": 0.4071525764372707, + "grad_norm": 1747.1162765035936, + "learning_rate": 1.3055322701694801e-05, + "loss": 307.3142, + "step": 21170 + }, + { + "epoch": 0.4073449016977512, + "grad_norm": 2325.287656446818, + "learning_rate": 1.3049510832195466e-05, + "loss": 304.0766, + "step": 21180 + }, + { + "epoch": 0.40753722695823175, + "grad_norm": 1728.5381080702648, + "learning_rate": 1.3043697826803707e-05, + "loss": 294.9708, + "step": 21190 + }, + { + "epoch": 0.40772955221871227, + "grad_norm": 1811.3401316095064, + "learning_rate": 1.303788368768478e-05, + "loss": 287.6326, + "step": 21200 + }, + { + "epoch": 0.4079218774791928, + "grad_norm": 1945.005544787725, + "learning_rate": 1.3032068417004351e-05, + "loss": 302.9698, + "step": 21210 + }, + { + "epoch": 0.40811420273967336, + "grad_norm": 1974.8318234836372, + "learning_rate": 1.302625201692851e-05, + "loss": 299.1098, + "step": 21220 + }, + { + "epoch": 0.4083065280001539, + "grad_norm": 1721.9382760643002, + "learning_rate": 1.302043448962378e-05, + "loss": 292.9459, + "step": 21230 + }, + { + "epoch": 0.4084988532606344, + "grad_norm": 2057.284639654734, + "learning_rate": 1.301461583725708e-05, + "loss": 304.0902, + "step": 21240 + }, + { + "epoch": 0.4086911785211149, + "grad_norm": 2448.143948204674, + "learning_rate": 1.3008796061995772e-05, + "loss": 303.6405, + "step": 21250 + }, + { + "epoch": 0.40888350378159544, + "grad_norm": 2091.8278752606043, + "learning_rate": 1.3002975166007618e-05, + "loss": 295.297, + "step": 21260 + }, + { + "epoch": 0.40907582904207596, + "grad_norm": 1952.5980407949598, + "learning_rate": 1.2997153151460814e-05, + "loss": 299.297, + "step": 21270 + }, + { + "epoch": 0.4092681543025565, + "grad_norm": 1742.8795948181055, + "learning_rate": 1.299133002052396e-05, + "loss": 295.3625, + "step": 21280 + }, + { + "epoch": 0.409460479563037, + "grad_norm": 1778.9719901360481, + "learning_rate": 1.2985505775366079e-05, + "loss": 301.1708, + "step": 21290 + }, + { + "epoch": 0.4096528048235175, + "grad_norm": 2001.3645957659803, + "learning_rate": 1.2979680418156604e-05, + "loss": 307.0015, + "step": 21300 + }, + { + "epoch": 0.40984513008399803, + "grad_norm": 1931.998494168223, + "learning_rate": 1.297385395106538e-05, + "loss": 304.0401, + "step": 21310 + }, + { + "epoch": 0.4100374553444786, + "grad_norm": 1928.5697027821686, + "learning_rate": 1.2968026376262679e-05, + "loss": 286.0681, + "step": 21320 + }, + { + "epoch": 0.4102297806049591, + "grad_norm": 1879.6835077467238, + "learning_rate": 1.2962197695919167e-05, + "loss": 304.8239, + "step": 21330 + }, + { + "epoch": 0.41042210586543965, + "grad_norm": 1806.144973724193, + "learning_rate": 1.2956367912205939e-05, + "loss": 301.0433, + "step": 21340 + }, + { + "epoch": 0.41061443112592017, + "grad_norm": 1724.298103341707, + "learning_rate": 1.2950537027294487e-05, + "loss": 294.6038, + "step": 21350 + }, + { + "epoch": 0.4108067563864007, + "grad_norm": 1857.6353073166906, + "learning_rate": 1.2944705043356722e-05, + "loss": 297.4807, + "step": 21360 + }, + { + "epoch": 0.4109990816468812, + "grad_norm": 1900.9297964014493, + "learning_rate": 1.2938871962564965e-05, + "loss": 292.8086, + "step": 21370 + }, + { + "epoch": 0.4111914069073617, + "grad_norm": 1930.3994623822425, + "learning_rate": 1.2933037787091935e-05, + "loss": 305.0081, + "step": 21380 + }, + { + "epoch": 0.41138373216784224, + "grad_norm": 2107.1891030578204, + "learning_rate": 1.2927202519110775e-05, + "loss": 308.3201, + "step": 21390 + }, + { + "epoch": 0.41157605742832276, + "grad_norm": 1970.3479751807247, + "learning_rate": 1.2921366160795017e-05, + "loss": 296.5559, + "step": 21400 + }, + { + "epoch": 0.4117683826888033, + "grad_norm": 1719.1955993990437, + "learning_rate": 1.2915528714318612e-05, + "loss": 306.5933, + "step": 21410 + }, + { + "epoch": 0.41196070794928386, + "grad_norm": 1813.5370900230837, + "learning_rate": 1.2909690181855914e-05, + "loss": 295.4979, + "step": 21420 + }, + { + "epoch": 0.4121530332097644, + "grad_norm": 1926.807755797552, + "learning_rate": 1.290385056558168e-05, + "loss": 298.9159, + "step": 21430 + }, + { + "epoch": 0.4123453584702449, + "grad_norm": 1727.0178089454132, + "learning_rate": 1.2898009867671066e-05, + "loss": 295.8993, + "step": 21440 + }, + { + "epoch": 0.4125376837307254, + "grad_norm": 1960.7607981142648, + "learning_rate": 1.2892168090299639e-05, + "loss": 290.4833, + "step": 21450 + }, + { + "epoch": 0.41273000899120593, + "grad_norm": 2084.632148230582, + "learning_rate": 1.2886325235643367e-05, + "loss": 293.5019, + "step": 21460 + }, + { + "epoch": 0.41292233425168645, + "grad_norm": 1892.695566989941, + "learning_rate": 1.288048130587861e-05, + "loss": 291.6956, + "step": 21470 + }, + { + "epoch": 0.41311465951216697, + "grad_norm": 1956.193304032728, + "learning_rate": 1.287463630318214e-05, + "loss": 296.915, + "step": 21480 + }, + { + "epoch": 0.4133069847726475, + "grad_norm": 1957.0029367183324, + "learning_rate": 1.2868790229731123e-05, + "loss": 298.4166, + "step": 21490 + }, + { + "epoch": 0.413499310033128, + "grad_norm": 1936.4240602095022, + "learning_rate": 1.2862943087703127e-05, + "loss": 294.1665, + "step": 21500 + }, + { + "epoch": 0.41369163529360853, + "grad_norm": 1900.9234262394655, + "learning_rate": 1.2857094879276115e-05, + "loss": 301.6358, + "step": 21510 + }, + { + "epoch": 0.4138839605540891, + "grad_norm": 2008.8984612329446, + "learning_rate": 1.2851245606628447e-05, + "loss": 297.9974, + "step": 21520 + }, + { + "epoch": 0.4140762858145696, + "grad_norm": 2062.091397879479, + "learning_rate": 1.2845395271938876e-05, + "loss": 294.9293, + "step": 21530 + }, + { + "epoch": 0.41426861107505014, + "grad_norm": 1970.1115120327734, + "learning_rate": 1.2839543877386562e-05, + "loss": 290.6354, + "step": 21540 + }, + { + "epoch": 0.41446093633553066, + "grad_norm": 1866.7710249497457, + "learning_rate": 1.283369142515105e-05, + "loss": 306.8433, + "step": 21550 + }, + { + "epoch": 0.4146532615960112, + "grad_norm": 2121.5235414930767, + "learning_rate": 1.282783791741228e-05, + "loss": 308.4364, + "step": 21560 + }, + { + "epoch": 0.4148455868564917, + "grad_norm": 5781.957453285093, + "learning_rate": 1.2821983356350593e-05, + "loss": 316.6105, + "step": 21570 + }, + { + "epoch": 0.4150379121169722, + "grad_norm": 1955.5440952715064, + "learning_rate": 1.2816127744146711e-05, + "loss": 297.5697, + "step": 21580 + }, + { + "epoch": 0.41523023737745274, + "grad_norm": 1869.1392561426703, + "learning_rate": 1.2810271082981755e-05, + "loss": 297.6666, + "step": 21590 + }, + { + "epoch": 0.41542256263793326, + "grad_norm": 1755.9972261241396, + "learning_rate": 1.2804413375037232e-05, + "loss": 289.4357, + "step": 21600 + }, + { + "epoch": 0.4156148878984138, + "grad_norm": 1923.5048604579317, + "learning_rate": 1.2798554622495042e-05, + "loss": 294.3641, + "step": 21610 + }, + { + "epoch": 0.4158072131588943, + "grad_norm": 1709.1281205509256, + "learning_rate": 1.2792694827537477e-05, + "loss": 300.6993, + "step": 21620 + }, + { + "epoch": 0.41599953841937487, + "grad_norm": 2116.160721850635, + "learning_rate": 1.2786833992347212e-05, + "loss": 293.2287, + "step": 21630 + }, + { + "epoch": 0.4161918636798554, + "grad_norm": 1815.0267312970952, + "learning_rate": 1.2780972119107312e-05, + "loss": 299.084, + "step": 21640 + }, + { + "epoch": 0.4163841889403359, + "grad_norm": 1764.3040461231517, + "learning_rate": 1.2775109210001225e-05, + "loss": 288.4564, + "step": 21650 + }, + { + "epoch": 0.41657651420081643, + "grad_norm": 1834.684604356838, + "learning_rate": 1.2769245267212789e-05, + "loss": 296.033, + "step": 21660 + }, + { + "epoch": 0.41676883946129695, + "grad_norm": 1926.984096764738, + "learning_rate": 1.2763380292926227e-05, + "loss": 294.0977, + "step": 21670 + }, + { + "epoch": 0.41696116472177747, + "grad_norm": 1782.2764671826128, + "learning_rate": 1.2757514289326146e-05, + "loss": 293.2214, + "step": 21680 + }, + { + "epoch": 0.417153489982258, + "grad_norm": 1790.3535213323487, + "learning_rate": 1.2751647258597531e-05, + "loss": 295.3477, + "step": 21690 + }, + { + "epoch": 0.4173458152427385, + "grad_norm": 1735.5075479791724, + "learning_rate": 1.274577920292576e-05, + "loss": 285.6985, + "step": 21700 + }, + { + "epoch": 0.417538140503219, + "grad_norm": 1883.5984831296273, + "learning_rate": 1.2739910124496585e-05, + "loss": 299.5036, + "step": 21710 + }, + { + "epoch": 0.41773046576369954, + "grad_norm": 1738.302029158757, + "learning_rate": 1.2734040025496137e-05, + "loss": 293.0357, + "step": 21720 + }, + { + "epoch": 0.4179227910241801, + "grad_norm": 1887.6161143895417, + "learning_rate": 1.2728168908110937e-05, + "loss": 294.5104, + "step": 21730 + }, + { + "epoch": 0.41811511628466064, + "grad_norm": 1738.6950897243764, + "learning_rate": 1.2722296774527871e-05, + "loss": 285.5324, + "step": 21740 + }, + { + "epoch": 0.41830744154514116, + "grad_norm": 1559.3244221477023, + "learning_rate": 1.271642362693422e-05, + "loss": 306.2529, + "step": 21750 + }, + { + "epoch": 0.4184997668056217, + "grad_norm": 1916.9145767723796, + "learning_rate": 1.2710549467517628e-05, + "loss": 292.2417, + "step": 21760 + }, + { + "epoch": 0.4186920920661022, + "grad_norm": 1635.4967774731526, + "learning_rate": 1.270467429846613e-05, + "loss": 280.6471, + "step": 21770 + }, + { + "epoch": 0.4188844173265827, + "grad_norm": 1710.6036949396544, + "learning_rate": 1.2698798121968122e-05, + "loss": 292.6485, + "step": 21780 + }, + { + "epoch": 0.41907674258706323, + "grad_norm": 1725.0476783710178, + "learning_rate": 1.2692920940212387e-05, + "loss": 304.322, + "step": 21790 + }, + { + "epoch": 0.41926906784754375, + "grad_norm": 1682.6806608105035, + "learning_rate": 1.2687042755388077e-05, + "loss": 287.9139, + "step": 21800 + }, + { + "epoch": 0.41946139310802427, + "grad_norm": 1717.7820827129565, + "learning_rate": 1.2681163569684718e-05, + "loss": 297.2933, + "step": 21810 + }, + { + "epoch": 0.4196537183685048, + "grad_norm": 1681.5071264004243, + "learning_rate": 1.2675283385292212e-05, + "loss": 297.7878, + "step": 21820 + }, + { + "epoch": 0.41984604362898537, + "grad_norm": 1983.29666861836, + "learning_rate": 1.2669402204400825e-05, + "loss": 289.575, + "step": 21830 + }, + { + "epoch": 0.4200383688894659, + "grad_norm": 1901.4875038480411, + "learning_rate": 1.2663520029201206e-05, + "loss": 286.8185, + "step": 21840 + }, + { + "epoch": 0.4202306941499464, + "grad_norm": 2029.6549501739983, + "learning_rate": 1.2657636861884363e-05, + "loss": 289.6501, + "step": 21850 + }, + { + "epoch": 0.4204230194104269, + "grad_norm": 2115.6233724820286, + "learning_rate": 1.2651752704641686e-05, + "loss": 294.1346, + "step": 21860 + }, + { + "epoch": 0.42061534467090744, + "grad_norm": 1708.5996574100225, + "learning_rate": 1.2645867559664918e-05, + "loss": 296.3937, + "step": 21870 + }, + { + "epoch": 0.42080766993138796, + "grad_norm": 1717.8765160615162, + "learning_rate": 1.2639981429146184e-05, + "loss": 284.5227, + "step": 21880 + }, + { + "epoch": 0.4209999951918685, + "grad_norm": 1909.8869030102762, + "learning_rate": 1.2634094315277967e-05, + "loss": 290.336, + "step": 21890 + }, + { + "epoch": 0.421192320452349, + "grad_norm": 2021.3751083371153, + "learning_rate": 1.262820622025312e-05, + "loss": 300.9393, + "step": 21900 + }, + { + "epoch": 0.4213846457128295, + "grad_norm": 1873.8783271538828, + "learning_rate": 1.2622317146264864e-05, + "loss": 296.5559, + "step": 21910 + }, + { + "epoch": 0.42157697097331004, + "grad_norm": 1882.235048962817, + "learning_rate": 1.2616427095506779e-05, + "loss": 295.0742, + "step": 21920 + }, + { + "epoch": 0.4217692962337906, + "grad_norm": 1750.0323455953414, + "learning_rate": 1.2610536070172815e-05, + "loss": 296.5745, + "step": 21930 + }, + { + "epoch": 0.42196162149427113, + "grad_norm": 1803.931236936814, + "learning_rate": 1.2604644072457277e-05, + "loss": 292.0239, + "step": 21940 + }, + { + "epoch": 0.42215394675475165, + "grad_norm": 1818.828242287104, + "learning_rate": 1.259875110455484e-05, + "loss": 294.9897, + "step": 21950 + }, + { + "epoch": 0.42234627201523217, + "grad_norm": 1632.7115684899397, + "learning_rate": 1.2592857168660535e-05, + "loss": 294.3602, + "step": 21960 + }, + { + "epoch": 0.4225385972757127, + "grad_norm": 2032.3467186529804, + "learning_rate": 1.2586962266969758e-05, + "loss": 295.6209, + "step": 21970 + }, + { + "epoch": 0.4227309225361932, + "grad_norm": 2031.1830937625148, + "learning_rate": 1.2581066401678261e-05, + "loss": 307.6854, + "step": 21980 + }, + { + "epoch": 0.42292324779667373, + "grad_norm": 1714.389078398596, + "learning_rate": 1.2575169574982158e-05, + "loss": 290.2839, + "step": 21990 + }, + { + "epoch": 0.42311557305715425, + "grad_norm": 1773.1484100021128, + "learning_rate": 1.2569271789077919e-05, + "loss": 298.0011, + "step": 22000 + }, + { + "epoch": 0.42330789831763477, + "grad_norm": 1852.364034041167, + "learning_rate": 1.256337304616237e-05, + "loss": 287.7284, + "step": 22010 + }, + { + "epoch": 0.4235002235781153, + "grad_norm": 1801.260639873809, + "learning_rate": 1.25574733484327e-05, + "loss": 289.85, + "step": 22020 + }, + { + "epoch": 0.4236925488385958, + "grad_norm": 1993.5948357991335, + "learning_rate": 1.2551572698086446e-05, + "loss": 296.8924, + "step": 22030 + }, + { + "epoch": 0.4238848740990764, + "grad_norm": 1768.890574758428, + "learning_rate": 1.2545671097321497e-05, + "loss": 292.4219, + "step": 22040 + }, + { + "epoch": 0.4240771993595569, + "grad_norm": 1729.1688215889747, + "learning_rate": 1.2539768548336112e-05, + "loss": 294.4896, + "step": 22050 + }, + { + "epoch": 0.4242695246200374, + "grad_norm": 1634.3742657371652, + "learning_rate": 1.2533865053328886e-05, + "loss": 292.933, + "step": 22060 + }, + { + "epoch": 0.42446184988051794, + "grad_norm": 1865.495961916923, + "learning_rate": 1.2527960614498778e-05, + "loss": 299.1005, + "step": 22070 + }, + { + "epoch": 0.42465417514099846, + "grad_norm": 1926.8130712866237, + "learning_rate": 1.252205523404509e-05, + "loss": 299.8442, + "step": 22080 + }, + { + "epoch": 0.424846500401479, + "grad_norm": 1801.0788168391084, + "learning_rate": 1.2516148914167481e-05, + "loss": 299.7834, + "step": 22090 + }, + { + "epoch": 0.4250388256619595, + "grad_norm": 1906.336234849879, + "learning_rate": 1.2510241657065958e-05, + "loss": 284.22, + "step": 22100 + }, + { + "epoch": 0.42523115092244, + "grad_norm": 1854.4176695067338, + "learning_rate": 1.2504333464940874e-05, + "loss": 284.0715, + "step": 22110 + }, + { + "epoch": 0.42542347618292053, + "grad_norm": 2191.795101238567, + "learning_rate": 1.2498424339992934e-05, + "loss": 289.333, + "step": 22120 + }, + { + "epoch": 0.42561580144340105, + "grad_norm": 1915.6945693128519, + "learning_rate": 1.2492514284423194e-05, + "loss": 294.488, + "step": 22130 + }, + { + "epoch": 0.42580812670388163, + "grad_norm": 1861.301185668194, + "learning_rate": 1.2486603300433045e-05, + "loss": 290.9472, + "step": 22140 + }, + { + "epoch": 0.42600045196436215, + "grad_norm": 1772.4191910270065, + "learning_rate": 1.2480691390224238e-05, + "loss": 297.0469, + "step": 22150 + }, + { + "epoch": 0.42619277722484267, + "grad_norm": 1754.227022755561, + "learning_rate": 1.2474778555998855e-05, + "loss": 278.9458, + "step": 22160 + }, + { + "epoch": 0.4263851024853232, + "grad_norm": 1846.1901970919557, + "learning_rate": 1.246886479995933e-05, + "loss": 298.2821, + "step": 22170 + }, + { + "epoch": 0.4265774277458037, + "grad_norm": 1906.5567654606382, + "learning_rate": 1.2462950124308444e-05, + "loss": 296.0458, + "step": 22180 + }, + { + "epoch": 0.4267697530062842, + "grad_norm": 1927.1360362454009, + "learning_rate": 1.2457034531249313e-05, + "loss": 290.0084, + "step": 22190 + }, + { + "epoch": 0.42696207826676474, + "grad_norm": 1932.2271998168626, + "learning_rate": 1.2451118022985402e-05, + "loss": 291.821, + "step": 22200 + }, + { + "epoch": 0.42715440352724526, + "grad_norm": 1802.4321113262986, + "learning_rate": 1.2445200601720504e-05, + "loss": 284.2274, + "step": 22210 + }, + { + "epoch": 0.4273467287877258, + "grad_norm": 1843.828194362936, + "learning_rate": 1.243928226965877e-05, + "loss": 289.67, + "step": 22220 + }, + { + "epoch": 0.4275390540482063, + "grad_norm": 1745.8425061119474, + "learning_rate": 1.2433363029004675e-05, + "loss": 289.6597, + "step": 22230 + }, + { + "epoch": 0.4277313793086869, + "grad_norm": 1720.8414756983705, + "learning_rate": 1.2427442881963042e-05, + "loss": 290.4238, + "step": 22240 + }, + { + "epoch": 0.4279237045691674, + "grad_norm": 1861.0984929042972, + "learning_rate": 1.242152183073902e-05, + "loss": 294.6113, + "step": 22250 + }, + { + "epoch": 0.4281160298296479, + "grad_norm": 1764.0034169805558, + "learning_rate": 1.2415599877538111e-05, + "loss": 283.536, + "step": 22260 + }, + { + "epoch": 0.42830835509012843, + "grad_norm": 1803.4229172536875, + "learning_rate": 1.2409677024566145e-05, + "loss": 301.8564, + "step": 22270 + }, + { + "epoch": 0.42850068035060895, + "grad_norm": 2321.4059910452024, + "learning_rate": 1.2403753274029281e-05, + "loss": 295.7579, + "step": 22280 + }, + { + "epoch": 0.4286930056110895, + "grad_norm": 1855.341243688981, + "learning_rate": 1.2397828628134028e-05, + "loss": 283.3616, + "step": 22290 + }, + { + "epoch": 0.42888533087157, + "grad_norm": 1755.6097579861346, + "learning_rate": 1.2391903089087208e-05, + "loss": 291.0702, + "step": 22300 + }, + { + "epoch": 0.4290776561320505, + "grad_norm": 1735.3184787380108, + "learning_rate": 1.2385976659095993e-05, + "loss": 289.4383, + "step": 22310 + }, + { + "epoch": 0.42926998139253103, + "grad_norm": 1654.0841749641856, + "learning_rate": 1.2380049340367876e-05, + "loss": 285.3722, + "step": 22320 + }, + { + "epoch": 0.42946230665301155, + "grad_norm": 1937.8697323262231, + "learning_rate": 1.2374121135110688e-05, + "loss": 299.4644, + "step": 22330 + }, + { + "epoch": 0.4296546319134921, + "grad_norm": 1885.82108573323, + "learning_rate": 1.2368192045532586e-05, + "loss": 292.687, + "step": 22340 + }, + { + "epoch": 0.42984695717397264, + "grad_norm": 1746.53780413847, + "learning_rate": 1.236226207384206e-05, + "loss": 288.4655, + "step": 22350 + }, + { + "epoch": 0.43003928243445316, + "grad_norm": 1912.713094679185, + "learning_rate": 1.2356331222247929e-05, + "loss": 295.9019, + "step": 22360 + }, + { + "epoch": 0.4302316076949337, + "grad_norm": 1860.961584211503, + "learning_rate": 1.2350399492959328e-05, + "loss": 297.3277, + "step": 22370 + }, + { + "epoch": 0.4304239329554142, + "grad_norm": 1996.6017135778345, + "learning_rate": 1.234446688818574e-05, + "loss": 293.0891, + "step": 22380 + }, + { + "epoch": 0.4306162582158947, + "grad_norm": 1920.130615923429, + "learning_rate": 1.233853341013695e-05, + "loss": 291.4277, + "step": 22390 + }, + { + "epoch": 0.43080858347637524, + "grad_norm": 1734.5926270743116, + "learning_rate": 1.233259906102309e-05, + "loss": 288.4785, + "step": 22400 + }, + { + "epoch": 0.43100090873685576, + "grad_norm": 1671.5169904532902, + "learning_rate": 1.2326663843054603e-05, + "loss": 287.3598, + "step": 22410 + }, + { + "epoch": 0.4311932339973363, + "grad_norm": 1878.0200892778894, + "learning_rate": 1.2320727758442264e-05, + "loss": 295.8173, + "step": 22420 + }, + { + "epoch": 0.4313855592578168, + "grad_norm": 1826.1557725608554, + "learning_rate": 1.231479080939716e-05, + "loss": 301.0681, + "step": 22430 + }, + { + "epoch": 0.43157788451829737, + "grad_norm": 1624.1604849581604, + "learning_rate": 1.230885299813071e-05, + "loss": 300.6742, + "step": 22440 + }, + { + "epoch": 0.4317702097787779, + "grad_norm": 1989.9818544823925, + "learning_rate": 1.2302914326854651e-05, + "loss": 291.5841, + "step": 22450 + }, + { + "epoch": 0.4319625350392584, + "grad_norm": 1713.6037141490233, + "learning_rate": 1.2296974797781036e-05, + "loss": 299.4552, + "step": 22460 + }, + { + "epoch": 0.43215486029973893, + "grad_norm": 1932.6392639156013, + "learning_rate": 1.2291034413122247e-05, + "loss": 303.7552, + "step": 22470 + }, + { + "epoch": 0.43234718556021945, + "grad_norm": 1819.0821810113941, + "learning_rate": 1.2285093175090976e-05, + "loss": 289.8333, + "step": 22480 + }, + { + "epoch": 0.43253951082069997, + "grad_norm": 1824.8548153464003, + "learning_rate": 1.227915108590024e-05, + "loss": 285.3101, + "step": 22490 + }, + { + "epoch": 0.4327318360811805, + "grad_norm": 1747.1056902099006, + "learning_rate": 1.2273208147763363e-05, + "loss": 279.0743, + "step": 22500 + }, + { + "epoch": 0.432924161341661, + "grad_norm": 1719.7361446852408, + "learning_rate": 1.2267264362893995e-05, + "loss": 292.7379, + "step": 22510 + }, + { + "epoch": 0.4331164866021415, + "grad_norm": 1833.2847314860103, + "learning_rate": 1.2261319733506096e-05, + "loss": 290.1692, + "step": 22520 + }, + { + "epoch": 0.43330881186262205, + "grad_norm": 1779.5976041555969, + "learning_rate": 1.2255374261813944e-05, + "loss": 297.8661, + "step": 22530 + }, + { + "epoch": 0.43350113712310256, + "grad_norm": 1805.6607169983013, + "learning_rate": 1.2249427950032127e-05, + "loss": 294.5839, + "step": 22540 + }, + { + "epoch": 0.43369346238358314, + "grad_norm": 2916.730721179416, + "learning_rate": 1.224348080037555e-05, + "loss": 302.201, + "step": 22550 + }, + { + "epoch": 0.43388578764406366, + "grad_norm": 1699.518198099036, + "learning_rate": 1.2237532815059427e-05, + "loss": 289.6796, + "step": 22560 + }, + { + "epoch": 0.4340781129045442, + "grad_norm": 1795.4791786912174, + "learning_rate": 1.2231583996299285e-05, + "loss": 288.5549, + "step": 22570 + }, + { + "epoch": 0.4342704381650247, + "grad_norm": 1738.0039835942123, + "learning_rate": 1.2225634346310962e-05, + "loss": 285.4092, + "step": 22580 + }, + { + "epoch": 0.4344627634255052, + "grad_norm": 1664.6936558757486, + "learning_rate": 1.22196838673106e-05, + "loss": 294.4807, + "step": 22590 + }, + { + "epoch": 0.43465508868598574, + "grad_norm": 1738.6415660923024, + "learning_rate": 1.2213732561514657e-05, + "loss": 296.1545, + "step": 22600 + }, + { + "epoch": 0.43484741394646625, + "grad_norm": 1868.8050573850862, + "learning_rate": 1.2207780431139894e-05, + "loss": 295.5609, + "step": 22610 + }, + { + "epoch": 0.4350397392069468, + "grad_norm": 2003.515807142469, + "learning_rate": 1.2201827478403385e-05, + "loss": 289.1937, + "step": 22620 + }, + { + "epoch": 0.4352320644674273, + "grad_norm": 1656.9276532432098, + "learning_rate": 1.2195873705522508e-05, + "loss": 276.789, + "step": 22630 + }, + { + "epoch": 0.4354243897279078, + "grad_norm": 10790.686677964712, + "learning_rate": 1.2189919114714936e-05, + "loss": 281.9262, + "step": 22640 + }, + { + "epoch": 0.4356167149883884, + "grad_norm": 1764.029363039468, + "learning_rate": 1.2183963708198668e-05, + "loss": 301.1744, + "step": 22650 + }, + { + "epoch": 0.4358090402488689, + "grad_norm": 1914.4914198195202, + "learning_rate": 1.2178007488191983e-05, + "loss": 289.2565, + "step": 22660 + }, + { + "epoch": 0.4360013655093494, + "grad_norm": 1621.3208806654015, + "learning_rate": 1.2172050456913482e-05, + "loss": 293.0277, + "step": 22670 + }, + { + "epoch": 0.43619369076982994, + "grad_norm": 1748.3025486960762, + "learning_rate": 1.2166092616582055e-05, + "loss": 279.6763, + "step": 22680 + }, + { + "epoch": 0.43638601603031046, + "grad_norm": 1908.817111333349, + "learning_rate": 1.2160133969416903e-05, + "loss": 298.3905, + "step": 22690 + }, + { + "epoch": 0.436578341290791, + "grad_norm": 1757.4049454032338, + "learning_rate": 1.2154174517637526e-05, + "loss": 289.2108, + "step": 22700 + }, + { + "epoch": 0.4367706665512715, + "grad_norm": 1775.150754177834, + "learning_rate": 1.2148214263463718e-05, + "loss": 280.9745, + "step": 22710 + }, + { + "epoch": 0.436962991811752, + "grad_norm": 1960.9874272261552, + "learning_rate": 1.2142253209115577e-05, + "loss": 288.9633, + "step": 22720 + }, + { + "epoch": 0.43715531707223254, + "grad_norm": 1910.6484392043762, + "learning_rate": 1.2136291356813494e-05, + "loss": 300.4317, + "step": 22730 + }, + { + "epoch": 0.43734764233271306, + "grad_norm": 1973.128983668307, + "learning_rate": 1.2130328708778162e-05, + "loss": 287.7798, + "step": 22740 + }, + { + "epoch": 0.43753996759319363, + "grad_norm": 1780.8425845537547, + "learning_rate": 1.2124365267230571e-05, + "loss": 293.087, + "step": 22750 + }, + { + "epoch": 0.43773229285367415, + "grad_norm": 1995.534795662323, + "learning_rate": 1.2118401034392003e-05, + "loss": 293.0213, + "step": 22760 + }, + { + "epoch": 0.4379246181141547, + "grad_norm": 1691.903001368237, + "learning_rate": 1.2112436012484035e-05, + "loss": 283.7337, + "step": 22770 + }, + { + "epoch": 0.4381169433746352, + "grad_norm": 1746.81748547236, + "learning_rate": 1.210647020372854e-05, + "loss": 293.1092, + "step": 22780 + }, + { + "epoch": 0.4383092686351157, + "grad_norm": 1764.215225084088, + "learning_rate": 1.2100503610347686e-05, + "loss": 286.2631, + "step": 22790 + }, + { + "epoch": 0.43850159389559623, + "grad_norm": 1879.1926409448206, + "learning_rate": 1.2094536234563927e-05, + "loss": 293.0421, + "step": 22800 + }, + { + "epoch": 0.43869391915607675, + "grad_norm": 1790.099066455191, + "learning_rate": 1.2088568078600013e-05, + "loss": 288.7428, + "step": 22810 + }, + { + "epoch": 0.43888624441655727, + "grad_norm": 1849.6678853059964, + "learning_rate": 1.2082599144678983e-05, + "loss": 296.7231, + "step": 22820 + }, + { + "epoch": 0.4390785696770378, + "grad_norm": 1801.5874065013898, + "learning_rate": 1.2076629435024168e-05, + "loss": 291.6802, + "step": 22830 + }, + { + "epoch": 0.4392708949375183, + "grad_norm": 1898.256536186525, + "learning_rate": 1.2070658951859183e-05, + "loss": 302.706, + "step": 22840 + }, + { + "epoch": 0.4394632201979989, + "grad_norm": 1615.3688707153901, + "learning_rate": 1.2064687697407939e-05, + "loss": 300.7423, + "step": 22850 + }, + { + "epoch": 0.4396555454584794, + "grad_norm": 1799.5121917128301, + "learning_rate": 1.2058715673894625e-05, + "loss": 283.2971, + "step": 22860 + }, + { + "epoch": 0.4398478707189599, + "grad_norm": 2187.5173816919432, + "learning_rate": 1.2052742883543724e-05, + "loss": 303.7761, + "step": 22870 + }, + { + "epoch": 0.44004019597944044, + "grad_norm": 1543.7112171266506, + "learning_rate": 1.2046769328580004e-05, + "loss": 283.914, + "step": 22880 + }, + { + "epoch": 0.44023252123992096, + "grad_norm": 1938.9783279348553, + "learning_rate": 1.204079501122851e-05, + "loss": 289.9621, + "step": 22890 + }, + { + "epoch": 0.4404248465004015, + "grad_norm": 1854.0355565012467, + "learning_rate": 1.2034819933714576e-05, + "loss": 286.943, + "step": 22900 + }, + { + "epoch": 0.440617171760882, + "grad_norm": 1720.5437543112669, + "learning_rate": 1.2028844098263827e-05, + "loss": 291.134, + "step": 22910 + }, + { + "epoch": 0.4408094970213625, + "grad_norm": 1988.576575048972, + "learning_rate": 1.2022867507102159e-05, + "loss": 295.0517, + "step": 22920 + }, + { + "epoch": 0.44100182228184304, + "grad_norm": 1673.8616203733277, + "learning_rate": 1.2016890162455752e-05, + "loss": 289.4667, + "step": 22930 + }, + { + "epoch": 0.44119414754232356, + "grad_norm": 1873.379645644224, + "learning_rate": 1.2010912066551072e-05, + "loss": 283.8355, + "step": 22940 + }, + { + "epoch": 0.4413864728028041, + "grad_norm": 1770.043212712193, + "learning_rate": 1.2004933221614854e-05, + "loss": 281.9152, + "step": 22950 + }, + { + "epoch": 0.44157879806328465, + "grad_norm": 2606.506205559988, + "learning_rate": 1.1998953629874126e-05, + "loss": 282.2, + "step": 22960 + }, + { + "epoch": 0.44177112332376517, + "grad_norm": 1732.050881790884, + "learning_rate": 1.1992973293556184e-05, + "loss": 297.1208, + "step": 22970 + }, + { + "epoch": 0.4419634485842457, + "grad_norm": 1973.102295231436, + "learning_rate": 1.1986992214888607e-05, + "loss": 296.9179, + "step": 22980 + }, + { + "epoch": 0.4421557738447262, + "grad_norm": 1811.2823120979465, + "learning_rate": 1.1981010396099244e-05, + "loss": 288.4724, + "step": 22990 + }, + { + "epoch": 0.4423480991052067, + "grad_norm": 1867.996128919048, + "learning_rate": 1.1975027839416227e-05, + "loss": 287.2646, + "step": 23000 + }, + { + "epoch": 0.44254042436568725, + "grad_norm": 1913.4885113246035, + "learning_rate": 1.1969044547067961e-05, + "loss": 304.0793, + "step": 23010 + }, + { + "epoch": 0.44273274962616777, + "grad_norm": 1780.974856084601, + "learning_rate": 1.1963060521283117e-05, + "loss": 281.992, + "step": 23020 + }, + { + "epoch": 0.4429250748866483, + "grad_norm": 1683.1468078575483, + "learning_rate": 1.1957075764290652e-05, + "loss": 286.047, + "step": 23030 + }, + { + "epoch": 0.4431174001471288, + "grad_norm": 1734.3989975089044, + "learning_rate": 1.1951090278319784e-05, + "loss": 297.9622, + "step": 23040 + }, + { + "epoch": 0.4433097254076093, + "grad_norm": 1701.7504968822498, + "learning_rate": 1.1945104065600013e-05, + "loss": 281.6445, + "step": 23050 + }, + { + "epoch": 0.4435020506680899, + "grad_norm": 1704.3040180379824, + "learning_rate": 1.1939117128361101e-05, + "loss": 287.8655, + "step": 23060 + }, + { + "epoch": 0.4436943759285704, + "grad_norm": 2258.7872575451747, + "learning_rate": 1.1933129468833087e-05, + "loss": 281.5917, + "step": 23070 + }, + { + "epoch": 0.44388670118905094, + "grad_norm": 1928.2623730642117, + "learning_rate": 1.192714108924627e-05, + "loss": 283.3175, + "step": 23080 + }, + { + "epoch": 0.44407902644953146, + "grad_norm": 2073.9861314727987, + "learning_rate": 1.1921151991831225e-05, + "loss": 291.7861, + "step": 23090 + }, + { + "epoch": 0.444271351710012, + "grad_norm": 1715.9587300330145, + "learning_rate": 1.1915162178818793e-05, + "loss": 295.6284, + "step": 23100 + }, + { + "epoch": 0.4444636769704925, + "grad_norm": 1837.7060794350339, + "learning_rate": 1.1909171652440079e-05, + "loss": 290.2767, + "step": 23110 + }, + { + "epoch": 0.444656002230973, + "grad_norm": 1797.4906408081708, + "learning_rate": 1.1903180414926457e-05, + "loss": 286.0442, + "step": 23120 + }, + { + "epoch": 0.44484832749145353, + "grad_norm": 1759.1466779667753, + "learning_rate": 1.189718846850956e-05, + "loss": 282.2319, + "step": 23130 + }, + { + "epoch": 0.44504065275193405, + "grad_norm": 1704.9890405638937, + "learning_rate": 1.1891195815421293e-05, + "loss": 273.0284, + "step": 23140 + }, + { + "epoch": 0.44523297801241457, + "grad_norm": 1812.7961728657122, + "learning_rate": 1.1885202457893819e-05, + "loss": 286.3382, + "step": 23150 + }, + { + "epoch": 0.44542530327289515, + "grad_norm": 1710.1556883789276, + "learning_rate": 1.1879208398159563e-05, + "loss": 288.4673, + "step": 23160 + }, + { + "epoch": 0.44561762853337566, + "grad_norm": 1678.5718853197513, + "learning_rate": 1.1873213638451215e-05, + "loss": 279.5454, + "step": 23170 + }, + { + "epoch": 0.4458099537938562, + "grad_norm": 1874.0836182268522, + "learning_rate": 1.1867218181001725e-05, + "loss": 288.6174, + "step": 23180 + }, + { + "epoch": 0.4460022790543367, + "grad_norm": 2670.9328264712767, + "learning_rate": 1.1861222028044301e-05, + "loss": 296.0979, + "step": 23190 + }, + { + "epoch": 0.4461946043148172, + "grad_norm": 1665.7025297889513, + "learning_rate": 1.1855225181812408e-05, + "loss": 280.7955, + "step": 23200 + }, + { + "epoch": 0.44638692957529774, + "grad_norm": 1692.5609582789361, + "learning_rate": 1.1849227644539781e-05, + "loss": 280.3237, + "step": 23210 + }, + { + "epoch": 0.44657925483577826, + "grad_norm": 1516.822947210408, + "learning_rate": 1.1843229418460391e-05, + "loss": 279.9126, + "step": 23220 + }, + { + "epoch": 0.4467715800962588, + "grad_norm": 1635.8471836627796, + "learning_rate": 1.1837230505808485e-05, + "loss": 278.7231, + "step": 23230 + }, + { + "epoch": 0.4469639053567393, + "grad_norm": 1604.722505836218, + "learning_rate": 1.1831230908818563e-05, + "loss": 281.3688, + "step": 23240 + }, + { + "epoch": 0.4471562306172198, + "grad_norm": 1812.974517169146, + "learning_rate": 1.1825230629725366e-05, + "loss": 293.6629, + "step": 23250 + }, + { + "epoch": 0.4473485558777004, + "grad_norm": 1774.454309007271, + "learning_rate": 1.1819229670763908e-05, + "loss": 289.6647, + "step": 23260 + }, + { + "epoch": 0.4475408811381809, + "grad_norm": 2079.130727773057, + "learning_rate": 1.1813228034169442e-05, + "loss": 296.6703, + "step": 23270 + }, + { + "epoch": 0.44773320639866143, + "grad_norm": 1775.0031349756969, + "learning_rate": 1.180722572217748e-05, + "loss": 291.4848, + "step": 23280 + }, + { + "epoch": 0.44792553165914195, + "grad_norm": 1835.0883911093642, + "learning_rate": 1.1801222737023783e-05, + "loss": 291.9565, + "step": 23290 + }, + { + "epoch": 0.44811785691962247, + "grad_norm": 1732.2632603627926, + "learning_rate": 1.1795219080944366e-05, + "loss": 287.1501, + "step": 23300 + }, + { + "epoch": 0.448310182180103, + "grad_norm": 1760.7354771369664, + "learning_rate": 1.178921475617549e-05, + "loss": 285.9664, + "step": 23310 + }, + { + "epoch": 0.4485025074405835, + "grad_norm": 1826.707406558517, + "learning_rate": 1.1783209764953666e-05, + "loss": 292.7454, + "step": 23320 + }, + { + "epoch": 0.44869483270106403, + "grad_norm": 1970.5524085679651, + "learning_rate": 1.1777204109515653e-05, + "loss": 295.9027, + "step": 23330 + }, + { + "epoch": 0.44888715796154455, + "grad_norm": 1909.2435260195157, + "learning_rate": 1.1771197792098465e-05, + "loss": 289.9696, + "step": 23340 + }, + { + "epoch": 0.44907948322202507, + "grad_norm": 1683.089366796813, + "learning_rate": 1.1765190814939351e-05, + "loss": 287.4251, + "step": 23350 + }, + { + "epoch": 0.4492718084825056, + "grad_norm": 1795.7155474563117, + "learning_rate": 1.175918318027581e-05, + "loss": 280.0622, + "step": 23360 + }, + { + "epoch": 0.44946413374298616, + "grad_norm": 1756.949841339035, + "learning_rate": 1.1753174890345591e-05, + "loss": 287.0618, + "step": 23370 + }, + { + "epoch": 0.4496564590034667, + "grad_norm": 1910.6755429209422, + "learning_rate": 1.174716594738668e-05, + "loss": 289.142, + "step": 23380 + }, + { + "epoch": 0.4498487842639472, + "grad_norm": 1911.7015281491028, + "learning_rate": 1.1741156353637304e-05, + "loss": 280.1322, + "step": 23390 + }, + { + "epoch": 0.4500411095244277, + "grad_norm": 1773.9867806509214, + "learning_rate": 1.1735146111335945e-05, + "loss": 282.747, + "step": 23400 + }, + { + "epoch": 0.45023343478490824, + "grad_norm": 1676.6937716961493, + "learning_rate": 1.1729135222721315e-05, + "loss": 282.6137, + "step": 23410 + }, + { + "epoch": 0.45042576004538876, + "grad_norm": 1808.9680360244736, + "learning_rate": 1.1723123690032376e-05, + "loss": 272.8415, + "step": 23420 + }, + { + "epoch": 0.4506180853058693, + "grad_norm": 1653.1305496027244, + "learning_rate": 1.171711151550832e-05, + "loss": 299.3093, + "step": 23430 + }, + { + "epoch": 0.4508104105663498, + "grad_norm": 1812.7404402262237, + "learning_rate": 1.1711098701388581e-05, + "loss": 285.1163, + "step": 23440 + }, + { + "epoch": 0.4510027358268303, + "grad_norm": 1702.8119340459302, + "learning_rate": 1.1705085249912837e-05, + "loss": 282.1939, + "step": 23450 + }, + { + "epoch": 0.45119506108731083, + "grad_norm": 1583.8576373399394, + "learning_rate": 1.1699071163320997e-05, + "loss": 289.1969, + "step": 23460 + }, + { + "epoch": 0.4513873863477914, + "grad_norm": 1734.128409584299, + "learning_rate": 1.169305644385321e-05, + "loss": 281.8136, + "step": 23470 + }, + { + "epoch": 0.4515797116082719, + "grad_norm": 1722.558155028593, + "learning_rate": 1.168704109374986e-05, + "loss": 287.1599, + "step": 23480 + }, + { + "epoch": 0.45177203686875245, + "grad_norm": 1710.6931929672164, + "learning_rate": 1.1681025115251566e-05, + "loss": 288.8959, + "step": 23490 + }, + { + "epoch": 0.45196436212923297, + "grad_norm": 2047.1026638475935, + "learning_rate": 1.1675008510599176e-05, + "loss": 278.7546, + "step": 23500 + }, + { + "epoch": 0.4521566873897135, + "grad_norm": 1851.0792011260548, + "learning_rate": 1.1668991282033784e-05, + "loss": 284.0779, + "step": 23510 + }, + { + "epoch": 0.452349012650194, + "grad_norm": 1846.816831283124, + "learning_rate": 1.16629734317967e-05, + "loss": 290.6807, + "step": 23520 + }, + { + "epoch": 0.4525413379106745, + "grad_norm": 1878.5260776922232, + "learning_rate": 1.165695496212948e-05, + "loss": 295.8176, + "step": 23530 + }, + { + "epoch": 0.45273366317115504, + "grad_norm": 1816.7253291343181, + "learning_rate": 1.1650935875273901e-05, + "loss": 288.7826, + "step": 23540 + }, + { + "epoch": 0.45292598843163556, + "grad_norm": 1586.9737205937136, + "learning_rate": 1.1644916173471976e-05, + "loss": 283.6865, + "step": 23550 + }, + { + "epoch": 0.4531183136921161, + "grad_norm": 1703.918771905539, + "learning_rate": 1.1638895858965942e-05, + "loss": 283.1265, + "step": 23560 + }, + { + "epoch": 0.45331063895259666, + "grad_norm": 1742.0075842146127, + "learning_rate": 1.1632874933998268e-05, + "loss": 282.4831, + "step": 23570 + }, + { + "epoch": 0.4535029642130772, + "grad_norm": 1810.1424471119528, + "learning_rate": 1.1626853400811649e-05, + "loss": 286.5923, + "step": 23580 + }, + { + "epoch": 0.4536952894735577, + "grad_norm": 1494.5794457017953, + "learning_rate": 1.1620831261649003e-05, + "loss": 279.275, + "step": 23590 + }, + { + "epoch": 0.4538876147340382, + "grad_norm": 1587.080233581412, + "learning_rate": 1.1614808518753485e-05, + "loss": 285.0605, + "step": 23600 + }, + { + "epoch": 0.45407993999451873, + "grad_norm": 1655.8667909162855, + "learning_rate": 1.1608785174368461e-05, + "loss": 286.6222, + "step": 23610 + }, + { + "epoch": 0.45427226525499925, + "grad_norm": 1764.1897700555548, + "learning_rate": 1.1602761230737531e-05, + "loss": 297.7672, + "step": 23620 + }, + { + "epoch": 0.45446459051547977, + "grad_norm": 1780.384728045414, + "learning_rate": 1.1596736690104514e-05, + "loss": 300.0785, + "step": 23630 + }, + { + "epoch": 0.4546569157759603, + "grad_norm": 1755.779092767589, + "learning_rate": 1.1590711554713452e-05, + "loss": 280.6965, + "step": 23640 + }, + { + "epoch": 0.4548492410364408, + "grad_norm": 1807.9899391052184, + "learning_rate": 1.1584685826808604e-05, + "loss": 295.9619, + "step": 23650 + }, + { + "epoch": 0.45504156629692133, + "grad_norm": 2064.8159441450043, + "learning_rate": 1.157865950863446e-05, + "loss": 287.0157, + "step": 23660 + }, + { + "epoch": 0.4552338915574019, + "grad_norm": 2137.3376159337927, + "learning_rate": 1.1572632602435717e-05, + "loss": 293.902, + "step": 23670 + }, + { + "epoch": 0.4554262168178824, + "grad_norm": 1569.9931877270574, + "learning_rate": 1.1566605110457305e-05, + "loss": 280.1166, + "step": 23680 + }, + { + "epoch": 0.45561854207836294, + "grad_norm": 2007.340270423367, + "learning_rate": 1.1560577034944364e-05, + "loss": 289.0956, + "step": 23690 + }, + { + "epoch": 0.45581086733884346, + "grad_norm": 1931.3024264232283, + "learning_rate": 1.1554548378142249e-05, + "loss": 288.4179, + "step": 23700 + }, + { + "epoch": 0.456003192599324, + "grad_norm": 1944.1780294249734, + "learning_rate": 1.1548519142296541e-05, + "loss": 286.3278, + "step": 23710 + }, + { + "epoch": 0.4561955178598045, + "grad_norm": 1689.7150106930471, + "learning_rate": 1.1542489329653024e-05, + "loss": 291.3526, + "step": 23720 + }, + { + "epoch": 0.456387843120285, + "grad_norm": 1807.9609317369523, + "learning_rate": 1.153645894245771e-05, + "loss": 275.6158, + "step": 23730 + }, + { + "epoch": 0.45658016838076554, + "grad_norm": 1822.6861262073455, + "learning_rate": 1.1530427982956813e-05, + "loss": 292.7068, + "step": 23740 + }, + { + "epoch": 0.45677249364124606, + "grad_norm": 1798.8724708841796, + "learning_rate": 1.1524396453396767e-05, + "loss": 284.5452, + "step": 23750 + }, + { + "epoch": 0.4569648189017266, + "grad_norm": 2078.4341054267998, + "learning_rate": 1.1518364356024219e-05, + "loss": 289.6863, + "step": 23760 + }, + { + "epoch": 0.45715714416220715, + "grad_norm": 2248.5845611815375, + "learning_rate": 1.1512331693086025e-05, + "loss": 277.8321, + "step": 23770 + }, + { + "epoch": 0.45734946942268767, + "grad_norm": 1927.9092338464047, + "learning_rate": 1.1506298466829256e-05, + "loss": 292.2804, + "step": 23780 + }, + { + "epoch": 0.4575417946831682, + "grad_norm": 1850.887892926861, + "learning_rate": 1.1500264679501181e-05, + "loss": 283.8637, + "step": 23790 + }, + { + "epoch": 0.4577341199436487, + "grad_norm": 1693.6868742509823, + "learning_rate": 1.1494230333349292e-05, + "loss": 288.8228, + "step": 23800 + }, + { + "epoch": 0.45792644520412923, + "grad_norm": 1936.015090643325, + "learning_rate": 1.1488195430621284e-05, + "loss": 284.9828, + "step": 23810 + }, + { + "epoch": 0.45811877046460975, + "grad_norm": 1682.8866890385339, + "learning_rate": 1.1482159973565051e-05, + "loss": 293.2018, + "step": 23820 + }, + { + "epoch": 0.45831109572509027, + "grad_norm": 1938.7381966171101, + "learning_rate": 1.147612396442871e-05, + "loss": 285.0724, + "step": 23830 + }, + { + "epoch": 0.4585034209855708, + "grad_norm": 1641.6893164987168, + "learning_rate": 1.1470087405460572e-05, + "loss": 277.8538, + "step": 23840 + }, + { + "epoch": 0.4586957462460513, + "grad_norm": 1850.2389463573038, + "learning_rate": 1.1464050298909153e-05, + "loss": 277.7248, + "step": 23850 + }, + { + "epoch": 0.4588880715065318, + "grad_norm": 1544.748625565167, + "learning_rate": 1.1458012647023178e-05, + "loss": 267.8215, + "step": 23860 + }, + { + "epoch": 0.45908039676701234, + "grad_norm": 1835.520167322254, + "learning_rate": 1.1451974452051572e-05, + "loss": 287.3512, + "step": 23870 + }, + { + "epoch": 0.4592727220274929, + "grad_norm": 2124.162675866568, + "learning_rate": 1.1445935716243463e-05, + "loss": 282.1791, + "step": 23880 + }, + { + "epoch": 0.45946504728797344, + "grad_norm": 1864.3869241400778, + "learning_rate": 1.143989644184818e-05, + "loss": 286.0018, + "step": 23890 + }, + { + "epoch": 0.45965737254845396, + "grad_norm": 1823.1616715390342, + "learning_rate": 1.1433856631115252e-05, + "loss": 292.7189, + "step": 23900 + }, + { + "epoch": 0.4598496978089345, + "grad_norm": 1659.4276317327933, + "learning_rate": 1.142781628629441e-05, + "loss": 279.31, + "step": 23910 + }, + { + "epoch": 0.460042023069415, + "grad_norm": 1648.992431356757, + "learning_rate": 1.1421775409635585e-05, + "loss": 283.0493, + "step": 23920 + }, + { + "epoch": 0.4602343483298955, + "grad_norm": 2809.6358145128875, + "learning_rate": 1.1415734003388899e-05, + "loss": 289.6218, + "step": 23930 + }, + { + "epoch": 0.46042667359037603, + "grad_norm": 1761.138388944903, + "learning_rate": 1.1409692069804678e-05, + "loss": 290.4586, + "step": 23940 + }, + { + "epoch": 0.46061899885085655, + "grad_norm": 1699.1472868601832, + "learning_rate": 1.1403649611133444e-05, + "loss": 275.4669, + "step": 23950 + }, + { + "epoch": 0.4608113241113371, + "grad_norm": 1790.2134656203066, + "learning_rate": 1.1397606629625913e-05, + "loss": 289.6756, + "step": 23960 + }, + { + "epoch": 0.4610036493718176, + "grad_norm": 1724.0363318964176, + "learning_rate": 1.1391563127532992e-05, + "loss": 282.2445, + "step": 23970 + }, + { + "epoch": 0.46119597463229817, + "grad_norm": 1709.0754815797438, + "learning_rate": 1.1385519107105791e-05, + "loss": 283.2302, + "step": 23980 + }, + { + "epoch": 0.4613882998927787, + "grad_norm": 1967.664449518948, + "learning_rate": 1.1379474570595604e-05, + "loss": 287.0517, + "step": 23990 + }, + { + "epoch": 0.4615806251532592, + "grad_norm": 1903.6430453002488, + "learning_rate": 1.1373429520253922e-05, + "loss": 275.3917, + "step": 24000 + }, + { + "epoch": 0.4617729504137397, + "grad_norm": 1953.0533945189911, + "learning_rate": 1.1367383958332427e-05, + "loss": 278.4549, + "step": 24010 + }, + { + "epoch": 0.46196527567422024, + "grad_norm": 1901.03403093669, + "learning_rate": 1.1361337887082991e-05, + "loss": 282.2804, + "step": 24020 + }, + { + "epoch": 0.46215760093470076, + "grad_norm": 1905.0695224264493, + "learning_rate": 1.1355291308757672e-05, + "loss": 287.2164, + "step": 24030 + }, + { + "epoch": 0.4623499261951813, + "grad_norm": 1725.2841825427975, + "learning_rate": 1.1349244225608727e-05, + "loss": 283.5318, + "step": 24040 + }, + { + "epoch": 0.4625422514556618, + "grad_norm": 1868.2311153844091, + "learning_rate": 1.1343196639888591e-05, + "loss": 286.734, + "step": 24050 + }, + { + "epoch": 0.4627345767161423, + "grad_norm": 1888.79687534906, + "learning_rate": 1.133714855384989e-05, + "loss": 289.3315, + "step": 24060 + }, + { + "epoch": 0.46292690197662284, + "grad_norm": 1879.3003434705133, + "learning_rate": 1.1331099969745439e-05, + "loss": 282.837, + "step": 24070 + }, + { + "epoch": 0.4631192272371034, + "grad_norm": 1886.172658678069, + "learning_rate": 1.132505088982823e-05, + "loss": 296.6889, + "step": 24080 + }, + { + "epoch": 0.46331155249758393, + "grad_norm": 1743.3502478122045, + "learning_rate": 1.131900131635145e-05, + "loss": 277.7363, + "step": 24090 + }, + { + "epoch": 0.46350387775806445, + "grad_norm": 1830.5505160450712, + "learning_rate": 1.1312951251568461e-05, + "loss": 285.4822, + "step": 24100 + }, + { + "epoch": 0.463696203018545, + "grad_norm": 1655.2835366167606, + "learning_rate": 1.1306900697732816e-05, + "loss": 277.2251, + "step": 24110 + }, + { + "epoch": 0.4638885282790255, + "grad_norm": 1804.5381177198728, + "learning_rate": 1.1300849657098248e-05, + "loss": 283.2133, + "step": 24120 + }, + { + "epoch": 0.464080853539506, + "grad_norm": 1984.019623330842, + "learning_rate": 1.1294798131918665e-05, + "loss": 277.3018, + "step": 24130 + }, + { + "epoch": 0.46427317879998653, + "grad_norm": 1767.2289431539864, + "learning_rate": 1.1288746124448164e-05, + "loss": 280.9935, + "step": 24140 + }, + { + "epoch": 0.46446550406046705, + "grad_norm": 1732.5699189471168, + "learning_rate": 1.1282693636941013e-05, + "loss": 273.8635, + "step": 24150 + }, + { + "epoch": 0.46465782932094757, + "grad_norm": 1655.3718854805231, + "learning_rate": 1.1276640671651671e-05, + "loss": 292.1583, + "step": 24160 + }, + { + "epoch": 0.4648501545814281, + "grad_norm": 1757.5040467921374, + "learning_rate": 1.1270587230834757e-05, + "loss": 286.4842, + "step": 24170 + }, + { + "epoch": 0.46504247984190866, + "grad_norm": 1718.2535882858665, + "learning_rate": 1.1264533316745088e-05, + "loss": 280.9207, + "step": 24180 + }, + { + "epoch": 0.4652348051023892, + "grad_norm": 1646.2519027718342, + "learning_rate": 1.1258478931637641e-05, + "loss": 280.6315, + "step": 24190 + }, + { + "epoch": 0.4654271303628697, + "grad_norm": 1735.7570762363196, + "learning_rate": 1.1252424077767577e-05, + "loss": 267.6875, + "step": 24200 + }, + { + "epoch": 0.4656194556233502, + "grad_norm": 1794.7969598830223, + "learning_rate": 1.1246368757390231e-05, + "loss": 277.728, + "step": 24210 + }, + { + "epoch": 0.46581178088383074, + "grad_norm": 1839.0279282002361, + "learning_rate": 1.1240312972761105e-05, + "loss": 276.9731, + "step": 24220 + }, + { + "epoch": 0.46600410614431126, + "grad_norm": 1829.699830664948, + "learning_rate": 1.1234256726135882e-05, + "loss": 281.7794, + "step": 24230 + }, + { + "epoch": 0.4661964314047918, + "grad_norm": 1604.2336744731938, + "learning_rate": 1.1228200019770412e-05, + "loss": 267.9988, + "step": 24240 + }, + { + "epoch": 0.4663887566652723, + "grad_norm": 2740.5041147937695, + "learning_rate": 1.1222142855920719e-05, + "loss": 286.5131, + "step": 24250 + }, + { + "epoch": 0.4665810819257528, + "grad_norm": 1768.08052342765, + "learning_rate": 1.1216085236842997e-05, + "loss": 279.2538, + "step": 24260 + }, + { + "epoch": 0.46677340718623334, + "grad_norm": 1682.2930516996632, + "learning_rate": 1.1210027164793609e-05, + "loss": 287.3463, + "step": 24270 + }, + { + "epoch": 0.46696573244671385, + "grad_norm": 1745.260429319336, + "learning_rate": 1.1203968642029086e-05, + "loss": 287.067, + "step": 24280 + }, + { + "epoch": 0.46715805770719443, + "grad_norm": 2030.0964252395556, + "learning_rate": 1.1197909670806126e-05, + "loss": 281.6308, + "step": 24290 + }, + { + "epoch": 0.46735038296767495, + "grad_norm": 2263.056414037873, + "learning_rate": 1.1191850253381602e-05, + "loss": 292.0448, + "step": 24300 + }, + { + "epoch": 0.46754270822815547, + "grad_norm": 1608.135946897927, + "learning_rate": 1.1185790392012538e-05, + "loss": 280.7332, + "step": 24310 + }, + { + "epoch": 0.467735033488636, + "grad_norm": 1841.1976983480774, + "learning_rate": 1.117973008895614e-05, + "loss": 282.3082, + "step": 24320 + }, + { + "epoch": 0.4679273587491165, + "grad_norm": 1687.6509817266824, + "learning_rate": 1.1173669346469767e-05, + "loss": 288.2399, + "step": 24330 + }, + { + "epoch": 0.468119684009597, + "grad_norm": 1615.3864068301991, + "learning_rate": 1.1167608166810948e-05, + "loss": 279.0982, + "step": 24340 + }, + { + "epoch": 0.46831200927007754, + "grad_norm": 1957.4190617024055, + "learning_rate": 1.1161546552237368e-05, + "loss": 294.8661, + "step": 24350 + }, + { + "epoch": 0.46850433453055806, + "grad_norm": 1943.3842650418649, + "learning_rate": 1.1155484505006884e-05, + "loss": 283.2988, + "step": 24360 + }, + { + "epoch": 0.4686966597910386, + "grad_norm": 1892.399811926419, + "learning_rate": 1.1149422027377501e-05, + "loss": 284.6024, + "step": 24370 + }, + { + "epoch": 0.4688889850515191, + "grad_norm": 1720.6943931501082, + "learning_rate": 1.1143359121607397e-05, + "loss": 278.5779, + "step": 24380 + }, + { + "epoch": 0.4690813103119997, + "grad_norm": 1693.030082106743, + "learning_rate": 1.1137295789954904e-05, + "loss": 275.7291, + "step": 24390 + }, + { + "epoch": 0.4692736355724802, + "grad_norm": 1663.5824178421692, + "learning_rate": 1.1131232034678513e-05, + "loss": 280.3416, + "step": 24400 + }, + { + "epoch": 0.4694659608329607, + "grad_norm": 1633.5569561764394, + "learning_rate": 1.1125167858036874e-05, + "loss": 281.3934, + "step": 24410 + }, + { + "epoch": 0.46965828609344124, + "grad_norm": 3232.357659216158, + "learning_rate": 1.1119103262288788e-05, + "loss": 280.0552, + "step": 24420 + }, + { + "epoch": 0.46985061135392175, + "grad_norm": 1792.070896933448, + "learning_rate": 1.1113038249693221e-05, + "loss": 278.2444, + "step": 24430 + }, + { + "epoch": 0.4700429366144023, + "grad_norm": 2616.900151591412, + "learning_rate": 1.1106972822509287e-05, + "loss": 279.481, + "step": 24440 + }, + { + "epoch": 0.4702352618748828, + "grad_norm": 2391.309370893597, + "learning_rate": 1.1100906982996257e-05, + "loss": 291.4892, + "step": 24450 + }, + { + "epoch": 0.4704275871353633, + "grad_norm": 1729.293782646985, + "learning_rate": 1.109484073341356e-05, + "loss": 281.5636, + "step": 24460 + }, + { + "epoch": 0.47061991239584383, + "grad_norm": 1787.1199819749916, + "learning_rate": 1.1088774076020772e-05, + "loss": 271.1173, + "step": 24470 + }, + { + "epoch": 0.47081223765632435, + "grad_norm": 1748.516395411248, + "learning_rate": 1.1082707013077625e-05, + "loss": 276.029, + "step": 24480 + }, + { + "epoch": 0.4710045629168049, + "grad_norm": 1806.315034393149, + "learning_rate": 1.1076639546843993e-05, + "loss": 282.0608, + "step": 24490 + }, + { + "epoch": 0.47119688817728544, + "grad_norm": 1714.881780505176, + "learning_rate": 1.1070571679579913e-05, + "loss": 283.65, + "step": 24500 + }, + { + "epoch": 0.47138921343776596, + "grad_norm": 1725.475171527444, + "learning_rate": 1.1064503413545565e-05, + "loss": 285.5003, + "step": 24510 + }, + { + "epoch": 0.4715815386982465, + "grad_norm": 1921.2247765509198, + "learning_rate": 1.1058434751001272e-05, + "loss": 281.6898, + "step": 24520 + }, + { + "epoch": 0.471773863958727, + "grad_norm": 1757.3312740082472, + "learning_rate": 1.1052365694207516e-05, + "loss": 277.4069, + "step": 24530 + }, + { + "epoch": 0.4719661892192075, + "grad_norm": 1573.0580372973177, + "learning_rate": 1.1046296245424922e-05, + "loss": 273.5803, + "step": 24540 + }, + { + "epoch": 0.47215851447968804, + "grad_norm": 1684.4195762894176, + "learning_rate": 1.1040226406914254e-05, + "loss": 275.6242, + "step": 24550 + }, + { + "epoch": 0.47235083974016856, + "grad_norm": 1709.783815174474, + "learning_rate": 1.103415618093643e-05, + "loss": 289.4204, + "step": 24560 + }, + { + "epoch": 0.4725431650006491, + "grad_norm": 1866.0051373995607, + "learning_rate": 1.1028085569752512e-05, + "loss": 281.4208, + "step": 24570 + }, + { + "epoch": 0.4727354902611296, + "grad_norm": 1581.4615809336772, + "learning_rate": 1.1022014575623695e-05, + "loss": 283.6304, + "step": 24580 + }, + { + "epoch": 0.4729278155216102, + "grad_norm": 1879.0791094284268, + "learning_rate": 1.1015943200811325e-05, + "loss": 277.4734, + "step": 24590 + }, + { + "epoch": 0.4731201407820907, + "grad_norm": 1661.6447599006524, + "learning_rate": 1.1009871447576894e-05, + "loss": 272.8037, + "step": 24600 + }, + { + "epoch": 0.4733124660425712, + "grad_norm": 1725.0222049205809, + "learning_rate": 1.100379931818203e-05, + "loss": 284.0248, + "step": 24610 + }, + { + "epoch": 0.47350479130305173, + "grad_norm": 1942.6387207529522, + "learning_rate": 1.0997726814888497e-05, + "loss": 293.1799, + "step": 24620 + }, + { + "epoch": 0.47369711656353225, + "grad_norm": 1743.039914004875, + "learning_rate": 1.0991653939958203e-05, + "loss": 271.784, + "step": 24630 + }, + { + "epoch": 0.47388944182401277, + "grad_norm": 1686.7357424112322, + "learning_rate": 1.0985580695653193e-05, + "loss": 284.0414, + "step": 24640 + }, + { + "epoch": 0.4740817670844933, + "grad_norm": 1842.0619794252013, + "learning_rate": 1.0979507084235653e-05, + "loss": 283.0204, + "step": 24650 + }, + { + "epoch": 0.4742740923449738, + "grad_norm": 1585.062797187579, + "learning_rate": 1.0973433107967901e-05, + "loss": 276.5904, + "step": 24660 + }, + { + "epoch": 0.4744664176054543, + "grad_norm": 2080.3863754800336, + "learning_rate": 1.0967358769112389e-05, + "loss": 282.0562, + "step": 24670 + }, + { + "epoch": 0.47465874286593485, + "grad_norm": 1716.4446207902547, + "learning_rate": 1.0961284069931717e-05, + "loss": 274.7074, + "step": 24680 + }, + { + "epoch": 0.4748510681264154, + "grad_norm": 1728.7992809529417, + "learning_rate": 1.0955209012688602e-05, + "loss": 279.8301, + "step": 24690 + }, + { + "epoch": 0.47504339338689594, + "grad_norm": 1667.5814763707388, + "learning_rate": 1.094913359964591e-05, + "loss": 274.3013, + "step": 24700 + }, + { + "epoch": 0.47523571864737646, + "grad_norm": 2065.739433043234, + "learning_rate": 1.0943057833066622e-05, + "loss": 285.344, + "step": 24710 + }, + { + "epoch": 0.475428043907857, + "grad_norm": 1673.6681994801904, + "learning_rate": 1.093698171521387e-05, + "loss": 279.6878, + "step": 24720 + }, + { + "epoch": 0.4756203691683375, + "grad_norm": 1943.167747370926, + "learning_rate": 1.0930905248350903e-05, + "loss": 269.1851, + "step": 24730 + }, + { + "epoch": 0.475812694428818, + "grad_norm": 1763.5127728618127, + "learning_rate": 1.0924828434741101e-05, + "loss": 280.9409, + "step": 24740 + }, + { + "epoch": 0.47600501968929854, + "grad_norm": 1737.2621383584503, + "learning_rate": 1.0918751276647988e-05, + "loss": 282.1235, + "step": 24750 + }, + { + "epoch": 0.47619734494977906, + "grad_norm": 1689.3505373660075, + "learning_rate": 1.0912673776335194e-05, + "loss": 275.5593, + "step": 24760 + }, + { + "epoch": 0.4763896702102596, + "grad_norm": 1750.2103559510556, + "learning_rate": 1.0906595936066496e-05, + "loss": 279.0149, + "step": 24770 + }, + { + "epoch": 0.4765819954707401, + "grad_norm": 1574.4191111061614, + "learning_rate": 1.090051775810578e-05, + "loss": 276.1497, + "step": 24780 + }, + { + "epoch": 0.4767743207312206, + "grad_norm": 1777.465945592367, + "learning_rate": 1.0894439244717075e-05, + "loss": 285.6875, + "step": 24790 + }, + { + "epoch": 0.4769666459917012, + "grad_norm": 1870.3403189173935, + "learning_rate": 1.0888360398164521e-05, + "loss": 291.24, + "step": 24800 + }, + { + "epoch": 0.4771589712521817, + "grad_norm": 1657.42980807789, + "learning_rate": 1.088228122071239e-05, + "loss": 282.8729, + "step": 24810 + }, + { + "epoch": 0.4773512965126622, + "grad_norm": 1662.491142966301, + "learning_rate": 1.0876201714625076e-05, + "loss": 281.3687, + "step": 24820 + }, + { + "epoch": 0.47754362177314275, + "grad_norm": 1753.9557880837808, + "learning_rate": 1.0870121882167095e-05, + "loss": 279.9903, + "step": 24830 + }, + { + "epoch": 0.47773594703362326, + "grad_norm": 1725.727367314517, + "learning_rate": 1.0864041725603085e-05, + "loss": 276.5499, + "step": 24840 + }, + { + "epoch": 0.4779282722941038, + "grad_norm": 1801.003114626843, + "learning_rate": 1.08579612471978e-05, + "loss": 273.0663, + "step": 24850 + }, + { + "epoch": 0.4781205975545843, + "grad_norm": 2355.5563038069777, + "learning_rate": 1.0851880449216123e-05, + "loss": 272.5237, + "step": 24860 + }, + { + "epoch": 0.4783129228150648, + "grad_norm": 1653.960207940239, + "learning_rate": 1.0845799333923045e-05, + "loss": 283.3812, + "step": 24870 + }, + { + "epoch": 0.47850524807554534, + "grad_norm": 1701.787098663378, + "learning_rate": 1.0839717903583684e-05, + "loss": 276.0204, + "step": 24880 + }, + { + "epoch": 0.47869757333602586, + "grad_norm": 1913.5958147198544, + "learning_rate": 1.0833636160463273e-05, + "loss": 277.9247, + "step": 24890 + }, + { + "epoch": 0.47888989859650644, + "grad_norm": 1777.9577341763338, + "learning_rate": 1.0827554106827162e-05, + "loss": 280.5851, + "step": 24900 + }, + { + "epoch": 0.47908222385698696, + "grad_norm": 1703.7082344355422, + "learning_rate": 1.0821471744940812e-05, + "loss": 274.7295, + "step": 24910 + }, + { + "epoch": 0.4792745491174675, + "grad_norm": 1587.6434780646814, + "learning_rate": 1.0815389077069805e-05, + "loss": 273.788, + "step": 24920 + }, + { + "epoch": 0.479466874377948, + "grad_norm": 2014.059943348886, + "learning_rate": 1.0809306105479834e-05, + "loss": 281.809, + "step": 24930 + }, + { + "epoch": 0.4796591996384285, + "grad_norm": 1655.0458377770333, + "learning_rate": 1.0803222832436701e-05, + "loss": 272.5006, + "step": 24940 + }, + { + "epoch": 0.47985152489890903, + "grad_norm": 2044.3995723079227, + "learning_rate": 1.0797139260206331e-05, + "loss": 278.8814, + "step": 24950 + }, + { + "epoch": 0.48004385015938955, + "grad_norm": 1703.1689266297653, + "learning_rate": 1.079105539105475e-05, + "loss": 283.2675, + "step": 24960 + }, + { + "epoch": 0.48023617541987007, + "grad_norm": 2852.4551116791026, + "learning_rate": 1.0784971227248104e-05, + "loss": 285.7957, + "step": 24970 + }, + { + "epoch": 0.4804285006803506, + "grad_norm": 1584.9577317053706, + "learning_rate": 1.077888677105264e-05, + "loss": 284.5297, + "step": 24980 + }, + { + "epoch": 0.4806208259408311, + "grad_norm": 1608.1657154046318, + "learning_rate": 1.0772802024734716e-05, + "loss": 274.6846, + "step": 24990 + }, + { + "epoch": 0.4808131512013117, + "grad_norm": 1684.024173161361, + "learning_rate": 1.07667169905608e-05, + "loss": 284.1644, + "step": 25000 + }, + { + "epoch": 0.4810054764617922, + "grad_norm": 1790.628843096983, + "learning_rate": 1.0760631670797468e-05, + "loss": 274.1171, + "step": 25010 + }, + { + "epoch": 0.4811978017222727, + "grad_norm": 1842.0984959429345, + "learning_rate": 1.07545460677114e-05, + "loss": 270.6828, + "step": 25020 + }, + { + "epoch": 0.48139012698275324, + "grad_norm": 1928.2675026309344, + "learning_rate": 1.0748460183569385e-05, + "loss": 280.9262, + "step": 25030 + }, + { + "epoch": 0.48158245224323376, + "grad_norm": 1770.4835210400588, + "learning_rate": 1.0742374020638315e-05, + "loss": 277.8285, + "step": 25040 + }, + { + "epoch": 0.4817747775037143, + "grad_norm": 1783.6631353171954, + "learning_rate": 1.073628758118518e-05, + "loss": 286.9687, + "step": 25050 + }, + { + "epoch": 0.4819671027641948, + "grad_norm": 1761.2541630268242, + "learning_rate": 1.0730200867477083e-05, + "loss": 275.5293, + "step": 25060 + }, + { + "epoch": 0.4821594280246753, + "grad_norm": 1959.9590857632304, + "learning_rate": 1.0724113881781222e-05, + "loss": 277.5024, + "step": 25070 + }, + { + "epoch": 0.48235175328515584, + "grad_norm": 2043.7565981906453, + "learning_rate": 1.0718026626364902e-05, + "loss": 288.158, + "step": 25080 + }, + { + "epoch": 0.48254407854563636, + "grad_norm": 1748.9083129201, + "learning_rate": 1.0711939103495515e-05, + "loss": 276.0483, + "step": 25090 + }, + { + "epoch": 0.48273640380611693, + "grad_norm": 1934.4530760864109, + "learning_rate": 1.0705851315440575e-05, + "loss": 278.8729, + "step": 25100 + }, + { + "epoch": 0.48292872906659745, + "grad_norm": 1777.035986055601, + "learning_rate": 1.0699763264467675e-05, + "loss": 277.8787, + "step": 25110 + }, + { + "epoch": 0.48312105432707797, + "grad_norm": 1881.9017495627606, + "learning_rate": 1.069367495284452e-05, + "loss": 276.2537, + "step": 25120 + }, + { + "epoch": 0.4833133795875585, + "grad_norm": 1664.6538773127083, + "learning_rate": 1.06875863828389e-05, + "loss": 267.9381, + "step": 25130 + }, + { + "epoch": 0.483505704848039, + "grad_norm": 1815.7195932497489, + "learning_rate": 1.0681497556718706e-05, + "loss": 264.0365, + "step": 25140 + }, + { + "epoch": 0.4836980301085195, + "grad_norm": 1719.857512381996, + "learning_rate": 1.067540847675193e-05, + "loss": 301.7791, + "step": 25150 + }, + { + "epoch": 0.48389035536900005, + "grad_norm": 1671.2205614469908, + "learning_rate": 1.0669319145206646e-05, + "loss": 279.6609, + "step": 25160 + }, + { + "epoch": 0.48408268062948057, + "grad_norm": 1991.3580821715914, + "learning_rate": 1.066322956435104e-05, + "loss": 293.9594, + "step": 25170 + }, + { + "epoch": 0.4842750058899611, + "grad_norm": 1797.9206966496258, + "learning_rate": 1.0657139736453375e-05, + "loss": 287.1266, + "step": 25180 + }, + { + "epoch": 0.4844673311504416, + "grad_norm": 1802.0197193014135, + "learning_rate": 1.0651049663782007e-05, + "loss": 286.4166, + "step": 25190 + }, + { + "epoch": 0.4846596564109221, + "grad_norm": 1774.6020667486755, + "learning_rate": 1.0644959348605397e-05, + "loss": 278.7697, + "step": 25200 + }, + { + "epoch": 0.4848519816714027, + "grad_norm": 1821.700872460919, + "learning_rate": 1.0638868793192079e-05, + "loss": 283.0953, + "step": 25210 + }, + { + "epoch": 0.4850443069318832, + "grad_norm": 1807.7977795813345, + "learning_rate": 1.0632777999810685e-05, + "loss": 270.5381, + "step": 25220 + }, + { + "epoch": 0.48523663219236374, + "grad_norm": 1659.3755943189267, + "learning_rate": 1.062668697072994e-05, + "loss": 276.1015, + "step": 25230 + }, + { + "epoch": 0.48542895745284426, + "grad_norm": 1793.1507979005469, + "learning_rate": 1.0620595708218646e-05, + "loss": 274.4119, + "step": 25240 + }, + { + "epoch": 0.4856212827133248, + "grad_norm": 1649.2407056113195, + "learning_rate": 1.06145042145457e-05, + "loss": 282.1158, + "step": 25250 + }, + { + "epoch": 0.4858136079738053, + "grad_norm": 1836.4639436265707, + "learning_rate": 1.0608412491980084e-05, + "loss": 269.347, + "step": 25260 + }, + { + "epoch": 0.4860059332342858, + "grad_norm": 1771.3113262949394, + "learning_rate": 1.0602320542790866e-05, + "loss": 276.8815, + "step": 25270 + }, + { + "epoch": 0.48619825849476633, + "grad_norm": 1880.434198165726, + "learning_rate": 1.0596228369247188e-05, + "loss": 284.8107, + "step": 25280 + }, + { + "epoch": 0.48639058375524685, + "grad_norm": 1716.0271201448497, + "learning_rate": 1.0590135973618294e-05, + "loss": 273.0092, + "step": 25290 + }, + { + "epoch": 0.48658290901572737, + "grad_norm": 1816.324271525453, + "learning_rate": 1.0584043358173492e-05, + "loss": 275.8546, + "step": 25300 + }, + { + "epoch": 0.48677523427620795, + "grad_norm": 1700.7637828142767, + "learning_rate": 1.0577950525182189e-05, + "loss": 277.8783, + "step": 25310 + }, + { + "epoch": 0.48696755953668847, + "grad_norm": 1689.9329002120899, + "learning_rate": 1.0571857476913856e-05, + "loss": 267.9604, + "step": 25320 + }, + { + "epoch": 0.487159884797169, + "grad_norm": 1833.7864871145116, + "learning_rate": 1.056576421563806e-05, + "loss": 280.6204, + "step": 25330 + }, + { + "epoch": 0.4873522100576495, + "grad_norm": 1629.7841715102484, + "learning_rate": 1.0559670743624439e-05, + "loss": 266.1679, + "step": 25340 + }, + { + "epoch": 0.48754453531813, + "grad_norm": 1885.512054481977, + "learning_rate": 1.0553577063142705e-05, + "loss": 285.2523, + "step": 25350 + }, + { + "epoch": 0.48773686057861054, + "grad_norm": 1814.4307838412424, + "learning_rate": 1.0547483176462662e-05, + "loss": 282.387, + "step": 25360 + }, + { + "epoch": 0.48792918583909106, + "grad_norm": 8283.33005281376, + "learning_rate": 1.0541389085854177e-05, + "loss": 280.0253, + "step": 25370 + }, + { + "epoch": 0.4881215110995716, + "grad_norm": 1914.0782489842227, + "learning_rate": 1.0535294793587197e-05, + "loss": 279.8125, + "step": 25380 + }, + { + "epoch": 0.4883138363600521, + "grad_norm": 1697.7411988739045, + "learning_rate": 1.0529200301931747e-05, + "loss": 267.1816, + "step": 25390 + }, + { + "epoch": 0.4885061616205326, + "grad_norm": 1607.8453944014698, + "learning_rate": 1.052310561315793e-05, + "loss": 275.4394, + "step": 25400 + }, + { + "epoch": 0.4886984868810132, + "grad_norm": 1717.7467186318952, + "learning_rate": 1.0517010729535903e-05, + "loss": 281.75, + "step": 25410 + }, + { + "epoch": 0.4888908121414937, + "grad_norm": 1914.089401758374, + "learning_rate": 1.0510915653335925e-05, + "loss": 273.0778, + "step": 25420 + }, + { + "epoch": 0.48908313740197423, + "grad_norm": 1741.6983232242, + "learning_rate": 1.05048203868283e-05, + "loss": 285.0327, + "step": 25430 + }, + { + "epoch": 0.48927546266245475, + "grad_norm": 1975.1225339249038, + "learning_rate": 1.0498724932283419e-05, + "loss": 275.4808, + "step": 25440 + }, + { + "epoch": 0.48946778792293527, + "grad_norm": 1832.3843377850574, + "learning_rate": 1.0492629291971738e-05, + "loss": 279.7704, + "step": 25450 + }, + { + "epoch": 0.4896601131834158, + "grad_norm": 1545.4531884640614, + "learning_rate": 1.0486533468163782e-05, + "loss": 273.2373, + "step": 25460 + }, + { + "epoch": 0.4898524384438963, + "grad_norm": 1555.5081987489202, + "learning_rate": 1.0480437463130145e-05, + "loss": 270.6621, + "step": 25470 + }, + { + "epoch": 0.49004476370437683, + "grad_norm": 1828.9902942376457, + "learning_rate": 1.0474341279141486e-05, + "loss": 282.0716, + "step": 25480 + }, + { + "epoch": 0.49023708896485735, + "grad_norm": 1687.9795908257415, + "learning_rate": 1.0468244918468538e-05, + "loss": 279.4738, + "step": 25490 + }, + { + "epoch": 0.49042941422533787, + "grad_norm": 1769.5628320782544, + "learning_rate": 1.0462148383382086e-05, + "loss": 273.6405, + "step": 25500 + }, + { + "epoch": 0.49062173948581844, + "grad_norm": 1799.4230786330918, + "learning_rate": 1.0456051676152996e-05, + "loss": 272.1144, + "step": 25510 + }, + { + "epoch": 0.49081406474629896, + "grad_norm": 2529.581452200891, + "learning_rate": 1.0449954799052189e-05, + "loss": 281.015, + "step": 25520 + }, + { + "epoch": 0.4910063900067795, + "grad_norm": 2142.8966451075707, + "learning_rate": 1.044385775435065e-05, + "loss": 283.0552, + "step": 25530 + }, + { + "epoch": 0.49119871526726, + "grad_norm": 1722.2710304151983, + "learning_rate": 1.043776054431943e-05, + "loss": 274.963, + "step": 25540 + }, + { + "epoch": 0.4913910405277405, + "grad_norm": 1664.9203377356225, + "learning_rate": 1.0431663171229636e-05, + "loss": 275.5726, + "step": 25550 + }, + { + "epoch": 0.49158336578822104, + "grad_norm": 1857.1865724237516, + "learning_rate": 1.0425565637352441e-05, + "loss": 278.0228, + "step": 25560 + }, + { + "epoch": 0.49177569104870156, + "grad_norm": 1971.1161842338283, + "learning_rate": 1.0419467944959072e-05, + "loss": 279.5196, + "step": 25570 + }, + { + "epoch": 0.4919680163091821, + "grad_norm": 1880.027071619858, + "learning_rate": 1.0413370096320823e-05, + "loss": 266.9959, + "step": 25580 + }, + { + "epoch": 0.4921603415696626, + "grad_norm": 1655.724603171346, + "learning_rate": 1.0407272093709038e-05, + "loss": 273.6131, + "step": 25590 + }, + { + "epoch": 0.4923526668301431, + "grad_norm": 1776.7069566842088, + "learning_rate": 1.0401173939395128e-05, + "loss": 272.8131, + "step": 25600 + }, + { + "epoch": 0.4925449920906237, + "grad_norm": 1844.6656748396456, + "learning_rate": 1.0395075635650549e-05, + "loss": 270.34, + "step": 25610 + }, + { + "epoch": 0.4927373173511042, + "grad_norm": 1572.2616924510403, + "learning_rate": 1.038897718474682e-05, + "loss": 275.1192, + "step": 25620 + }, + { + "epoch": 0.49292964261158473, + "grad_norm": 1788.643043149432, + "learning_rate": 1.0382878588955517e-05, + "loss": 280.5417, + "step": 25630 + }, + { + "epoch": 0.49312196787206525, + "grad_norm": 1636.245284263101, + "learning_rate": 1.0376779850548257e-05, + "loss": 275.8214, + "step": 25640 + }, + { + "epoch": 0.49331429313254577, + "grad_norm": 1752.808939684616, + "learning_rate": 1.0370680971796732e-05, + "loss": 269.9422, + "step": 25650 + }, + { + "epoch": 0.4935066183930263, + "grad_norm": 1747.1700432609318, + "learning_rate": 1.0364581954972662e-05, + "loss": 269.1088, + "step": 25660 + }, + { + "epoch": 0.4936989436535068, + "grad_norm": 1830.7041813039798, + "learning_rate": 1.0358482802347838e-05, + "loss": 277.7571, + "step": 25670 + }, + { + "epoch": 0.4938912689139873, + "grad_norm": 2036.4426659652127, + "learning_rate": 1.0352383516194088e-05, + "loss": 277.2694, + "step": 25680 + }, + { + "epoch": 0.49408359417446784, + "grad_norm": 1772.4798752197973, + "learning_rate": 1.0346284098783304e-05, + "loss": 276.9773, + "step": 25690 + }, + { + "epoch": 0.49427591943494836, + "grad_norm": 1604.2219490851046, + "learning_rate": 1.0340184552387406e-05, + "loss": 274.673, + "step": 25700 + }, + { + "epoch": 0.4944682446954289, + "grad_norm": 1990.425648617706, + "learning_rate": 1.0334084879278381e-05, + "loss": 275.3094, + "step": 25710 + }, + { + "epoch": 0.49466056995590946, + "grad_norm": 1580.7324982115563, + "learning_rate": 1.032798508172826e-05, + "loss": 275.8235, + "step": 25720 + }, + { + "epoch": 0.49485289521639, + "grad_norm": 1580.1661572774167, + "learning_rate": 1.0321885162009111e-05, + "loss": 266.4813, + "step": 25730 + }, + { + "epoch": 0.4950452204768705, + "grad_norm": 1664.8125716677737, + "learning_rate": 1.0315785122393053e-05, + "loss": 270.8755, + "step": 25740 + }, + { + "epoch": 0.495237545737351, + "grad_norm": 1913.6285760673086, + "learning_rate": 1.0309684965152254e-05, + "loss": 278.3399, + "step": 25750 + }, + { + "epoch": 0.49542987099783153, + "grad_norm": 1672.6336172762456, + "learning_rate": 1.030358469255892e-05, + "loss": 279.4013, + "step": 25760 + }, + { + "epoch": 0.49562219625831205, + "grad_norm": 1894.4729594011826, + "learning_rate": 1.0297484306885304e-05, + "loss": 273.5778, + "step": 25770 + }, + { + "epoch": 0.4958145215187926, + "grad_norm": 1486.428909526691, + "learning_rate": 1.0291383810403697e-05, + "loss": 274.573, + "step": 25780 + }, + { + "epoch": 0.4960068467792731, + "grad_norm": 1636.4533208711755, + "learning_rate": 1.028528320538643e-05, + "loss": 276.0129, + "step": 25790 + }, + { + "epoch": 0.4961991720397536, + "grad_norm": 1735.0772739518704, + "learning_rate": 1.0279182494105879e-05, + "loss": 269.5101, + "step": 25800 + }, + { + "epoch": 0.49639149730023413, + "grad_norm": 1657.1296433778853, + "learning_rate": 1.0273081678834462e-05, + "loss": 269.3597, + "step": 25810 + }, + { + "epoch": 0.4965838225607147, + "grad_norm": 1677.74422572379, + "learning_rate": 1.026698076184463e-05, + "loss": 267.2675, + "step": 25820 + }, + { + "epoch": 0.4967761478211952, + "grad_norm": 1723.5692957252045, + "learning_rate": 1.0260879745408876e-05, + "loss": 276.1706, + "step": 25830 + }, + { + "epoch": 0.49696847308167574, + "grad_norm": 1980.9613986069344, + "learning_rate": 1.0254778631799722e-05, + "loss": 265.838, + "step": 25840 + }, + { + "epoch": 0.49716079834215626, + "grad_norm": 1712.3810455210344, + "learning_rate": 1.024867742328974e-05, + "loss": 280.8373, + "step": 25850 + }, + { + "epoch": 0.4973531236026368, + "grad_norm": 1844.5937338169515, + "learning_rate": 1.024257612215152e-05, + "loss": 276.4784, + "step": 25860 + }, + { + "epoch": 0.4975454488631173, + "grad_norm": 1770.8577750523227, + "learning_rate": 1.02364747306577e-05, + "loss": 276.7522, + "step": 25870 + }, + { + "epoch": 0.4977377741235978, + "grad_norm": 1938.988621651512, + "learning_rate": 1.023037325108095e-05, + "loss": 272.975, + "step": 25880 + }, + { + "epoch": 0.49793009938407834, + "grad_norm": 1811.7810293825148, + "learning_rate": 1.022427168569397e-05, + "loss": 268.9581, + "step": 25890 + }, + { + "epoch": 0.49812242464455886, + "grad_norm": 1591.4568115613229, + "learning_rate": 1.021817003676949e-05, + "loss": 275.2877, + "step": 25900 + }, + { + "epoch": 0.4983147499050394, + "grad_norm": 1962.51063734328, + "learning_rate": 1.0212068306580274e-05, + "loss": 279.2284, + "step": 25910 + }, + { + "epoch": 0.49850707516551995, + "grad_norm": 1689.4127112193344, + "learning_rate": 1.0205966497399118e-05, + "loss": 263.6699, + "step": 25920 + }, + { + "epoch": 0.49869940042600047, + "grad_norm": 1880.813468352572, + "learning_rate": 1.0199864611498841e-05, + "loss": 279.2016, + "step": 25930 + }, + { + "epoch": 0.498891725686481, + "grad_norm": 1607.8035022145914, + "learning_rate": 1.0193762651152299e-05, + "loss": 279.518, + "step": 25940 + }, + { + "epoch": 0.4990840509469615, + "grad_norm": 1864.2411447773225, + "learning_rate": 1.018766061863237e-05, + "loss": 271.3069, + "step": 25950 + }, + { + "epoch": 0.49927637620744203, + "grad_norm": 2091.9565295944117, + "learning_rate": 1.018155851621196e-05, + "loss": 273.8984, + "step": 25960 + }, + { + "epoch": 0.49946870146792255, + "grad_norm": 1964.398008919894, + "learning_rate": 1.0175456346164e-05, + "loss": 270.9842, + "step": 25970 + }, + { + "epoch": 0.49966102672840307, + "grad_norm": 1672.5696589957342, + "learning_rate": 1.0169354110761447e-05, + "loss": 271.7555, + "step": 25980 + }, + { + "epoch": 0.4998533519888836, + "grad_norm": 1513.4552497496738, + "learning_rate": 1.0163251812277289e-05, + "loss": 269.53, + "step": 25990 + }, + { + "epoch": 0.5000456772493641, + "grad_norm": 1524.6941834301404, + "learning_rate": 1.0157149452984523e-05, + "loss": 272.5135, + "step": 26000 + }, + { + "epoch": 0.5002380025098446, + "grad_norm": 1683.4352601114072, + "learning_rate": 1.0151047035156182e-05, + "loss": 273.8935, + "step": 26010 + }, + { + "epoch": 0.5004303277703251, + "grad_norm": 2080.4936321093123, + "learning_rate": 1.0144944561065316e-05, + "loss": 270.1687, + "step": 26020 + }, + { + "epoch": 0.5006226530308057, + "grad_norm": 1632.5934581019233, + "learning_rate": 1.0138842032984996e-05, + "loss": 286.5729, + "step": 26030 + }, + { + "epoch": 0.5008149782912862, + "grad_norm": 1680.7945305414196, + "learning_rate": 1.013273945318831e-05, + "loss": 279.0429, + "step": 26040 + }, + { + "epoch": 0.5010073035517667, + "grad_norm": 1664.2913124351896, + "learning_rate": 1.0126636823948373e-05, + "loss": 268.6135, + "step": 26050 + }, + { + "epoch": 0.5011996288122472, + "grad_norm": 1848.315319928652, + "learning_rate": 1.0120534147538305e-05, + "loss": 276.1851, + "step": 26060 + }, + { + "epoch": 0.5013919540727279, + "grad_norm": 1558.6579765155225, + "learning_rate": 1.011443142623126e-05, + "loss": 280.8799, + "step": 26070 + }, + { + "epoch": 0.5015842793332084, + "grad_norm": 1793.5497181113903, + "learning_rate": 1.0108328662300399e-05, + "loss": 271.3325, + "step": 26080 + }, + { + "epoch": 0.5017766045936889, + "grad_norm": 2003.0828263051126, + "learning_rate": 1.0102225858018902e-05, + "loss": 266.1945, + "step": 26090 + }, + { + "epoch": 0.5019689298541694, + "grad_norm": 1789.1073389557582, + "learning_rate": 1.009612301565996e-05, + "loss": 274.8918, + "step": 26100 + }, + { + "epoch": 0.5021612551146499, + "grad_norm": 1603.861578608274, + "learning_rate": 1.0090020137496783e-05, + "loss": 269.6036, + "step": 26110 + }, + { + "epoch": 0.5023535803751304, + "grad_norm": 2100.0472697596374, + "learning_rate": 1.008391722580259e-05, + "loss": 283.053, + "step": 26120 + }, + { + "epoch": 0.502545905635611, + "grad_norm": 1780.7006841160278, + "learning_rate": 1.0077814282850617e-05, + "loss": 290.6751, + "step": 26130 + }, + { + "epoch": 0.5027382308960915, + "grad_norm": 1702.6634408727787, + "learning_rate": 1.0071711310914111e-05, + "loss": 273.0348, + "step": 26140 + }, + { + "epoch": 0.502930556156572, + "grad_norm": 1692.7111133172946, + "learning_rate": 1.0065608312266324e-05, + "loss": 275.5359, + "step": 26150 + }, + { + "epoch": 0.5031228814170525, + "grad_norm": 1798.8909913614705, + "learning_rate": 1.005950528918052e-05, + "loss": 274.5655, + "step": 26160 + }, + { + "epoch": 0.503315206677533, + "grad_norm": 1733.10211000895, + "learning_rate": 1.0053402243929986e-05, + "loss": 280.753, + "step": 26170 + }, + { + "epoch": 0.5035075319380136, + "grad_norm": 1902.6321891726207, + "learning_rate": 1.0047299178787993e-05, + "loss": 274.4896, + "step": 26180 + }, + { + "epoch": 0.5036998571984941, + "grad_norm": 1700.5182253775538, + "learning_rate": 1.0041196096027841e-05, + "loss": 269.7237, + "step": 26190 + }, + { + "epoch": 0.5038921824589746, + "grad_norm": 1635.4956676626955, + "learning_rate": 1.003509299792282e-05, + "loss": 271.1262, + "step": 26200 + }, + { + "epoch": 0.5040845077194551, + "grad_norm": 1752.5666472258445, + "learning_rate": 1.0028989886746241e-05, + "loss": 262.8223, + "step": 26210 + }, + { + "epoch": 0.5042768329799356, + "grad_norm": 1784.563574089057, + "learning_rate": 1.0022886764771405e-05, + "loss": 270.4692, + "step": 26220 + }, + { + "epoch": 0.5044691582404162, + "grad_norm": 1870.167366417679, + "learning_rate": 1.0016783634271626e-05, + "loss": 269.1117, + "step": 26230 + }, + { + "epoch": 0.5046614835008967, + "grad_norm": 1729.0544402515077, + "learning_rate": 1.001068049752022e-05, + "loss": 272.5295, + "step": 26240 + }, + { + "epoch": 0.5048538087613772, + "grad_norm": 1643.144669762963, + "learning_rate": 1.0004577356790506e-05, + "loss": 268.1054, + "step": 26250 + }, + { + "epoch": 0.5050461340218577, + "grad_norm": 1740.6069494075255, + "learning_rate": 9.998474214355805e-06, + "loss": 271.8513, + "step": 26260 + }, + { + "epoch": 0.5052384592823382, + "grad_norm": 1992.7094120220727, + "learning_rate": 9.992371072489434e-06, + "loss": 278.6317, + "step": 26270 + }, + { + "epoch": 0.5054307845428189, + "grad_norm": 1708.1682303439318, + "learning_rate": 9.986267933464707e-06, + "loss": 281.2483, + "step": 26280 + }, + { + "epoch": 0.5056231098032994, + "grad_norm": 1647.0960935851654, + "learning_rate": 9.980164799554953e-06, + "loss": 277.364, + "step": 26290 + }, + { + "epoch": 0.5058154350637799, + "grad_norm": 1831.5940511449849, + "learning_rate": 9.97406167303348e-06, + "loss": 272.5185, + "step": 26300 + }, + { + "epoch": 0.5060077603242604, + "grad_norm": 1850.4748919082288, + "learning_rate": 9.967958556173612e-06, + "loss": 275.872, + "step": 26310 + }, + { + "epoch": 0.5062000855847409, + "grad_norm": 1981.073526222446, + "learning_rate": 9.961855451248645e-06, + "loss": 267.4932, + "step": 26320 + }, + { + "epoch": 0.5063924108452215, + "grad_norm": 1713.9122190963226, + "learning_rate": 9.955752360531896e-06, + "loss": 279.3994, + "step": 26330 + }, + { + "epoch": 0.506584736105702, + "grad_norm": 1988.6807078523354, + "learning_rate": 9.949649286296663e-06, + "loss": 272.955, + "step": 26340 + }, + { + "epoch": 0.5067770613661825, + "grad_norm": 1795.7312095104523, + "learning_rate": 9.943546230816236e-06, + "loss": 272.6052, + "step": 26350 + }, + { + "epoch": 0.506969386626663, + "grad_norm": 1891.0151326283483, + "learning_rate": 9.937443196363908e-06, + "loss": 279.6853, + "step": 26360 + }, + { + "epoch": 0.5071617118871435, + "grad_norm": 1702.973385216345, + "learning_rate": 9.931340185212955e-06, + "loss": 265.7823, + "step": 26370 + }, + { + "epoch": 0.5073540371476241, + "grad_norm": 1795.5546094161111, + "learning_rate": 9.925237199636652e-06, + "loss": 264.735, + "step": 26380 + }, + { + "epoch": 0.5075463624081046, + "grad_norm": 1740.8570507590339, + "learning_rate": 9.919134241908252e-06, + "loss": 269.8005, + "step": 26390 + }, + { + "epoch": 0.5077386876685851, + "grad_norm": 1863.9503903518575, + "learning_rate": 9.913031314301016e-06, + "loss": 278.6392, + "step": 26400 + }, + { + "epoch": 0.5079310129290656, + "grad_norm": 1638.866319568942, + "learning_rate": 9.906928419088178e-06, + "loss": 266.4949, + "step": 26410 + }, + { + "epoch": 0.5081233381895461, + "grad_norm": 1886.6857498121033, + "learning_rate": 9.900825558542965e-06, + "loss": 276.7004, + "step": 26420 + }, + { + "epoch": 0.5083156634500267, + "grad_norm": 1782.98923090286, + "learning_rate": 9.894722734938595e-06, + "loss": 282.0657, + "step": 26430 + }, + { + "epoch": 0.5085079887105072, + "grad_norm": 2400.2128392775367, + "learning_rate": 9.888619950548267e-06, + "loss": 269.9959, + "step": 26440 + }, + { + "epoch": 0.5087003139709877, + "grad_norm": 1693.9313596502593, + "learning_rate": 9.88251720764517e-06, + "loss": 265.1197, + "step": 26450 + }, + { + "epoch": 0.5088926392314682, + "grad_norm": 1729.9994544333172, + "learning_rate": 9.876414508502468e-06, + "loss": 270.7896, + "step": 26460 + }, + { + "epoch": 0.5090849644919487, + "grad_norm": 1742.3993622554844, + "learning_rate": 9.870311855393324e-06, + "loss": 275.8884, + "step": 26470 + }, + { + "epoch": 0.5092772897524294, + "grad_norm": 1715.8933194161416, + "learning_rate": 9.864209250590875e-06, + "loss": 263.0368, + "step": 26480 + }, + { + "epoch": 0.5094696150129099, + "grad_norm": 1640.5133977051426, + "learning_rate": 9.858106696368235e-06, + "loss": 274.8562, + "step": 26490 + }, + { + "epoch": 0.5096619402733904, + "grad_norm": 1588.4028118143278, + "learning_rate": 9.852004194998503e-06, + "loss": 268.0271, + "step": 26500 + }, + { + "epoch": 0.5098542655338709, + "grad_norm": 1600.8963906933461, + "learning_rate": 9.845901748754767e-06, + "loss": 268.6611, + "step": 26510 + }, + { + "epoch": 0.5100465907943514, + "grad_norm": 1681.4632120338292, + "learning_rate": 9.839799359910087e-06, + "loss": 273.9835, + "step": 26520 + }, + { + "epoch": 0.510238916054832, + "grad_norm": 1637.923273314008, + "learning_rate": 9.833697030737495e-06, + "loss": 283.8083, + "step": 26530 + }, + { + "epoch": 0.5104312413153125, + "grad_norm": 1714.3455707896221, + "learning_rate": 9.827594763510016e-06, + "loss": 275.4779, + "step": 26540 + }, + { + "epoch": 0.510623566575793, + "grad_norm": 1747.434790453476, + "learning_rate": 9.821492560500641e-06, + "loss": 268.2246, + "step": 26550 + }, + { + "epoch": 0.5108158918362735, + "grad_norm": 1592.2270746319073, + "learning_rate": 9.815390423982339e-06, + "loss": 270.3903, + "step": 26560 + }, + { + "epoch": 0.511008217096754, + "grad_norm": 1742.4783152382627, + "learning_rate": 9.809288356228051e-06, + "loss": 268.1029, + "step": 26570 + }, + { + "epoch": 0.5112005423572346, + "grad_norm": 1671.141526900796, + "learning_rate": 9.803186359510701e-06, + "loss": 268.3448, + "step": 26580 + }, + { + "epoch": 0.5113928676177151, + "grad_norm": 1570.6436232196118, + "learning_rate": 9.797084436103186e-06, + "loss": 271.0514, + "step": 26590 + }, + { + "epoch": 0.5115851928781956, + "grad_norm": 1693.8866383781922, + "learning_rate": 9.790982588278362e-06, + "loss": 273.5804, + "step": 26600 + }, + { + "epoch": 0.5117775181386761, + "grad_norm": 2046.825733085198, + "learning_rate": 9.784880818309076e-06, + "loss": 268.9866, + "step": 26610 + }, + { + "epoch": 0.5119698433991566, + "grad_norm": 1754.154892170759, + "learning_rate": 9.778779128468133e-06, + "loss": 288.0217, + "step": 26620 + }, + { + "epoch": 0.5121621686596372, + "grad_norm": 1762.6089269051984, + "learning_rate": 9.77267752102831e-06, + "loss": 271.5234, + "step": 26630 + }, + { + "epoch": 0.5123544939201177, + "grad_norm": 1568.8738217503458, + "learning_rate": 9.766575998262353e-06, + "loss": 266.7213, + "step": 26640 + }, + { + "epoch": 0.5125468191805982, + "grad_norm": 1547.7107254803711, + "learning_rate": 9.760474562442984e-06, + "loss": 285.0681, + "step": 26650 + }, + { + "epoch": 0.5127391444410787, + "grad_norm": 1516.5694113966097, + "learning_rate": 9.754373215842884e-06, + "loss": 272.9505, + "step": 26660 + }, + { + "epoch": 0.5129314697015592, + "grad_norm": 1885.127714472673, + "learning_rate": 9.748271960734708e-06, + "loss": 279.3075, + "step": 26670 + }, + { + "epoch": 0.5131237949620397, + "grad_norm": 1704.6934684909331, + "learning_rate": 9.742170799391063e-06, + "loss": 262.489, + "step": 26680 + }, + { + "epoch": 0.5133161202225204, + "grad_norm": 1565.6576422339674, + "learning_rate": 9.736069734084541e-06, + "loss": 261.1578, + "step": 26690 + }, + { + "epoch": 0.5135084454830009, + "grad_norm": 1626.064689973387, + "learning_rate": 9.729968767087685e-06, + "loss": 267.2632, + "step": 26700 + }, + { + "epoch": 0.5137007707434814, + "grad_norm": 1708.6445641992425, + "learning_rate": 9.723867900673e-06, + "loss": 263.0513, + "step": 26710 + }, + { + "epoch": 0.5138930960039619, + "grad_norm": 1662.7940086333215, + "learning_rate": 9.717767137112964e-06, + "loss": 270.2977, + "step": 26720 + }, + { + "epoch": 0.5140854212644425, + "grad_norm": 1692.3392392486355, + "learning_rate": 9.711666478680007e-06, + "loss": 275.7062, + "step": 26730 + }, + { + "epoch": 0.514277746524923, + "grad_norm": 1664.5381189162295, + "learning_rate": 9.705565927646526e-06, + "loss": 268.1979, + "step": 26740 + }, + { + "epoch": 0.5144700717854035, + "grad_norm": 1777.8573271768778, + "learning_rate": 9.699465486284871e-06, + "loss": 268.7613, + "step": 26750 + }, + { + "epoch": 0.514662397045884, + "grad_norm": 1745.6112233650072, + "learning_rate": 9.693365156867363e-06, + "loss": 263.1743, + "step": 26760 + }, + { + "epoch": 0.5148547223063645, + "grad_norm": 1612.7770271871843, + "learning_rate": 9.68726494166627e-06, + "loss": 265.2632, + "step": 26770 + }, + { + "epoch": 0.515047047566845, + "grad_norm": 1763.8031608827034, + "learning_rate": 9.681164842953816e-06, + "loss": 268.9233, + "step": 26780 + }, + { + "epoch": 0.5152393728273256, + "grad_norm": 1690.798057848904, + "learning_rate": 9.675064863002196e-06, + "loss": 270.2367, + "step": 26790 + }, + { + "epoch": 0.5154316980878061, + "grad_norm": 1737.4487933297435, + "learning_rate": 9.668965004083549e-06, + "loss": 275.0438, + "step": 26800 + }, + { + "epoch": 0.5156240233482866, + "grad_norm": 1887.430022879798, + "learning_rate": 9.66286526846997e-06, + "loss": 272.8342, + "step": 26810 + }, + { + "epoch": 0.5158163486087671, + "grad_norm": 1691.1122652435636, + "learning_rate": 9.656765658433507e-06, + "loss": 265.7619, + "step": 26820 + }, + { + "epoch": 0.5160086738692476, + "grad_norm": 1991.6050596638497, + "learning_rate": 9.650666176246171e-06, + "loss": 268.2168, + "step": 26830 + }, + { + "epoch": 0.5162009991297282, + "grad_norm": 1803.3879335031136, + "learning_rate": 9.644566824179916e-06, + "loss": 262.2074, + "step": 26840 + }, + { + "epoch": 0.5163933243902087, + "grad_norm": 1669.6728527011023, + "learning_rate": 9.638467604506648e-06, + "loss": 271.3996, + "step": 26850 + }, + { + "epoch": 0.5165856496506892, + "grad_norm": 1532.422202290953, + "learning_rate": 9.632368519498224e-06, + "loss": 266.5838, + "step": 26860 + }, + { + "epoch": 0.5167779749111697, + "grad_norm": 1746.4595464416727, + "learning_rate": 9.626269571426456e-06, + "loss": 276.9618, + "step": 26870 + }, + { + "epoch": 0.5169703001716502, + "grad_norm": 1552.2117536956355, + "learning_rate": 9.620170762563103e-06, + "loss": 262.0165, + "step": 26880 + }, + { + "epoch": 0.5171626254321309, + "grad_norm": 1611.71030835006, + "learning_rate": 9.614072095179862e-06, + "loss": 267.8775, + "step": 26890 + }, + { + "epoch": 0.5173549506926114, + "grad_norm": 1793.0356958685372, + "learning_rate": 9.607973571548396e-06, + "loss": 277.4816, + "step": 26900 + }, + { + "epoch": 0.5175472759530919, + "grad_norm": 1694.433011116257, + "learning_rate": 9.601875193940301e-06, + "loss": 261.8774, + "step": 26910 + }, + { + "epoch": 0.5177396012135724, + "grad_norm": 1742.9504962111776, + "learning_rate": 9.595776964627119e-06, + "loss": 264.9292, + "step": 26920 + }, + { + "epoch": 0.517931926474053, + "grad_norm": 1567.7765524754527, + "learning_rate": 9.589678885880342e-06, + "loss": 271.9219, + "step": 26930 + }, + { + "epoch": 0.5181242517345335, + "grad_norm": 1795.1615088469457, + "learning_rate": 9.583580959971402e-06, + "loss": 268.8843, + "step": 26940 + }, + { + "epoch": 0.518316576995014, + "grad_norm": 1707.4568846423876, + "learning_rate": 9.577483189171681e-06, + "loss": 275.2421, + "step": 26950 + }, + { + "epoch": 0.5185089022554945, + "grad_norm": 1689.4044028785813, + "learning_rate": 9.571385575752487e-06, + "loss": 263.9782, + "step": 26960 + }, + { + "epoch": 0.518701227515975, + "grad_norm": 1892.2200129508271, + "learning_rate": 9.565288121985093e-06, + "loss": 276.4928, + "step": 26970 + }, + { + "epoch": 0.5188935527764555, + "grad_norm": 1697.2964052702898, + "learning_rate": 9.559190830140695e-06, + "loss": 272.9482, + "step": 26980 + }, + { + "epoch": 0.5190858780369361, + "grad_norm": 1695.1652072817276, + "learning_rate": 9.553093702490433e-06, + "loss": 264.6189, + "step": 26990 + }, + { + "epoch": 0.5192782032974166, + "grad_norm": 1700.957498664333, + "learning_rate": 9.54699674130538e-06, + "loss": 266.9093, + "step": 27000 + }, + { + "epoch": 0.5194705285578971, + "grad_norm": 1644.3914203519191, + "learning_rate": 9.540899948856561e-06, + "loss": 259.7535, + "step": 27010 + }, + { + "epoch": 0.5196628538183776, + "grad_norm": 1505.4662412764394, + "learning_rate": 9.534803327414931e-06, + "loss": 272.6865, + "step": 27020 + }, + { + "epoch": 0.5198551790788581, + "grad_norm": 1612.389307080163, + "learning_rate": 9.52870687925138e-06, + "loss": 269.3465, + "step": 27030 + }, + { + "epoch": 0.5200475043393387, + "grad_norm": 1757.804421178999, + "learning_rate": 9.522610606636728e-06, + "loss": 270.4947, + "step": 27040 + }, + { + "epoch": 0.5202398295998192, + "grad_norm": 1679.9345432220853, + "learning_rate": 9.516514511841745e-06, + "loss": 270.8916, + "step": 27050 + }, + { + "epoch": 0.5204321548602997, + "grad_norm": 1642.4428909824912, + "learning_rate": 9.51041859713712e-06, + "loss": 258.8593, + "step": 27060 + }, + { + "epoch": 0.5206244801207802, + "grad_norm": 1735.534557700401, + "learning_rate": 9.50432286479348e-06, + "loss": 266.2346, + "step": 27070 + }, + { + "epoch": 0.5208168053812607, + "grad_norm": 1812.4163287774832, + "learning_rate": 9.498227317081387e-06, + "loss": 268.837, + "step": 27080 + }, + { + "epoch": 0.5210091306417413, + "grad_norm": 1964.8421362879276, + "learning_rate": 9.492131956271334e-06, + "loss": 267.5575, + "step": 27090 + }, + { + "epoch": 0.5212014559022219, + "grad_norm": 1825.0592922536978, + "learning_rate": 9.486036784633738e-06, + "loss": 269.7385, + "step": 27100 + }, + { + "epoch": 0.5213937811627024, + "grad_norm": 1655.977637532272, + "learning_rate": 9.47994180443895e-06, + "loss": 262.6288, + "step": 27110 + }, + { + "epoch": 0.5215861064231829, + "grad_norm": 1753.050934234177, + "learning_rate": 9.473847017957254e-06, + "loss": 267.9633, + "step": 27120 + }, + { + "epoch": 0.5217784316836634, + "grad_norm": 1665.7673290698224, + "learning_rate": 9.467752427458851e-06, + "loss": 269.7881, + "step": 27130 + }, + { + "epoch": 0.521970756944144, + "grad_norm": 1700.701063654924, + "learning_rate": 9.461658035213878e-06, + "loss": 266.743, + "step": 27140 + }, + { + "epoch": 0.5221630822046245, + "grad_norm": 1630.676990055864, + "learning_rate": 9.455563843492397e-06, + "loss": 269.311, + "step": 27150 + }, + { + "epoch": 0.522355407465105, + "grad_norm": 1798.3596045117974, + "learning_rate": 9.449469854564393e-06, + "loss": 262.1842, + "step": 27160 + }, + { + "epoch": 0.5225477327255855, + "grad_norm": 1506.0457521575593, + "learning_rate": 9.443376070699778e-06, + "loss": 270.4075, + "step": 27170 + }, + { + "epoch": 0.522740057986066, + "grad_norm": 1784.7343947685292, + "learning_rate": 9.437282494168379e-06, + "loss": 264.4419, + "step": 27180 + }, + { + "epoch": 0.5229323832465466, + "grad_norm": 1702.8371620709388, + "learning_rate": 9.431189127239962e-06, + "loss": 276.2573, + "step": 27190 + }, + { + "epoch": 0.5231247085070271, + "grad_norm": 1666.6848796432125, + "learning_rate": 9.4250959721842e-06, + "loss": 264.0803, + "step": 27200 + }, + { + "epoch": 0.5233170337675076, + "grad_norm": 1813.2448375008926, + "learning_rate": 9.419003031270692e-06, + "loss": 267.787, + "step": 27210 + }, + { + "epoch": 0.5235093590279881, + "grad_norm": 1609.1142744358856, + "learning_rate": 9.412910306768959e-06, + "loss": 261.8443, + "step": 27220 + }, + { + "epoch": 0.5237016842884686, + "grad_norm": 1924.4247481088046, + "learning_rate": 9.40681780094844e-06, + "loss": 260.3859, + "step": 27230 + }, + { + "epoch": 0.5238940095489492, + "grad_norm": 1674.2312576088304, + "learning_rate": 9.400725516078496e-06, + "loss": 266.449, + "step": 27240 + }, + { + "epoch": 0.5240863348094297, + "grad_norm": 1722.3043112679425, + "learning_rate": 9.394633454428396e-06, + "loss": 273.4871, + "step": 27250 + }, + { + "epoch": 0.5242786600699102, + "grad_norm": 1522.6320537332201, + "learning_rate": 9.388541618267341e-06, + "loss": 270.7666, + "step": 27260 + }, + { + "epoch": 0.5244709853303907, + "grad_norm": 1659.1718924580953, + "learning_rate": 9.382450009864434e-06, + "loss": 271.6879, + "step": 27270 + }, + { + "epoch": 0.5246633105908712, + "grad_norm": 1570.790918449188, + "learning_rate": 9.376358631488697e-06, + "loss": 257.0079, + "step": 27280 + }, + { + "epoch": 0.5248556358513518, + "grad_norm": 1573.7836391516046, + "learning_rate": 9.37026748540907e-06, + "loss": 270.6402, + "step": 27290 + }, + { + "epoch": 0.5250479611118324, + "grad_norm": 1642.2766486115481, + "learning_rate": 9.364176573894404e-06, + "loss": 260.5158, + "step": 27300 + }, + { + "epoch": 0.5252402863723129, + "grad_norm": 1835.4146849417075, + "learning_rate": 9.358085899213467e-06, + "loss": 267.7711, + "step": 27310 + }, + { + "epoch": 0.5254326116327934, + "grad_norm": 1719.6920763262058, + "learning_rate": 9.351995463634925e-06, + "loss": 266.0158, + "step": 27320 + }, + { + "epoch": 0.5256249368932739, + "grad_norm": 1679.1312513435219, + "learning_rate": 9.345905269427374e-06, + "loss": 264.1468, + "step": 27330 + }, + { + "epoch": 0.5258172621537545, + "grad_norm": 1626.3173692713149, + "learning_rate": 9.33981531885931e-06, + "loss": 270.7995, + "step": 27340 + }, + { + "epoch": 0.526009587414235, + "grad_norm": 1811.6392817061899, + "learning_rate": 9.333725614199132e-06, + "loss": 269.1323, + "step": 27350 + }, + { + "epoch": 0.5262019126747155, + "grad_norm": 2241.589188191697, + "learning_rate": 9.32763615771516e-06, + "loss": 271.5982, + "step": 27360 + }, + { + "epoch": 0.526394237935196, + "grad_norm": 2055.8316599772043, + "learning_rate": 9.321546951675616e-06, + "loss": 269.8291, + "step": 27370 + }, + { + "epoch": 0.5265865631956765, + "grad_norm": 1653.4648633453337, + "learning_rate": 9.315457998348627e-06, + "loss": 263.0201, + "step": 27380 + }, + { + "epoch": 0.5267788884561571, + "grad_norm": 1539.8485747905363, + "learning_rate": 9.309369300002224e-06, + "loss": 276.6529, + "step": 27390 + }, + { + "epoch": 0.5269712137166376, + "grad_norm": 1764.5718062380618, + "learning_rate": 9.303280858904356e-06, + "loss": 261.8161, + "step": 27400 + }, + { + "epoch": 0.5271635389771181, + "grad_norm": 2128.8481479222705, + "learning_rate": 9.297192677322862e-06, + "loss": 273.249, + "step": 27410 + }, + { + "epoch": 0.5273558642375986, + "grad_norm": 1617.9349758229725, + "learning_rate": 9.291104757525486e-06, + "loss": 272.4803, + "step": 27420 + }, + { + "epoch": 0.5275481894980791, + "grad_norm": 1757.5701618188102, + "learning_rate": 9.28501710177988e-06, + "loss": 258.0056, + "step": 27430 + }, + { + "epoch": 0.5277405147585597, + "grad_norm": 1739.9126733629178, + "learning_rate": 9.278929712353595e-06, + "loss": 272.746, + "step": 27440 + }, + { + "epoch": 0.5279328400190402, + "grad_norm": 1649.6529111185948, + "learning_rate": 9.272842591514083e-06, + "loss": 265.0695, + "step": 27450 + }, + { + "epoch": 0.5281251652795207, + "grad_norm": 1636.081173647623, + "learning_rate": 9.2667557415287e-06, + "loss": 269.9739, + "step": 27460 + }, + { + "epoch": 0.5283174905400012, + "grad_norm": 1833.3392132795411, + "learning_rate": 9.260669164664687e-06, + "loss": 272.6387, + "step": 27470 + }, + { + "epoch": 0.5285098158004817, + "grad_norm": 1689.9206501238227, + "learning_rate": 9.254582863189205e-06, + "loss": 274.3722, + "step": 27480 + }, + { + "epoch": 0.5287021410609622, + "grad_norm": 2022.3859797909827, + "learning_rate": 9.248496839369293e-06, + "loss": 260.403, + "step": 27490 + }, + { + "epoch": 0.5288944663214428, + "grad_norm": 1779.9215541039039, + "learning_rate": 9.242411095471897e-06, + "loss": 285.2193, + "step": 27500 + }, + { + "epoch": 0.5290867915819234, + "grad_norm": 1766.390633558896, + "learning_rate": 9.236325633763856e-06, + "loss": 265.0325, + "step": 27510 + }, + { + "epoch": 0.5292791168424039, + "grad_norm": 1569.3923986919492, + "learning_rate": 9.230240456511905e-06, + "loss": 278.332, + "step": 27520 + }, + { + "epoch": 0.5294714421028844, + "grad_norm": 1873.8427468584869, + "learning_rate": 9.224155565982673e-06, + "loss": 262.1785, + "step": 27530 + }, + { + "epoch": 0.529663767363365, + "grad_norm": 1705.317041833594, + "learning_rate": 9.218070964442673e-06, + "loss": 267.993, + "step": 27540 + }, + { + "epoch": 0.5298560926238455, + "grad_norm": 1818.2525733384166, + "learning_rate": 9.21198665415833e-06, + "loss": 266.7808, + "step": 27550 + }, + { + "epoch": 0.530048417884326, + "grad_norm": 1640.422556601716, + "learning_rate": 9.205902637395943e-06, + "loss": 268.128, + "step": 27560 + }, + { + "epoch": 0.5302407431448065, + "grad_norm": 1760.3109550929307, + "learning_rate": 9.199818916421706e-06, + "loss": 261.4038, + "step": 27570 + }, + { + "epoch": 0.530433068405287, + "grad_norm": 1620.3411373485635, + "learning_rate": 9.193735493501707e-06, + "loss": 266.1088, + "step": 27580 + }, + { + "epoch": 0.5306253936657676, + "grad_norm": 1605.0512944882896, + "learning_rate": 9.187652370901925e-06, + "loss": 272.0439, + "step": 27590 + }, + { + "epoch": 0.5308177189262481, + "grad_norm": 1665.9501984803335, + "learning_rate": 9.181569550888217e-06, + "loss": 271.1273, + "step": 27600 + }, + { + "epoch": 0.5310100441867286, + "grad_norm": 1696.5075718828227, + "learning_rate": 9.175487035726332e-06, + "loss": 263.8659, + "step": 27610 + }, + { + "epoch": 0.5312023694472091, + "grad_norm": 1672.7171767690058, + "learning_rate": 9.169404827681912e-06, + "loss": 261.8372, + "step": 27620 + }, + { + "epoch": 0.5313946947076896, + "grad_norm": 1611.2612620256748, + "learning_rate": 9.163322929020476e-06, + "loss": 261.2308, + "step": 27630 + }, + { + "epoch": 0.5315870199681701, + "grad_norm": 1581.610396039253, + "learning_rate": 9.157241342007428e-06, + "loss": 265.8153, + "step": 27640 + }, + { + "epoch": 0.5317793452286507, + "grad_norm": 1721.8160664638826, + "learning_rate": 9.15116006890806e-06, + "loss": 268.9988, + "step": 27650 + }, + { + "epoch": 0.5319716704891312, + "grad_norm": 1605.6097117632758, + "learning_rate": 9.145079111987552e-06, + "loss": 270.3846, + "step": 27660 + }, + { + "epoch": 0.5321639957496117, + "grad_norm": 1590.0273875726666, + "learning_rate": 9.138998473510953e-06, + "loss": 264.0372, + "step": 27670 + }, + { + "epoch": 0.5323563210100922, + "grad_norm": 1595.7658820030006, + "learning_rate": 9.1329181557432e-06, + "loss": 267.6364, + "step": 27680 + }, + { + "epoch": 0.5325486462705727, + "grad_norm": 1679.5579486409333, + "learning_rate": 9.126838160949119e-06, + "loss": 266.3184, + "step": 27690 + }, + { + "epoch": 0.5327409715310533, + "grad_norm": 1539.515659871773, + "learning_rate": 9.120758491393402e-06, + "loss": 272.8407, + "step": 27700 + }, + { + "epoch": 0.5329332967915339, + "grad_norm": 1479.431659387387, + "learning_rate": 9.114679149340623e-06, + "loss": 273.1751, + "step": 27710 + }, + { + "epoch": 0.5331256220520144, + "grad_norm": 1575.3753378800268, + "learning_rate": 9.10860013705524e-06, + "loss": 264.8448, + "step": 27720 + }, + { + "epoch": 0.5333179473124949, + "grad_norm": 1676.6388846882585, + "learning_rate": 9.102521456801582e-06, + "loss": 259.9461, + "step": 27730 + }, + { + "epoch": 0.5335102725729755, + "grad_norm": 1705.92009299794, + "learning_rate": 9.096443110843864e-06, + "loss": 264.2077, + "step": 27740 + }, + { + "epoch": 0.533702597833456, + "grad_norm": 1612.866193190612, + "learning_rate": 9.090365101446157e-06, + "loss": 265.0348, + "step": 27750 + }, + { + "epoch": 0.5338949230939365, + "grad_norm": 1681.3447580738216, + "learning_rate": 9.084287430872435e-06, + "loss": 265.8832, + "step": 27760 + }, + { + "epoch": 0.534087248354417, + "grad_norm": 1651.3258990916645, + "learning_rate": 9.078210101386518e-06, + "loss": 261.6132, + "step": 27770 + }, + { + "epoch": 0.5342795736148975, + "grad_norm": 1773.57898334408, + "learning_rate": 9.072133115252113e-06, + "loss": 264.9888, + "step": 27780 + }, + { + "epoch": 0.534471898875378, + "grad_norm": 1744.3508470384886, + "learning_rate": 9.066056474732798e-06, + "loss": 264.6408, + "step": 27790 + }, + { + "epoch": 0.5346642241358586, + "grad_norm": 1595.6669729080709, + "learning_rate": 9.059980182092022e-06, + "loss": 259.3286, + "step": 27800 + }, + { + "epoch": 0.5348565493963391, + "grad_norm": 1709.518386029998, + "learning_rate": 9.053904239593106e-06, + "loss": 266.3236, + "step": 27810 + }, + { + "epoch": 0.5350488746568196, + "grad_norm": 2429.771177624152, + "learning_rate": 9.047828649499236e-06, + "loss": 273.9908, + "step": 27820 + }, + { + "epoch": 0.5352411999173001, + "grad_norm": 1686.4281521790188, + "learning_rate": 9.041753414073463e-06, + "loss": 262.8393, + "step": 27830 + }, + { + "epoch": 0.5354335251777806, + "grad_norm": 1658.942456732611, + "learning_rate": 9.035678535578723e-06, + "loss": 264.2115, + "step": 27840 + }, + { + "epoch": 0.5356258504382612, + "grad_norm": 1581.2670375800524, + "learning_rate": 9.029604016277798e-06, + "loss": 264.842, + "step": 27850 + }, + { + "epoch": 0.5358181756987417, + "grad_norm": 1767.3090528996986, + "learning_rate": 9.02352985843335e-06, + "loss": 271.3129, + "step": 27860 + }, + { + "epoch": 0.5360105009592222, + "grad_norm": 1493.242922428013, + "learning_rate": 9.017456064307904e-06, + "loss": 266.3733, + "step": 27870 + }, + { + "epoch": 0.5362028262197027, + "grad_norm": 1804.544461083966, + "learning_rate": 9.01138263616385e-06, + "loss": 266.0222, + "step": 27880 + }, + { + "epoch": 0.5363951514801832, + "grad_norm": 1657.9095763685818, + "learning_rate": 9.005309576263436e-06, + "loss": 263.0513, + "step": 27890 + }, + { + "epoch": 0.5365874767406638, + "grad_norm": 1744.940196801218, + "learning_rate": 8.999236886868772e-06, + "loss": 259.8919, + "step": 27900 + }, + { + "epoch": 0.5367798020011444, + "grad_norm": 1733.4525089973727, + "learning_rate": 8.993164570241844e-06, + "loss": 261.6144, + "step": 27910 + }, + { + "epoch": 0.5369721272616249, + "grad_norm": 1752.5580786166774, + "learning_rate": 8.987092628644483e-06, + "loss": 276.0592, + "step": 27920 + }, + { + "epoch": 0.5371644525221054, + "grad_norm": 1704.7271751714843, + "learning_rate": 8.981021064338388e-06, + "loss": 263.5728, + "step": 27930 + }, + { + "epoch": 0.537356777782586, + "grad_norm": 1745.2344509657614, + "learning_rate": 8.974949879585118e-06, + "loss": 266.2813, + "step": 27940 + }, + { + "epoch": 0.5375491030430665, + "grad_norm": 1947.4409417900872, + "learning_rate": 8.968879076646093e-06, + "loss": 263.7487, + "step": 27950 + }, + { + "epoch": 0.537741428303547, + "grad_norm": 1546.9149549898798, + "learning_rate": 8.96280865778258e-06, + "loss": 262.628, + "step": 27960 + }, + { + "epoch": 0.5379337535640275, + "grad_norm": 1658.7955073371197, + "learning_rate": 8.956738625255709e-06, + "loss": 268.2876, + "step": 27970 + }, + { + "epoch": 0.538126078824508, + "grad_norm": 1608.7908764996746, + "learning_rate": 8.950668981326473e-06, + "loss": 260.7712, + "step": 27980 + }, + { + "epoch": 0.5383184040849885, + "grad_norm": 1814.1484165008458, + "learning_rate": 8.94459972825571e-06, + "loss": 263.5938, + "step": 27990 + }, + { + "epoch": 0.5385107293454691, + "grad_norm": 1639.1999684789391, + "learning_rate": 8.938530868304121e-06, + "loss": 261.185, + "step": 28000 + }, + { + "epoch": 0.5387030546059496, + "grad_norm": 1715.0675156113984, + "learning_rate": 8.932462403732248e-06, + "loss": 263.6565, + "step": 28010 + }, + { + "epoch": 0.5388953798664301, + "grad_norm": 1641.1183656892497, + "learning_rate": 8.926394336800502e-06, + "loss": 271.369, + "step": 28020 + }, + { + "epoch": 0.5390877051269106, + "grad_norm": 1667.5529741573303, + "learning_rate": 8.920326669769134e-06, + "loss": 269.715, + "step": 28030 + }, + { + "epoch": 0.5392800303873911, + "grad_norm": 1905.3607460298292, + "learning_rate": 8.914259404898247e-06, + "loss": 264.3525, + "step": 28040 + }, + { + "epoch": 0.5394723556478717, + "grad_norm": 1607.8311224232446, + "learning_rate": 8.908192544447803e-06, + "loss": 264.5651, + "step": 28050 + }, + { + "epoch": 0.5396646809083522, + "grad_norm": 1607.0847150935763, + "learning_rate": 8.902126090677605e-06, + "loss": 253.4698, + "step": 28060 + }, + { + "epoch": 0.5398570061688327, + "grad_norm": 1551.4778511500265, + "learning_rate": 8.896060045847305e-06, + "loss": 264.4631, + "step": 28070 + }, + { + "epoch": 0.5400493314293132, + "grad_norm": 1601.4403829853502, + "learning_rate": 8.889994412216403e-06, + "loss": 261.2042, + "step": 28080 + }, + { + "epoch": 0.5402416566897937, + "grad_norm": 1571.7127505384396, + "learning_rate": 8.883929192044254e-06, + "loss": 258.9222, + "step": 28090 + }, + { + "epoch": 0.5404339819502743, + "grad_norm": 1580.4622930781718, + "learning_rate": 8.877864387590049e-06, + "loss": 267.5097, + "step": 28100 + }, + { + "epoch": 0.5406263072107548, + "grad_norm": 1770.563420245373, + "learning_rate": 8.871800001112822e-06, + "loss": 266.7638, + "step": 28110 + }, + { + "epoch": 0.5408186324712354, + "grad_norm": 1744.5356029850248, + "learning_rate": 8.865736034871468e-06, + "loss": 271.1773, + "step": 28120 + }, + { + "epoch": 0.5410109577317159, + "grad_norm": 1803.642666608406, + "learning_rate": 8.859672491124706e-06, + "loss": 265.9221, + "step": 28130 + }, + { + "epoch": 0.5412032829921964, + "grad_norm": 1669.1577635476885, + "learning_rate": 8.853609372131105e-06, + "loss": 267.0815, + "step": 28140 + }, + { + "epoch": 0.541395608252677, + "grad_norm": 1723.4174579656947, + "learning_rate": 8.84754668014908e-06, + "loss": 264.052, + "step": 28150 + }, + { + "epoch": 0.5415879335131575, + "grad_norm": 1730.0654044504843, + "learning_rate": 8.841484417436886e-06, + "loss": 260.9908, + "step": 28160 + }, + { + "epoch": 0.541780258773638, + "grad_norm": 1673.2819251610888, + "learning_rate": 8.835422586252613e-06, + "loss": 271.824, + "step": 28170 + }, + { + "epoch": 0.5419725840341185, + "grad_norm": 1659.9596457926832, + "learning_rate": 8.829361188854194e-06, + "loss": 253.8619, + "step": 28180 + }, + { + "epoch": 0.542164909294599, + "grad_norm": 1716.7488308107304, + "learning_rate": 8.823300227499393e-06, + "loss": 260.1895, + "step": 28190 + }, + { + "epoch": 0.5423572345550796, + "grad_norm": 1846.4098048384983, + "learning_rate": 8.817239704445827e-06, + "loss": 266.1777, + "step": 28200 + }, + { + "epoch": 0.5425495598155601, + "grad_norm": 1755.0675953207456, + "learning_rate": 8.811179621950937e-06, + "loss": 257.8344, + "step": 28210 + }, + { + "epoch": 0.5427418850760406, + "grad_norm": 1897.4077009929483, + "learning_rate": 8.805119982272001e-06, + "loss": 264.3596, + "step": 28220 + }, + { + "epoch": 0.5429342103365211, + "grad_norm": 1718.2129387106945, + "learning_rate": 8.799060787666142e-06, + "loss": 267.1837, + "step": 28230 + }, + { + "epoch": 0.5431265355970016, + "grad_norm": 1721.6662863786794, + "learning_rate": 8.793002040390304e-06, + "loss": 267.4376, + "step": 28240 + }, + { + "epoch": 0.5433188608574822, + "grad_norm": 1516.9921656670845, + "learning_rate": 8.786943742701273e-06, + "loss": 261.2276, + "step": 28250 + }, + { + "epoch": 0.5435111861179627, + "grad_norm": 1839.8244921803764, + "learning_rate": 8.780885896855659e-06, + "loss": 262.1788, + "step": 28260 + }, + { + "epoch": 0.5437035113784432, + "grad_norm": 2024.4472195977369, + "learning_rate": 8.774828505109918e-06, + "loss": 271.277, + "step": 28270 + }, + { + "epoch": 0.5438958366389237, + "grad_norm": 1809.448040103689, + "learning_rate": 8.768771569720324e-06, + "loss": 264.9433, + "step": 28280 + }, + { + "epoch": 0.5440881618994042, + "grad_norm": 1755.5076131439093, + "learning_rate": 8.762715092942983e-06, + "loss": 273.2599, + "step": 28290 + }, + { + "epoch": 0.5442804871598848, + "grad_norm": 1507.6318245205412, + "learning_rate": 8.756659077033838e-06, + "loss": 264.8037, + "step": 28300 + }, + { + "epoch": 0.5444728124203653, + "grad_norm": 1655.4461040879078, + "learning_rate": 8.750603524248653e-06, + "loss": 271.1872, + "step": 28310 + }, + { + "epoch": 0.5446651376808459, + "grad_norm": 1549.3562031492345, + "learning_rate": 8.744548436843021e-06, + "loss": 266.1502, + "step": 28320 + }, + { + "epoch": 0.5448574629413264, + "grad_norm": 2123.7346842577267, + "learning_rate": 8.738493817072359e-06, + "loss": 264.7796, + "step": 28330 + }, + { + "epoch": 0.5450497882018069, + "grad_norm": 1771.267338219201, + "learning_rate": 8.73243966719192e-06, + "loss": 266.8416, + "step": 28340 + }, + { + "epoch": 0.5452421134622875, + "grad_norm": 1542.6316820384213, + "learning_rate": 8.726385989456764e-06, + "loss": 261.7626, + "step": 28350 + }, + { + "epoch": 0.545434438722768, + "grad_norm": 1732.3366773590503, + "learning_rate": 8.7203327861218e-06, + "loss": 265.4007, + "step": 28360 + }, + { + "epoch": 0.5456267639832485, + "grad_norm": 1666.2656426740543, + "learning_rate": 8.71428005944173e-06, + "loss": 263.2684, + "step": 28370 + }, + { + "epoch": 0.545819089243729, + "grad_norm": 1696.6021335501719, + "learning_rate": 8.708227811671112e-06, + "loss": 260.3924, + "step": 28380 + }, + { + "epoch": 0.5460114145042095, + "grad_norm": 1723.0235671410373, + "learning_rate": 8.702176045064296e-06, + "loss": 263.2037, + "step": 28390 + }, + { + "epoch": 0.54620373976469, + "grad_norm": 1539.2516405996203, + "learning_rate": 8.696124761875467e-06, + "loss": 265.2386, + "step": 28400 + }, + { + "epoch": 0.5463960650251706, + "grad_norm": 1562.5699357508377, + "learning_rate": 8.690073964358635e-06, + "loss": 271.1584, + "step": 28410 + }, + { + "epoch": 0.5465883902856511, + "grad_norm": 1771.977996789139, + "learning_rate": 8.684023654767613e-06, + "loss": 263.7209, + "step": 28420 + }, + { + "epoch": 0.5467807155461316, + "grad_norm": 1630.0735219365288, + "learning_rate": 8.677973835356048e-06, + "loss": 258.8074, + "step": 28430 + }, + { + "epoch": 0.5469730408066121, + "grad_norm": 1512.5582064074229, + "learning_rate": 8.671924508377392e-06, + "loss": 259.4953, + "step": 28440 + }, + { + "epoch": 0.5471653660670927, + "grad_norm": 1719.477551194441, + "learning_rate": 8.665875676084927e-06, + "loss": 263.0208, + "step": 28450 + }, + { + "epoch": 0.5473576913275732, + "grad_norm": 1723.6964463058296, + "learning_rate": 8.659827340731738e-06, + "loss": 267.5635, + "step": 28460 + }, + { + "epoch": 0.5475500165880537, + "grad_norm": 1746.722074664869, + "learning_rate": 8.653779504570728e-06, + "loss": 263.6304, + "step": 28470 + }, + { + "epoch": 0.5477423418485342, + "grad_norm": 1565.9587145262878, + "learning_rate": 8.647732169854622e-06, + "loss": 265.3575, + "step": 28480 + }, + { + "epoch": 0.5479346671090147, + "grad_norm": 1518.788829151476, + "learning_rate": 8.641685338835947e-06, + "loss": 262.618, + "step": 28490 + }, + { + "epoch": 0.5481269923694952, + "grad_norm": 1491.4182713254686, + "learning_rate": 8.635639013767053e-06, + "loss": 263.9698, + "step": 28500 + }, + { + "epoch": 0.5483193176299758, + "grad_norm": 1700.1457657151984, + "learning_rate": 8.629593196900088e-06, + "loss": 261.8302, + "step": 28510 + }, + { + "epoch": 0.5485116428904563, + "grad_norm": 1753.6427483796228, + "learning_rate": 8.62354789048703e-06, + "loss": 262.7962, + "step": 28520 + }, + { + "epoch": 0.5487039681509369, + "grad_norm": 1655.5452149103307, + "learning_rate": 8.617503096779648e-06, + "loss": 270.4195, + "step": 28530 + }, + { + "epoch": 0.5488962934114174, + "grad_norm": 1661.5759968315624, + "learning_rate": 8.61145881802953e-06, + "loss": 265.9621, + "step": 28540 + }, + { + "epoch": 0.549088618671898, + "grad_norm": 1603.5227939985791, + "learning_rate": 8.605415056488067e-06, + "loss": 263.2188, + "step": 28550 + }, + { + "epoch": 0.5492809439323785, + "grad_norm": 1702.672125098128, + "learning_rate": 8.599371814406465e-06, + "loss": 256.8539, + "step": 28560 + }, + { + "epoch": 0.549473269192859, + "grad_norm": 1708.7709976273409, + "learning_rate": 8.59332909403573e-06, + "loss": 287.2967, + "step": 28570 + }, + { + "epoch": 0.5496655944533395, + "grad_norm": 1910.401314671144, + "learning_rate": 8.587286897626672e-06, + "loss": 262.7298, + "step": 28580 + }, + { + "epoch": 0.54985791971382, + "grad_norm": 1755.2194390445322, + "learning_rate": 8.581245227429918e-06, + "loss": 267.085, + "step": 28590 + }, + { + "epoch": 0.5500502449743006, + "grad_norm": 1711.487715647836, + "learning_rate": 8.575204085695887e-06, + "loss": 256.2586, + "step": 28600 + }, + { + "epoch": 0.5502425702347811, + "grad_norm": 1774.5002180338, + "learning_rate": 8.5691634746748e-06, + "loss": 267.2271, + "step": 28610 + }, + { + "epoch": 0.5504348954952616, + "grad_norm": 1628.19399877106, + "learning_rate": 8.563123396616683e-06, + "loss": 255.9816, + "step": 28620 + }, + { + "epoch": 0.5506272207557421, + "grad_norm": 1626.4219694223898, + "learning_rate": 8.557083853771377e-06, + "loss": 260.0803, + "step": 28630 + }, + { + "epoch": 0.5508195460162226, + "grad_norm": 2346.0292739618067, + "learning_rate": 8.551044848388502e-06, + "loss": 265.1259, + "step": 28640 + }, + { + "epoch": 0.5510118712767031, + "grad_norm": 1645.7422381334331, + "learning_rate": 8.545006382717487e-06, + "loss": 263.926, + "step": 28650 + }, + { + "epoch": 0.5512041965371837, + "grad_norm": 1641.5596737815654, + "learning_rate": 8.538968459007569e-06, + "loss": 271.0264, + "step": 28660 + }, + { + "epoch": 0.5513965217976642, + "grad_norm": 1504.2272061625495, + "learning_rate": 8.532931079507772e-06, + "loss": 261.5937, + "step": 28670 + }, + { + "epoch": 0.5515888470581447, + "grad_norm": 1717.4515410807696, + "learning_rate": 8.526894246466916e-06, + "loss": 259.6705, + "step": 28680 + }, + { + "epoch": 0.5517811723186252, + "grad_norm": 1679.497250678857, + "learning_rate": 8.520857962133623e-06, + "loss": 258.9284, + "step": 28690 + }, + { + "epoch": 0.5519734975791057, + "grad_norm": 1859.0425088816849, + "learning_rate": 8.514822228756311e-06, + "loss": 266.5152, + "step": 28700 + }, + { + "epoch": 0.5521658228395863, + "grad_norm": 1601.5290974606924, + "learning_rate": 8.508787048583191e-06, + "loss": 262.185, + "step": 28710 + }, + { + "epoch": 0.5523581481000668, + "grad_norm": 1604.3446769992377, + "learning_rate": 8.502752423862264e-06, + "loss": 265.8423, + "step": 28720 + }, + { + "epoch": 0.5525504733605474, + "grad_norm": 1621.264889435611, + "learning_rate": 8.496718356841335e-06, + "loss": 256.4238, + "step": 28730 + }, + { + "epoch": 0.5527427986210279, + "grad_norm": 1549.595125678023, + "learning_rate": 8.49068484976799e-06, + "loss": 262.9498, + "step": 28740 + }, + { + "epoch": 0.5529351238815085, + "grad_norm": 1656.3031253152483, + "learning_rate": 8.484651904889614e-06, + "loss": 257.4689, + "step": 28750 + }, + { + "epoch": 0.553127449141989, + "grad_norm": 1641.9218524564199, + "learning_rate": 8.478619524453369e-06, + "loss": 258.6582, + "step": 28760 + }, + { + "epoch": 0.5533197744024695, + "grad_norm": 1759.0087233263441, + "learning_rate": 8.472587710706232e-06, + "loss": 255.9802, + "step": 28770 + }, + { + "epoch": 0.55351209966295, + "grad_norm": 1636.1809978566096, + "learning_rate": 8.466556465894942e-06, + "loss": 263.2994, + "step": 28780 + }, + { + "epoch": 0.5537044249234305, + "grad_norm": 1808.53240251678, + "learning_rate": 8.460525792266046e-06, + "loss": 267.5326, + "step": 28790 + }, + { + "epoch": 0.553896750183911, + "grad_norm": 1747.4212950937745, + "learning_rate": 8.454495692065862e-06, + "loss": 270.017, + "step": 28800 + }, + { + "epoch": 0.5540890754443916, + "grad_norm": 1669.61785488322, + "learning_rate": 8.448466167540514e-06, + "loss": 257.3167, + "step": 28810 + }, + { + "epoch": 0.5542814007048721, + "grad_norm": 1695.3462001112866, + "learning_rate": 8.442437220935893e-06, + "loss": 269.7112, + "step": 28820 + }, + { + "epoch": 0.5544737259653526, + "grad_norm": 1603.8919651161252, + "learning_rate": 8.436408854497679e-06, + "loss": 264.117, + "step": 28830 + }, + { + "epoch": 0.5546660512258331, + "grad_norm": 1679.5842600495218, + "learning_rate": 8.430381070471348e-06, + "loss": 264.923, + "step": 28840 + }, + { + "epoch": 0.5548583764863136, + "grad_norm": 1751.3710382623956, + "learning_rate": 8.424353871102144e-06, + "loss": 258.5021, + "step": 28850 + }, + { + "epoch": 0.5550507017467942, + "grad_norm": 1860.8253738714443, + "learning_rate": 8.4183272586351e-06, + "loss": 263.138, + "step": 28860 + }, + { + "epoch": 0.5552430270072747, + "grad_norm": 1719.623460434945, + "learning_rate": 8.412301235315026e-06, + "loss": 272.6122, + "step": 28870 + }, + { + "epoch": 0.5554353522677552, + "grad_norm": 1771.4754301382038, + "learning_rate": 8.406275803386525e-06, + "loss": 263.0697, + "step": 28880 + }, + { + "epoch": 0.5556276775282357, + "grad_norm": 1623.082697350942, + "learning_rate": 8.400250965093968e-06, + "loss": 263.7715, + "step": 28890 + }, + { + "epoch": 0.5558200027887162, + "grad_norm": 1712.7251600085062, + "learning_rate": 8.394226722681498e-06, + "loss": 268.2861, + "step": 28900 + }, + { + "epoch": 0.5560123280491968, + "grad_norm": 1690.7204189287131, + "learning_rate": 8.38820307839306e-06, + "loss": 261.4057, + "step": 28910 + }, + { + "epoch": 0.5562046533096773, + "grad_norm": 1540.101827431106, + "learning_rate": 8.382180034472353e-06, + "loss": 252.8094, + "step": 28920 + }, + { + "epoch": 0.5563969785701578, + "grad_norm": 1534.4748097839247, + "learning_rate": 8.376157593162867e-06, + "loss": 257.1021, + "step": 28930 + }, + { + "epoch": 0.5565893038306384, + "grad_norm": 1866.5748165768205, + "learning_rate": 8.370135756707853e-06, + "loss": 262.1258, + "step": 28940 + }, + { + "epoch": 0.556781629091119, + "grad_norm": 1626.0533696306002, + "learning_rate": 8.364114527350357e-06, + "loss": 260.1841, + "step": 28950 + }, + { + "epoch": 0.5569739543515995, + "grad_norm": 1706.7859715741658, + "learning_rate": 8.358093907333182e-06, + "loss": 256.0894, + "step": 28960 + }, + { + "epoch": 0.55716627961208, + "grad_norm": 1773.8256385724405, + "learning_rate": 8.35207389889891e-06, + "loss": 264.1106, + "step": 28970 + }, + { + "epoch": 0.5573586048725605, + "grad_norm": 1713.528596028282, + "learning_rate": 8.346054504289888e-06, + "loss": 267.1071, + "step": 28980 + }, + { + "epoch": 0.557550930133041, + "grad_norm": 1617.3470108224242, + "learning_rate": 8.34003572574825e-06, + "loss": 257.4415, + "step": 28990 + }, + { + "epoch": 0.5577432553935215, + "grad_norm": 1908.921435238911, + "learning_rate": 8.334017565515892e-06, + "loss": 263.7025, + "step": 29000 + }, + { + "epoch": 0.5579355806540021, + "grad_norm": 1812.21547786451, + "learning_rate": 8.328000025834472e-06, + "loss": 251.759, + "step": 29010 + }, + { + "epoch": 0.5581279059144826, + "grad_norm": 1791.3729993213485, + "learning_rate": 8.321983108945431e-06, + "loss": 261.9572, + "step": 29020 + }, + { + "epoch": 0.5583202311749631, + "grad_norm": 1855.5173768517516, + "learning_rate": 8.315966817089972e-06, + "loss": 267.1535, + "step": 29030 + }, + { + "epoch": 0.5585125564354436, + "grad_norm": 1645.0745532454027, + "learning_rate": 8.309951152509057e-06, + "loss": 262.0804, + "step": 29040 + }, + { + "epoch": 0.5587048816959241, + "grad_norm": 1535.6144313916911, + "learning_rate": 8.303936117443422e-06, + "loss": 268.5148, + "step": 29050 + }, + { + "epoch": 0.5588972069564047, + "grad_norm": 1516.5967306550074, + "learning_rate": 8.297921714133576e-06, + "loss": 263.3952, + "step": 29060 + }, + { + "epoch": 0.5590895322168852, + "grad_norm": 1750.4754603088422, + "learning_rate": 8.291907944819782e-06, + "loss": 256.8598, + "step": 29070 + }, + { + "epoch": 0.5592818574773657, + "grad_norm": 2737.4582033448974, + "learning_rate": 8.285894811742065e-06, + "loss": 250.8736, + "step": 29080 + }, + { + "epoch": 0.5594741827378462, + "grad_norm": 1549.4098703538348, + "learning_rate": 8.279882317140224e-06, + "loss": 254.5251, + "step": 29090 + }, + { + "epoch": 0.5596665079983267, + "grad_norm": 1780.4013207779396, + "learning_rate": 8.273870463253813e-06, + "loss": 260.6593, + "step": 29100 + }, + { + "epoch": 0.5598588332588073, + "grad_norm": 1710.0118800741498, + "learning_rate": 8.267859252322144e-06, + "loss": 264.3909, + "step": 29110 + }, + { + "epoch": 0.5600511585192878, + "grad_norm": 1680.5628552528506, + "learning_rate": 8.261848686584293e-06, + "loss": 255.6524, + "step": 29120 + }, + { + "epoch": 0.5602434837797683, + "grad_norm": 1791.1790269775383, + "learning_rate": 8.255838768279106e-06, + "loss": 259.3277, + "step": 29130 + }, + { + "epoch": 0.5604358090402489, + "grad_norm": 1665.6806488812938, + "learning_rate": 8.249829499645167e-06, + "loss": 260.3279, + "step": 29140 + }, + { + "epoch": 0.5606281343007294, + "grad_norm": 1600.8695300019212, + "learning_rate": 8.243820882920837e-06, + "loss": 254.6754, + "step": 29150 + }, + { + "epoch": 0.56082045956121, + "grad_norm": 1706.1281344378983, + "learning_rate": 8.23781292034422e-06, + "loss": 258.3431, + "step": 29160 + }, + { + "epoch": 0.5610127848216905, + "grad_norm": 1500.2418716396585, + "learning_rate": 8.231805614153192e-06, + "loss": 256.5448, + "step": 29170 + }, + { + "epoch": 0.561205110082171, + "grad_norm": 1740.0418185125993, + "learning_rate": 8.22579896658537e-06, + "loss": 256.3152, + "step": 29180 + }, + { + "epoch": 0.5613974353426515, + "grad_norm": 1669.5863696687672, + "learning_rate": 8.219792979878126e-06, + "loss": 254.8009, + "step": 29190 + }, + { + "epoch": 0.561589760603132, + "grad_norm": 1549.772760257509, + "learning_rate": 8.213787656268599e-06, + "loss": 263.0438, + "step": 29200 + }, + { + "epoch": 0.5617820858636126, + "grad_norm": 1615.7651737854992, + "learning_rate": 8.20778299799367e-06, + "loss": 259.0657, + "step": 29210 + }, + { + "epoch": 0.5619744111240931, + "grad_norm": 1624.1853145065897, + "learning_rate": 8.201779007289975e-06, + "loss": 257.0698, + "step": 29220 + }, + { + "epoch": 0.5621667363845736, + "grad_norm": 1826.796077227274, + "learning_rate": 8.195775686393898e-06, + "loss": 267.6748, + "step": 29230 + }, + { + "epoch": 0.5623590616450541, + "grad_norm": 1727.4431291280343, + "learning_rate": 8.189773037541585e-06, + "loss": 262.4996, + "step": 29240 + }, + { + "epoch": 0.5625513869055346, + "grad_norm": 1587.6399997450299, + "learning_rate": 8.183771062968917e-06, + "loss": 265.5079, + "step": 29250 + }, + { + "epoch": 0.5627437121660152, + "grad_norm": 1883.7327725145358, + "learning_rate": 8.177769764911528e-06, + "loss": 263.0093, + "step": 29260 + }, + { + "epoch": 0.5629360374264957, + "grad_norm": 1694.7681624801678, + "learning_rate": 8.171769145604812e-06, + "loss": 262.3144, + "step": 29270 + }, + { + "epoch": 0.5631283626869762, + "grad_norm": 1619.7653774560392, + "learning_rate": 8.165769207283891e-06, + "loss": 256.7964, + "step": 29280 + }, + { + "epoch": 0.5633206879474567, + "grad_norm": 1749.3642936956164, + "learning_rate": 8.15976995218365e-06, + "loss": 261.9936, + "step": 29290 + }, + { + "epoch": 0.5635130132079372, + "grad_norm": 1572.1873160877105, + "learning_rate": 8.153771382538706e-06, + "loss": 252.2075, + "step": 29300 + }, + { + "epoch": 0.5637053384684177, + "grad_norm": 1574.1147980318397, + "learning_rate": 8.147773500583434e-06, + "loss": 260.9694, + "step": 29310 + }, + { + "epoch": 0.5638976637288983, + "grad_norm": 1639.7355153236501, + "learning_rate": 8.141776308551942e-06, + "loss": 259.9039, + "step": 29320 + }, + { + "epoch": 0.5640899889893788, + "grad_norm": 1733.5032381774695, + "learning_rate": 8.135779808678084e-06, + "loss": 262.8336, + "step": 29330 + }, + { + "epoch": 0.5642823142498593, + "grad_norm": 1698.116775113115, + "learning_rate": 8.129784003195458e-06, + "loss": 252.238, + "step": 29340 + }, + { + "epoch": 0.5644746395103399, + "grad_norm": 1832.8736596042636, + "learning_rate": 8.123788894337405e-06, + "loss": 256.7703, + "step": 29350 + }, + { + "epoch": 0.5646669647708205, + "grad_norm": 1722.3295017627631, + "learning_rate": 8.117794484337003e-06, + "loss": 259.3589, + "step": 29360 + }, + { + "epoch": 0.564859290031301, + "grad_norm": 1744.9682866985647, + "learning_rate": 8.111800775427066e-06, + "loss": 258.7911, + "step": 29370 + }, + { + "epoch": 0.5650516152917815, + "grad_norm": 1763.443636224553, + "learning_rate": 8.10580776984016e-06, + "loss": 258.3529, + "step": 29380 + }, + { + "epoch": 0.565243940552262, + "grad_norm": 1529.8340065435077, + "learning_rate": 8.099815469808573e-06, + "loss": 257.7073, + "step": 29390 + }, + { + "epoch": 0.5654362658127425, + "grad_norm": 1562.8055147644613, + "learning_rate": 8.093823877564343e-06, + "loss": 259.259, + "step": 29400 + }, + { + "epoch": 0.565628591073223, + "grad_norm": 1819.3072836856472, + "learning_rate": 8.087832995339236e-06, + "loss": 257.0694, + "step": 29410 + }, + { + "epoch": 0.5658209163337036, + "grad_norm": 1543.5473407925751, + "learning_rate": 8.081842825364756e-06, + "loss": 251.8829, + "step": 29420 + }, + { + "epoch": 0.5660132415941841, + "grad_norm": 1661.6098921606247, + "learning_rate": 8.075853369872149e-06, + "loss": 258.9299, + "step": 29430 + }, + { + "epoch": 0.5662055668546646, + "grad_norm": 1529.2277228028574, + "learning_rate": 8.069864631092377e-06, + "loss": 259.2561, + "step": 29440 + }, + { + "epoch": 0.5663978921151451, + "grad_norm": 1596.6967262722217, + "learning_rate": 8.063876611256158e-06, + "loss": 260.8965, + "step": 29450 + }, + { + "epoch": 0.5665902173756256, + "grad_norm": 1470.6336917309131, + "learning_rate": 8.057889312593924e-06, + "loss": 264.1809, + "step": 29460 + }, + { + "epoch": 0.5667825426361062, + "grad_norm": 9776.630849906853, + "learning_rate": 8.051902737335847e-06, + "loss": 273.7714, + "step": 29470 + }, + { + "epoch": 0.5669748678965867, + "grad_norm": 1777.4158813809197, + "learning_rate": 8.045916887711822e-06, + "loss": 263.6798, + "step": 29480 + }, + { + "epoch": 0.5671671931570672, + "grad_norm": 1687.0903027100449, + "learning_rate": 8.039931765951485e-06, + "loss": 268.1132, + "step": 29490 + }, + { + "epoch": 0.5673595184175477, + "grad_norm": 1633.984428211593, + "learning_rate": 8.033947374284196e-06, + "loss": 253.8765, + "step": 29500 + }, + { + "epoch": 0.5675518436780282, + "grad_norm": 1662.2432551323188, + "learning_rate": 8.02796371493904e-06, + "loss": 263.5848, + "step": 29510 + }, + { + "epoch": 0.5677441689385088, + "grad_norm": 1758.8880776455665, + "learning_rate": 8.021980790144828e-06, + "loss": 250.4654, + "step": 29520 + }, + { + "epoch": 0.5679364941989893, + "grad_norm": 1542.2274407858008, + "learning_rate": 8.015998602130107e-06, + "loss": 256.701, + "step": 29530 + }, + { + "epoch": 0.5681288194594698, + "grad_norm": 1685.170516342027, + "learning_rate": 8.01001715312314e-06, + "loss": 252.7837, + "step": 29540 + }, + { + "epoch": 0.5683211447199504, + "grad_norm": 1591.2610085534864, + "learning_rate": 8.00403644535191e-06, + "loss": 257.4487, + "step": 29550 + }, + { + "epoch": 0.568513469980431, + "grad_norm": 1758.5769584172062, + "learning_rate": 7.998056481044146e-06, + "loss": 260.9813, + "step": 29560 + }, + { + "epoch": 0.5687057952409115, + "grad_norm": 1804.1089571958166, + "learning_rate": 7.992077262427282e-06, + "loss": 269.32, + "step": 29570 + }, + { + "epoch": 0.568898120501392, + "grad_norm": 1606.5731674684068, + "learning_rate": 7.986098791728475e-06, + "loss": 256.1864, + "step": 29580 + }, + { + "epoch": 0.5690904457618725, + "grad_norm": 1793.0990921106627, + "learning_rate": 7.9801210711746e-06, + "loss": 263.5898, + "step": 29590 + }, + { + "epoch": 0.569282771022353, + "grad_norm": 1556.6486471242072, + "learning_rate": 7.974144102992273e-06, + "loss": 257.2144, + "step": 29600 + }, + { + "epoch": 0.5694750962828335, + "grad_norm": 1876.4372825534501, + "learning_rate": 7.968167889407813e-06, + "loss": 254.0837, + "step": 29610 + }, + { + "epoch": 0.5696674215433141, + "grad_norm": 1659.321834989625, + "learning_rate": 7.96219243264725e-06, + "loss": 260.9888, + "step": 29620 + }, + { + "epoch": 0.5698597468037946, + "grad_norm": 1702.4693842484958, + "learning_rate": 7.956217734936353e-06, + "loss": 256.2627, + "step": 29630 + }, + { + "epoch": 0.5700520720642751, + "grad_norm": 1582.4475567609404, + "learning_rate": 7.950243798500593e-06, + "loss": 253.6847, + "step": 29640 + }, + { + "epoch": 0.5702443973247556, + "grad_norm": 1655.4415922933936, + "learning_rate": 7.94427062556517e-06, + "loss": 261.9364, + "step": 29650 + }, + { + "epoch": 0.5704367225852361, + "grad_norm": 1568.66228727692, + "learning_rate": 7.938298218354985e-06, + "loss": 255.769, + "step": 29660 + }, + { + "epoch": 0.5706290478457167, + "grad_norm": 1742.6320506014838, + "learning_rate": 7.932326579094665e-06, + "loss": 261.6543, + "step": 29670 + }, + { + "epoch": 0.5708213731061972, + "grad_norm": 1907.4995829468237, + "learning_rate": 7.926355710008545e-06, + "loss": 258.1616, + "step": 29680 + }, + { + "epoch": 0.5710136983666777, + "grad_norm": 1755.1787331280811, + "learning_rate": 7.920385613320675e-06, + "loss": 254.7824, + "step": 29690 + }, + { + "epoch": 0.5712060236271582, + "grad_norm": 1666.4852055395288, + "learning_rate": 7.914416291254817e-06, + "loss": 248.0865, + "step": 29700 + }, + { + "epoch": 0.5713983488876387, + "grad_norm": 1610.2507901329434, + "learning_rate": 7.908447746034447e-06, + "loss": 251.9105, + "step": 29710 + }, + { + "epoch": 0.5715906741481193, + "grad_norm": 1511.8163434725564, + "learning_rate": 7.902479979882749e-06, + "loss": 263.9274, + "step": 29720 + }, + { + "epoch": 0.5717829994085998, + "grad_norm": 1785.3980715498606, + "learning_rate": 7.896512995022614e-06, + "loss": 260.5769, + "step": 29730 + }, + { + "epoch": 0.5719753246690803, + "grad_norm": 2264.234484715675, + "learning_rate": 7.890546793676652e-06, + "loss": 255.2928, + "step": 29740 + }, + { + "epoch": 0.5721676499295609, + "grad_norm": 1640.052917357427, + "learning_rate": 7.884581378067171e-06, + "loss": 260.5069, + "step": 29750 + }, + { + "epoch": 0.5723599751900414, + "grad_norm": 1596.1775922252384, + "learning_rate": 7.878616750416186e-06, + "loss": 255.1843, + "step": 29760 + }, + { + "epoch": 0.572552300450522, + "grad_norm": 1701.8181508222356, + "learning_rate": 7.872652912945426e-06, + "loss": 255.3512, + "step": 29770 + }, + { + "epoch": 0.5727446257110025, + "grad_norm": 1625.8288873639642, + "learning_rate": 7.866689867876323e-06, + "loss": 260.3132, + "step": 29780 + }, + { + "epoch": 0.572936950971483, + "grad_norm": 1685.9981537972806, + "learning_rate": 7.860727617430013e-06, + "loss": 257.2799, + "step": 29790 + }, + { + "epoch": 0.5731292762319635, + "grad_norm": 1530.6522926872287, + "learning_rate": 7.85476616382733e-06, + "loss": 248.3945, + "step": 29800 + }, + { + "epoch": 0.573321601492444, + "grad_norm": 1822.0703356421443, + "learning_rate": 7.848805509288824e-06, + "loss": 264.8996, + "step": 29810 + }, + { + "epoch": 0.5735139267529246, + "grad_norm": 1543.0483468032942, + "learning_rate": 7.84284565603474e-06, + "loss": 255.3366, + "step": 29820 + }, + { + "epoch": 0.5737062520134051, + "grad_norm": 1452.1735410792028, + "learning_rate": 7.83688660628502e-06, + "loss": 257.8694, + "step": 29830 + }, + { + "epoch": 0.5738985772738856, + "grad_norm": 1854.4592069948071, + "learning_rate": 7.830928362259313e-06, + "loss": 257.7086, + "step": 29840 + }, + { + "epoch": 0.5740909025343661, + "grad_norm": 1583.422677836044, + "learning_rate": 7.824970926176967e-06, + "loss": 260.0587, + "step": 29850 + }, + { + "epoch": 0.5742832277948466, + "grad_norm": 1709.211584185582, + "learning_rate": 7.819014300257033e-06, + "loss": 255.1465, + "step": 29860 + }, + { + "epoch": 0.5744755530553272, + "grad_norm": 1682.8532648989647, + "learning_rate": 7.813058486718252e-06, + "loss": 260.2551, + "step": 29870 + }, + { + "epoch": 0.5746678783158077, + "grad_norm": 1587.5191109815617, + "learning_rate": 7.80710348777906e-06, + "loss": 258.077, + "step": 29880 + }, + { + "epoch": 0.5748602035762882, + "grad_norm": 1716.47193098543, + "learning_rate": 7.801149305657609e-06, + "loss": 257.1473, + "step": 29890 + }, + { + "epoch": 0.5750525288367687, + "grad_norm": 1622.459882534187, + "learning_rate": 7.795195942571722e-06, + "loss": 267.1598, + "step": 29900 + }, + { + "epoch": 0.5752448540972492, + "grad_norm": 1633.6140126424693, + "learning_rate": 7.789243400738934e-06, + "loss": 255.4259, + "step": 29910 + }, + { + "epoch": 0.5754371793577298, + "grad_norm": 1693.6210912418912, + "learning_rate": 7.783291682376465e-06, + "loss": 265.9042, + "step": 29920 + }, + { + "epoch": 0.5756295046182103, + "grad_norm": 1658.3411836017206, + "learning_rate": 7.77734078970124e-06, + "loss": 265.6731, + "step": 29930 + }, + { + "epoch": 0.5758218298786908, + "grad_norm": 1684.4254026975589, + "learning_rate": 7.77139072492986e-06, + "loss": 255.4352, + "step": 29940 + }, + { + "epoch": 0.5760141551391713, + "grad_norm": 1544.7323611779777, + "learning_rate": 7.765441490278625e-06, + "loss": 256.1602, + "step": 29950 + }, + { + "epoch": 0.5762064803996519, + "grad_norm": 1623.63606167973, + "learning_rate": 7.759493087963535e-06, + "loss": 260.5616, + "step": 29960 + }, + { + "epoch": 0.5763988056601325, + "grad_norm": 1545.0933427402958, + "learning_rate": 7.753545520200264e-06, + "loss": 261.6, + "step": 29970 + }, + { + "epoch": 0.576591130920613, + "grad_norm": 1488.7030586598682, + "learning_rate": 7.747598789204183e-06, + "loss": 254.9581, + "step": 29980 + }, + { + "epoch": 0.5767834561810935, + "grad_norm": 1583.2270031910834, + "learning_rate": 7.741652897190355e-06, + "loss": 260.2494, + "step": 29990 + }, + { + "epoch": 0.576975781441574, + "grad_norm": 1642.5285050128755, + "learning_rate": 7.735707846373527e-06, + "loss": 253.3322, + "step": 30000 + }, + { + "epoch": 0.5771681067020545, + "grad_norm": 1634.2302429630959, + "learning_rate": 7.72976363896813e-06, + "loss": 258.2329, + "step": 30010 + }, + { + "epoch": 0.5773604319625351, + "grad_norm": 1724.195099879509, + "learning_rate": 7.723820277188278e-06, + "loss": 260.7178, + "step": 30020 + }, + { + "epoch": 0.5775527572230156, + "grad_norm": 1593.5780539050602, + "learning_rate": 7.717877763247787e-06, + "loss": 255.6292, + "step": 30030 + }, + { + "epoch": 0.5777450824834961, + "grad_norm": 1924.6729037815921, + "learning_rate": 7.711936099360138e-06, + "loss": 262.5923, + "step": 30040 + }, + { + "epoch": 0.5779374077439766, + "grad_norm": 1500.2251309822552, + "learning_rate": 7.705995287738498e-06, + "loss": 256.1763, + "step": 30050 + }, + { + "epoch": 0.5781297330044571, + "grad_norm": 1567.5068155497322, + "learning_rate": 7.70005533059573e-06, + "loss": 257.8809, + "step": 30060 + }, + { + "epoch": 0.5783220582649377, + "grad_norm": 1586.9441191292162, + "learning_rate": 7.694116230144368e-06, + "loss": 256.0111, + "step": 30070 + }, + { + "epoch": 0.5785143835254182, + "grad_norm": 1653.2544937911218, + "learning_rate": 7.688177988596628e-06, + "loss": 258.6795, + "step": 30080 + }, + { + "epoch": 0.5787067087858987, + "grad_norm": 1866.6799021531012, + "learning_rate": 7.682240608164401e-06, + "loss": 250.3602, + "step": 30090 + }, + { + "epoch": 0.5788990340463792, + "grad_norm": 1455.2311803550322, + "learning_rate": 7.676304091059273e-06, + "loss": 256.5646, + "step": 30100 + }, + { + "epoch": 0.5790913593068597, + "grad_norm": 1708.5237439901796, + "learning_rate": 7.670368439492495e-06, + "loss": 257.9868, + "step": 30110 + }, + { + "epoch": 0.5792836845673403, + "grad_norm": 1543.8437704096577, + "learning_rate": 7.664433655674995e-06, + "loss": 242.7472, + "step": 30120 + }, + { + "epoch": 0.5794760098278208, + "grad_norm": 1635.6584103819475, + "learning_rate": 7.658499741817383e-06, + "loss": 263.0051, + "step": 30130 + }, + { + "epoch": 0.5796683350883013, + "grad_norm": 1682.0024499297333, + "learning_rate": 7.65256670012995e-06, + "loss": 252.0004, + "step": 30140 + }, + { + "epoch": 0.5798606603487818, + "grad_norm": 1740.574253156307, + "learning_rate": 7.646634532822652e-06, + "loss": 258.3138, + "step": 30150 + }, + { + "epoch": 0.5800529856092624, + "grad_norm": 1706.1034769315715, + "learning_rate": 7.64070324210512e-06, + "loss": 261.4026, + "step": 30160 + }, + { + "epoch": 0.580245310869743, + "grad_norm": 1722.00151312128, + "learning_rate": 7.634772830186668e-06, + "loss": 249.8261, + "step": 30170 + }, + { + "epoch": 0.5804376361302235, + "grad_norm": 1870.2121998216276, + "learning_rate": 7.628843299276276e-06, + "loss": 262.8084, + "step": 30180 + }, + { + "epoch": 0.580629961390704, + "grad_norm": 1549.6376563168453, + "learning_rate": 7.62291465158259e-06, + "loss": 252.0434, + "step": 30190 + }, + { + "epoch": 0.5808222866511845, + "grad_norm": 1520.1811528740461, + "learning_rate": 7.616986889313939e-06, + "loss": 249.7399, + "step": 30200 + }, + { + "epoch": 0.581014611911665, + "grad_norm": 1750.1855122388404, + "learning_rate": 7.611060014678313e-06, + "loss": 252.6227, + "step": 30210 + }, + { + "epoch": 0.5812069371721456, + "grad_norm": 1569.694157718902, + "learning_rate": 7.605134029883381e-06, + "loss": 254.9713, + "step": 30220 + }, + { + "epoch": 0.5813992624326261, + "grad_norm": 1638.9948737839395, + "learning_rate": 7.599208937136465e-06, + "loss": 254.3394, + "step": 30230 + }, + { + "epoch": 0.5815915876931066, + "grad_norm": 1989.2750455640175, + "learning_rate": 7.593284738644574e-06, + "loss": 261.4517, + "step": 30240 + }, + { + "epoch": 0.5817839129535871, + "grad_norm": 1495.2031069416976, + "learning_rate": 7.58736143661437e-06, + "loss": 255.4625, + "step": 30250 + }, + { + "epoch": 0.5819762382140676, + "grad_norm": 1513.314032026537, + "learning_rate": 7.5814390332521824e-06, + "loss": 255.932, + "step": 30260 + }, + { + "epoch": 0.5821685634745482, + "grad_norm": 1670.8325677670348, + "learning_rate": 7.575517530764011e-06, + "loss": 260.4452, + "step": 30270 + }, + { + "epoch": 0.5823608887350287, + "grad_norm": 1472.943918337438, + "learning_rate": 7.569596931355517e-06, + "loss": 255.0141, + "step": 30280 + }, + { + "epoch": 0.5825532139955092, + "grad_norm": 1519.1418263985556, + "learning_rate": 7.56367723723203e-06, + "loss": 253.5092, + "step": 30290 + }, + { + "epoch": 0.5827455392559897, + "grad_norm": 1769.2208171709449, + "learning_rate": 7.557758450598534e-06, + "loss": 259.7952, + "step": 30300 + }, + { + "epoch": 0.5829378645164702, + "grad_norm": 1647.2983558351118, + "learning_rate": 7.551840573659677e-06, + "loss": 257.521, + "step": 30310 + }, + { + "epoch": 0.5831301897769507, + "grad_norm": 1746.1721663956646, + "learning_rate": 7.5459236086197775e-06, + "loss": 260.4395, + "step": 30320 + }, + { + "epoch": 0.5833225150374313, + "grad_norm": 1505.8451451374303, + "learning_rate": 7.5400075576828e-06, + "loss": 254.8534, + "step": 30330 + }, + { + "epoch": 0.5835148402979118, + "grad_norm": 1884.5504649503973, + "learning_rate": 7.534092423052382e-06, + "loss": 253.4212, + "step": 30340 + }, + { + "epoch": 0.5837071655583923, + "grad_norm": 1733.6211561157456, + "learning_rate": 7.5281782069318075e-06, + "loss": 260.8099, + "step": 30350 + }, + { + "epoch": 0.5838994908188728, + "grad_norm": 1507.4167716955735, + "learning_rate": 7.522264911524031e-06, + "loss": 249.6041, + "step": 30360 + }, + { + "epoch": 0.5840918160793535, + "grad_norm": 1655.6587040136014, + "learning_rate": 7.516352539031654e-06, + "loss": 255.6054, + "step": 30370 + }, + { + "epoch": 0.584284141339834, + "grad_norm": 3409.4158939378403, + "learning_rate": 7.510441091656933e-06, + "loss": 254.867, + "step": 30380 + }, + { + "epoch": 0.5844764666003145, + "grad_norm": 1626.0703420585514, + "learning_rate": 7.504530571601792e-06, + "loss": 257.5422, + "step": 30390 + }, + { + "epoch": 0.584668791860795, + "grad_norm": 1840.8861436924435, + "learning_rate": 7.498620981067799e-06, + "loss": 256.4879, + "step": 30400 + }, + { + "epoch": 0.5848611171212755, + "grad_norm": 1674.8549404658115, + "learning_rate": 7.492712322256177e-06, + "loss": 259.2995, + "step": 30410 + }, + { + "epoch": 0.585053442381756, + "grad_norm": 1637.4692718380159, + "learning_rate": 7.486804597367807e-06, + "loss": 250.6786, + "step": 30420 + }, + { + "epoch": 0.5852457676422366, + "grad_norm": 1693.3821592007978, + "learning_rate": 7.480897808603219e-06, + "loss": 250.0638, + "step": 30430 + }, + { + "epoch": 0.5854380929027171, + "grad_norm": 1577.6699252860035, + "learning_rate": 7.474991958162594e-06, + "loss": 252.9034, + "step": 30440 + }, + { + "epoch": 0.5856304181631976, + "grad_norm": 1551.5076809213983, + "learning_rate": 7.469087048245758e-06, + "loss": 251.5118, + "step": 30450 + }, + { + "epoch": 0.5858227434236781, + "grad_norm": 2040.7377270627353, + "learning_rate": 7.463183081052201e-06, + "loss": 267.5035, + "step": 30460 + }, + { + "epoch": 0.5860150686841586, + "grad_norm": 1936.2896028491634, + "learning_rate": 7.457280058781049e-06, + "loss": 259.5283, + "step": 30470 + }, + { + "epoch": 0.5862073939446392, + "grad_norm": 1561.9808574916208, + "learning_rate": 7.451377983631078e-06, + "loss": 263.088, + "step": 30480 + }, + { + "epoch": 0.5863997192051197, + "grad_norm": 1666.215732910661, + "learning_rate": 7.445476857800717e-06, + "loss": 254.9014, + "step": 30490 + }, + { + "epoch": 0.5865920444656002, + "grad_norm": 1616.762714608203, + "learning_rate": 7.439576683488039e-06, + "loss": 260.8711, + "step": 30500 + }, + { + "epoch": 0.5867843697260807, + "grad_norm": 1587.6657423708082, + "learning_rate": 7.4336774628907604e-06, + "loss": 257.7554, + "step": 30510 + }, + { + "epoch": 0.5869766949865612, + "grad_norm": 1597.979394881119, + "learning_rate": 7.427779198206238e-06, + "loss": 250.839, + "step": 30520 + }, + { + "epoch": 0.5871690202470418, + "grad_norm": 1783.0718997255078, + "learning_rate": 7.421881891631487e-06, + "loss": 254.9956, + "step": 30530 + }, + { + "epoch": 0.5873613455075223, + "grad_norm": 1710.507265803589, + "learning_rate": 7.415985545363152e-06, + "loss": 250.5494, + "step": 30540 + }, + { + "epoch": 0.5875536707680028, + "grad_norm": 1623.6243047475557, + "learning_rate": 7.410090161597523e-06, + "loss": 252.9712, + "step": 30550 + }, + { + "epoch": 0.5877459960284833, + "grad_norm": 1721.4330559834823, + "learning_rate": 7.404195742530533e-06, + "loss": 252.7263, + "step": 30560 + }, + { + "epoch": 0.587938321288964, + "grad_norm": 1609.3097671558262, + "learning_rate": 7.398302290357763e-06, + "loss": 253.4286, + "step": 30570 + }, + { + "epoch": 0.5881306465494445, + "grad_norm": 1588.2475167830564, + "learning_rate": 7.392409807274421e-06, + "loss": 249.8665, + "step": 30580 + }, + { + "epoch": 0.588322971809925, + "grad_norm": 1667.5022025425314, + "learning_rate": 7.386518295475355e-06, + "loss": 255.5411, + "step": 30590 + }, + { + "epoch": 0.5885152970704055, + "grad_norm": 1637.3017647446482, + "learning_rate": 7.380627757155065e-06, + "loss": 257.2046, + "step": 30600 + }, + { + "epoch": 0.588707622330886, + "grad_norm": 1575.354829647761, + "learning_rate": 7.374738194507675e-06, + "loss": 251.7738, + "step": 30610 + }, + { + "epoch": 0.5888999475913665, + "grad_norm": 1664.3297491301137, + "learning_rate": 7.3688496097269494e-06, + "loss": 253.2013, + "step": 30620 + }, + { + "epoch": 0.5890922728518471, + "grad_norm": 1751.6774219865351, + "learning_rate": 7.362962005006286e-06, + "loss": 265.0654, + "step": 30630 + }, + { + "epoch": 0.5892845981123276, + "grad_norm": 1584.3565108544628, + "learning_rate": 7.3570753825387275e-06, + "loss": 246.735, + "step": 30640 + }, + { + "epoch": 0.5894769233728081, + "grad_norm": 1619.6574234851712, + "learning_rate": 7.35118974451694e-06, + "loss": 246.4986, + "step": 30650 + }, + { + "epoch": 0.5896692486332886, + "grad_norm": 1857.8790454709745, + "learning_rate": 7.345305093133226e-06, + "loss": 250.8391, + "step": 30660 + }, + { + "epoch": 0.5898615738937691, + "grad_norm": 1530.3763290482568, + "learning_rate": 7.3394214305795175e-06, + "loss": 255.2492, + "step": 30670 + }, + { + "epoch": 0.5900538991542497, + "grad_norm": 1637.8511492093023, + "learning_rate": 7.33353875904739e-06, + "loss": 252.3276, + "step": 30680 + }, + { + "epoch": 0.5902462244147302, + "grad_norm": 1602.444571441276, + "learning_rate": 7.327657080728032e-06, + "loss": 249.006, + "step": 30690 + }, + { + "epoch": 0.5904385496752107, + "grad_norm": 1713.9192670303632, + "learning_rate": 7.321776397812279e-06, + "loss": 252.6097, + "step": 30700 + }, + { + "epoch": 0.5906308749356912, + "grad_norm": 1656.1174715814968, + "learning_rate": 7.315896712490584e-06, + "loss": 257.8542, + "step": 30710 + }, + { + "epoch": 0.5908232001961717, + "grad_norm": 1590.4843269849346, + "learning_rate": 7.310018026953036e-06, + "loss": 254.9456, + "step": 30720 + }, + { + "epoch": 0.5910155254566523, + "grad_norm": 1566.7090224771084, + "learning_rate": 7.304140343389348e-06, + "loss": 253.2903, + "step": 30730 + }, + { + "epoch": 0.5912078507171328, + "grad_norm": 1566.0103004199314, + "learning_rate": 7.298263663988853e-06, + "loss": 260.1873, + "step": 30740 + }, + { + "epoch": 0.5914001759776133, + "grad_norm": 2092.2012335686536, + "learning_rate": 7.292387990940526e-06, + "loss": 244.4104, + "step": 30750 + }, + { + "epoch": 0.5915925012380938, + "grad_norm": 1650.419203500856, + "learning_rate": 7.286513326432953e-06, + "loss": 252.8488, + "step": 30760 + }, + { + "epoch": 0.5917848264985743, + "grad_norm": 1578.28047741487, + "learning_rate": 7.2806396726543526e-06, + "loss": 262.9985, + "step": 30770 + }, + { + "epoch": 0.591977151759055, + "grad_norm": 1845.8480121646596, + "learning_rate": 7.2747670317925625e-06, + "loss": 257.6338, + "step": 30780 + }, + { + "epoch": 0.5921694770195355, + "grad_norm": 1974.4470116573864, + "learning_rate": 7.268895406035046e-06, + "loss": 251.438, + "step": 30790 + }, + { + "epoch": 0.592361802280016, + "grad_norm": 1841.9674024691687, + "learning_rate": 7.263024797568884e-06, + "loss": 261.3318, + "step": 30800 + }, + { + "epoch": 0.5925541275404965, + "grad_norm": 1485.6737540030588, + "learning_rate": 7.257155208580778e-06, + "loss": 251.8768, + "step": 30810 + }, + { + "epoch": 0.592746452800977, + "grad_norm": 1706.8842117862596, + "learning_rate": 7.251286641257062e-06, + "loss": 257.6578, + "step": 30820 + }, + { + "epoch": 0.5929387780614576, + "grad_norm": 1663.8095588862873, + "learning_rate": 7.245419097783674e-06, + "loss": 262.0871, + "step": 30830 + }, + { + "epoch": 0.5931311033219381, + "grad_norm": 1573.6637705795938, + "learning_rate": 7.239552580346181e-06, + "loss": 245.1458, + "step": 30840 + }, + { + "epoch": 0.5933234285824186, + "grad_norm": 1699.2428161549244, + "learning_rate": 7.233687091129757e-06, + "loss": 265.6187, + "step": 30850 + }, + { + "epoch": 0.5935157538428991, + "grad_norm": 1973.7300314829045, + "learning_rate": 7.227822632319208e-06, + "loss": 264.8671, + "step": 30860 + }, + { + "epoch": 0.5937080791033796, + "grad_norm": 2116.200925766933, + "learning_rate": 7.221959206098945e-06, + "loss": 269.4615, + "step": 30870 + }, + { + "epoch": 0.5939004043638602, + "grad_norm": 1820.7836993861838, + "learning_rate": 7.216096814652992e-06, + "loss": 267.4089, + "step": 30880 + }, + { + "epoch": 0.5940927296243407, + "grad_norm": 1615.8204308074812, + "learning_rate": 7.210235460165002e-06, + "loss": 254.4707, + "step": 30890 + }, + { + "epoch": 0.5942850548848212, + "grad_norm": 1658.3097901525714, + "learning_rate": 7.2043751448182275e-06, + "loss": 252.7119, + "step": 30900 + }, + { + "epoch": 0.5944773801453017, + "grad_norm": 1673.4034565115521, + "learning_rate": 7.198515870795542e-06, + "loss": 250.9427, + "step": 30910 + }, + { + "epoch": 0.5946697054057822, + "grad_norm": 1547.5950616964494, + "learning_rate": 7.192657640279421e-06, + "loss": 259.1345, + "step": 30920 + }, + { + "epoch": 0.5948620306662628, + "grad_norm": 1675.760765028026, + "learning_rate": 7.18680045545197e-06, + "loss": 258.7112, + "step": 30930 + }, + { + "epoch": 0.5950543559267433, + "grad_norm": 2205.419779580163, + "learning_rate": 7.180944318494888e-06, + "loss": 257.2583, + "step": 30940 + }, + { + "epoch": 0.5952466811872238, + "grad_norm": 1528.8485652231484, + "learning_rate": 7.175089231589485e-06, + "loss": 258.0885, + "step": 30950 + }, + { + "epoch": 0.5954390064477043, + "grad_norm": 1669.1332566979506, + "learning_rate": 7.1692351969166905e-06, + "loss": 266.1266, + "step": 30960 + }, + { + "epoch": 0.5956313317081848, + "grad_norm": 1705.1472426130142, + "learning_rate": 7.163382216657033e-06, + "loss": 250.7686, + "step": 30970 + }, + { + "epoch": 0.5958236569686655, + "grad_norm": 1550.030714470702, + "learning_rate": 7.157530292990654e-06, + "loss": 252.4722, + "step": 30980 + }, + { + "epoch": 0.596015982229146, + "grad_norm": 1587.797767818733, + "learning_rate": 7.151679428097291e-06, + "loss": 244.6052, + "step": 30990 + }, + { + "epoch": 0.5962083074896265, + "grad_norm": 1718.5129403705446, + "learning_rate": 7.145829624156304e-06, + "loss": 255.8966, + "step": 31000 + }, + { + "epoch": 0.596400632750107, + "grad_norm": 1588.9529401264854, + "learning_rate": 7.1399808833466445e-06, + "loss": 252.2865, + "step": 31010 + }, + { + "epoch": 0.5965929580105875, + "grad_norm": 1476.7402336192167, + "learning_rate": 7.134133207846869e-06, + "loss": 254.0667, + "step": 31020 + }, + { + "epoch": 0.5967852832710681, + "grad_norm": 1657.7247551425467, + "learning_rate": 7.128286599835139e-06, + "loss": 258.9685, + "step": 31030 + }, + { + "epoch": 0.5969776085315486, + "grad_norm": 1493.3089829784653, + "learning_rate": 7.122441061489228e-06, + "loss": 244.1329, + "step": 31040 + }, + { + "epoch": 0.5971699337920291, + "grad_norm": 1697.6064276542609, + "learning_rate": 7.1165965949864934e-06, + "loss": 254.1651, + "step": 31050 + }, + { + "epoch": 0.5973622590525096, + "grad_norm": 1598.173350250402, + "learning_rate": 7.110753202503906e-06, + "loss": 252.9963, + "step": 31060 + }, + { + "epoch": 0.5975545843129901, + "grad_norm": 1715.0095474392097, + "learning_rate": 7.104910886218036e-06, + "loss": 255.701, + "step": 31070 + }, + { + "epoch": 0.5977469095734707, + "grad_norm": 1797.1796430432103, + "learning_rate": 7.0990696483050466e-06, + "loss": 254.4337, + "step": 31080 + }, + { + "epoch": 0.5979392348339512, + "grad_norm": 1543.2766174927401, + "learning_rate": 7.093229490940704e-06, + "loss": 255.3483, + "step": 31090 + }, + { + "epoch": 0.5981315600944317, + "grad_norm": 1530.8538209123392, + "learning_rate": 7.087390416300364e-06, + "loss": 246.355, + "step": 31100 + }, + { + "epoch": 0.5983238853549122, + "grad_norm": 1534.7340270855257, + "learning_rate": 7.081552426558995e-06, + "loss": 261.3595, + "step": 31110 + }, + { + "epoch": 0.5985162106153927, + "grad_norm": 1597.4918208427741, + "learning_rate": 7.075715523891146e-06, + "loss": 247.4333, + "step": 31120 + }, + { + "epoch": 0.5987085358758732, + "grad_norm": 1619.4496582796928, + "learning_rate": 7.069879710470965e-06, + "loss": 249.2795, + "step": 31130 + }, + { + "epoch": 0.5989008611363538, + "grad_norm": 1766.288958669557, + "learning_rate": 7.064044988472204e-06, + "loss": 245.3983, + "step": 31140 + }, + { + "epoch": 0.5990931863968343, + "grad_norm": 1715.6246582145068, + "learning_rate": 7.058211360068196e-06, + "loss": 248.6759, + "step": 31150 + }, + { + "epoch": 0.5992855116573148, + "grad_norm": 1600.9913551559735, + "learning_rate": 7.052378827431871e-06, + "loss": 254.0407, + "step": 31160 + }, + { + "epoch": 0.5994778369177953, + "grad_norm": 1631.445811614129, + "learning_rate": 7.046547392735747e-06, + "loss": 247.8378, + "step": 31170 + }, + { + "epoch": 0.5996701621782758, + "grad_norm": 1841.4276324290997, + "learning_rate": 7.040717058151945e-06, + "loss": 249.4191, + "step": 31180 + }, + { + "epoch": 0.5998624874387565, + "grad_norm": 1668.9245369334262, + "learning_rate": 7.034887825852164e-06, + "loss": 253.9356, + "step": 31190 + }, + { + "epoch": 0.600054812699237, + "grad_norm": 1617.808100854206, + "learning_rate": 7.029059698007699e-06, + "loss": 245.587, + "step": 31200 + }, + { + "epoch": 0.6002471379597175, + "grad_norm": 1861.6967300436138, + "learning_rate": 7.023232676789424e-06, + "loss": 262.5603, + "step": 31210 + }, + { + "epoch": 0.600439463220198, + "grad_norm": 1984.2303963111485, + "learning_rate": 7.01740676436782e-06, + "loss": 252.9359, + "step": 31220 + }, + { + "epoch": 0.6006317884806786, + "grad_norm": 1644.3810825427179, + "learning_rate": 7.011581962912936e-06, + "loss": 256.1246, + "step": 31230 + }, + { + "epoch": 0.6008241137411591, + "grad_norm": 1539.2303624853166, + "learning_rate": 7.005758274594412e-06, + "loss": 252.4768, + "step": 31240 + }, + { + "epoch": 0.6010164390016396, + "grad_norm": 1697.7370864596169, + "learning_rate": 6.999935701581482e-06, + "loss": 251.9505, + "step": 31250 + }, + { + "epoch": 0.6012087642621201, + "grad_norm": 1692.0694242967468, + "learning_rate": 6.9941142460429555e-06, + "loss": 251.524, + "step": 31260 + }, + { + "epoch": 0.6014010895226006, + "grad_norm": 1514.3251569804493, + "learning_rate": 6.988293910147229e-06, + "loss": 252.5521, + "step": 31270 + }, + { + "epoch": 0.6015934147830811, + "grad_norm": 1629.8771022634385, + "learning_rate": 6.982474696062278e-06, + "loss": 253.2377, + "step": 31280 + }, + { + "epoch": 0.6017857400435617, + "grad_norm": 1507.48910827372, + "learning_rate": 6.97665660595567e-06, + "loss": 252.0084, + "step": 31290 + }, + { + "epoch": 0.6019780653040422, + "grad_norm": 1642.313027731901, + "learning_rate": 6.970839641994545e-06, + "loss": 249.8814, + "step": 31300 + }, + { + "epoch": 0.6021703905645227, + "grad_norm": 1656.1947154348657, + "learning_rate": 6.965023806345619e-06, + "loss": 249.3657, + "step": 31310 + }, + { + "epoch": 0.6023627158250032, + "grad_norm": 1389.1206807563376, + "learning_rate": 6.959209101175206e-06, + "loss": 248.1517, + "step": 31320 + }, + { + "epoch": 0.6025550410854837, + "grad_norm": 1668.3207880499845, + "learning_rate": 6.9533955286491805e-06, + "loss": 257.285, + "step": 31330 + }, + { + "epoch": 0.6027473663459643, + "grad_norm": 1427.8934830695273, + "learning_rate": 6.947583090933008e-06, + "loss": 245.3844, + "step": 31340 + }, + { + "epoch": 0.6029396916064448, + "grad_norm": 1489.9132665636912, + "learning_rate": 6.941771790191716e-06, + "loss": 247.1532, + "step": 31350 + }, + { + "epoch": 0.6031320168669253, + "grad_norm": 1563.59563281525, + "learning_rate": 6.9359616285899266e-06, + "loss": 253.3851, + "step": 31360 + }, + { + "epoch": 0.6033243421274058, + "grad_norm": 1878.7687245126763, + "learning_rate": 6.930152608291829e-06, + "loss": 259.8763, + "step": 31370 + }, + { + "epoch": 0.6035166673878863, + "grad_norm": 1539.902923288817, + "learning_rate": 6.924344731461179e-06, + "loss": 245.4633, + "step": 31380 + }, + { + "epoch": 0.603708992648367, + "grad_norm": 1550.7855968338188, + "learning_rate": 6.918538000261325e-06, + "loss": 249.0463, + "step": 31390 + }, + { + "epoch": 0.6039013179088475, + "grad_norm": 1988.9150506110027, + "learning_rate": 6.912732416855171e-06, + "loss": 256.2311, + "step": 31400 + }, + { + "epoch": 0.604093643169328, + "grad_norm": 1878.9682886092892, + "learning_rate": 6.906927983405207e-06, + "loss": 257.5113, + "step": 31410 + }, + { + "epoch": 0.6042859684298085, + "grad_norm": 1642.4895769428017, + "learning_rate": 6.901124702073481e-06, + "loss": 249.8725, + "step": 31420 + }, + { + "epoch": 0.604478293690289, + "grad_norm": 1876.8221362242273, + "learning_rate": 6.895322575021628e-06, + "loss": 255.899, + "step": 31430 + }, + { + "epoch": 0.6046706189507696, + "grad_norm": 1773.895536924106, + "learning_rate": 6.88952160441084e-06, + "loss": 256.9432, + "step": 31440 + }, + { + "epoch": 0.6048629442112501, + "grad_norm": 1608.965136540702, + "learning_rate": 6.8837217924018825e-06, + "loss": 250.8332, + "step": 31450 + }, + { + "epoch": 0.6050552694717306, + "grad_norm": 1537.358247161311, + "learning_rate": 6.877923141155087e-06, + "loss": 257.739, + "step": 31460 + }, + { + "epoch": 0.6052475947322111, + "grad_norm": 1663.2277440354994, + "learning_rate": 6.87212565283036e-06, + "loss": 246.3145, + "step": 31470 + }, + { + "epoch": 0.6054399199926916, + "grad_norm": 1802.6395685323575, + "learning_rate": 6.8663293295871715e-06, + "loss": 246.6193, + "step": 31480 + }, + { + "epoch": 0.6056322452531722, + "grad_norm": 1522.459516268851, + "learning_rate": 6.86053417358455e-06, + "loss": 255.4797, + "step": 31490 + }, + { + "epoch": 0.6058245705136527, + "grad_norm": 1624.380718400352, + "learning_rate": 6.854740186981102e-06, + "loss": 251.4926, + "step": 31500 + }, + { + "epoch": 0.6060168957741332, + "grad_norm": 1574.675381804736, + "learning_rate": 6.848947371934989e-06, + "loss": 255.4592, + "step": 31510 + }, + { + "epoch": 0.6062092210346137, + "grad_norm": 1681.2388359024635, + "learning_rate": 6.843155730603939e-06, + "loss": 257.7308, + "step": 31520 + }, + { + "epoch": 0.6064015462950942, + "grad_norm": 1530.8075190777176, + "learning_rate": 6.837365265145237e-06, + "loss": 247.6673, + "step": 31530 + }, + { + "epoch": 0.6065938715555748, + "grad_norm": 1628.2402185766443, + "learning_rate": 6.831575977715745e-06, + "loss": 254.0782, + "step": 31540 + }, + { + "epoch": 0.6067861968160553, + "grad_norm": 1544.8008847477113, + "learning_rate": 6.825787870471873e-06, + "loss": 248.9658, + "step": 31550 + }, + { + "epoch": 0.6069785220765358, + "grad_norm": 1867.8608113699281, + "learning_rate": 6.820000945569592e-06, + "loss": 250.3517, + "step": 31560 + }, + { + "epoch": 0.6071708473370163, + "grad_norm": 2162.486715266018, + "learning_rate": 6.814215205164444e-06, + "loss": 261.1754, + "step": 31570 + }, + { + "epoch": 0.6073631725974968, + "grad_norm": 1577.9628754088033, + "learning_rate": 6.808430651411518e-06, + "loss": 248.5343, + "step": 31580 + }, + { + "epoch": 0.6075554978579775, + "grad_norm": 1709.8006448517604, + "learning_rate": 6.802647286465461e-06, + "loss": 251.4245, + "step": 31590 + }, + { + "epoch": 0.607747823118458, + "grad_norm": 1705.761315145192, + "learning_rate": 6.796865112480482e-06, + "loss": 258.8202, + "step": 31600 + }, + { + "epoch": 0.6079401483789385, + "grad_norm": 1484.2652345066156, + "learning_rate": 6.79108413161035e-06, + "loss": 253.5714, + "step": 31610 + }, + { + "epoch": 0.608132473639419, + "grad_norm": 1799.1816316610466, + "learning_rate": 6.785304346008381e-06, + "loss": 252.6374, + "step": 31620 + }, + { + "epoch": 0.6083247988998995, + "grad_norm": 1538.6895950374667, + "learning_rate": 6.779525757827452e-06, + "loss": 246.7217, + "step": 31630 + }, + { + "epoch": 0.6085171241603801, + "grad_norm": 1596.60695218881, + "learning_rate": 6.773748369219986e-06, + "loss": 255.4618, + "step": 31640 + }, + { + "epoch": 0.6087094494208606, + "grad_norm": 1839.616090262443, + "learning_rate": 6.767972182337974e-06, + "loss": 260.0553, + "step": 31650 + }, + { + "epoch": 0.6089017746813411, + "grad_norm": 1613.0026459049998, + "learning_rate": 6.762197199332945e-06, + "loss": 250.3196, + "step": 31660 + }, + { + "epoch": 0.6090940999418216, + "grad_norm": 1491.9956726457312, + "learning_rate": 6.756423422355981e-06, + "loss": 245.0071, + "step": 31670 + }, + { + "epoch": 0.6092864252023021, + "grad_norm": 1555.0605083988942, + "learning_rate": 6.750650853557728e-06, + "loss": 245.5285, + "step": 31680 + }, + { + "epoch": 0.6094787504627827, + "grad_norm": 1657.9337461123548, + "learning_rate": 6.744879495088364e-06, + "loss": 254.6239, + "step": 31690 + }, + { + "epoch": 0.6096710757232632, + "grad_norm": 1903.515552856546, + "learning_rate": 6.7391093490976285e-06, + "loss": 242.8989, + "step": 31700 + }, + { + "epoch": 0.6098634009837437, + "grad_norm": 1601.9733261037613, + "learning_rate": 6.7333404177348036e-06, + "loss": 243.1104, + "step": 31710 + }, + { + "epoch": 0.6100557262442242, + "grad_norm": 1578.7571563074657, + "learning_rate": 6.727572703148726e-06, + "loss": 245.3339, + "step": 31720 + }, + { + "epoch": 0.6102480515047047, + "grad_norm": 1512.1713138921884, + "learning_rate": 6.721806207487769e-06, + "loss": 252.2113, + "step": 31730 + }, + { + "epoch": 0.6104403767651853, + "grad_norm": 1734.8967999615904, + "learning_rate": 6.716040932899857e-06, + "loss": 258.8036, + "step": 31740 + }, + { + "epoch": 0.6106327020256658, + "grad_norm": 1628.0268229886353, + "learning_rate": 6.710276881532463e-06, + "loss": 251.443, + "step": 31750 + }, + { + "epoch": 0.6108250272861463, + "grad_norm": 1549.991520920231, + "learning_rate": 6.704514055532597e-06, + "loss": 252.4328, + "step": 31760 + }, + { + "epoch": 0.6110173525466268, + "grad_norm": 1774.0970598385702, + "learning_rate": 6.698752457046822e-06, + "loss": 245.6301, + "step": 31770 + }, + { + "epoch": 0.6112096778071073, + "grad_norm": 1632.1267733035593, + "learning_rate": 6.692992088221231e-06, + "loss": 256.3446, + "step": 31780 + }, + { + "epoch": 0.6114020030675879, + "grad_norm": 1502.057169906419, + "learning_rate": 6.687232951201473e-06, + "loss": 246.7599, + "step": 31790 + }, + { + "epoch": 0.6115943283280685, + "grad_norm": 1620.8729984258923, + "learning_rate": 6.681475048132729e-06, + "loss": 259.1275, + "step": 31800 + }, + { + "epoch": 0.611786653588549, + "grad_norm": 1744.82182574893, + "learning_rate": 6.675718381159719e-06, + "loss": 248.6843, + "step": 31810 + }, + { + "epoch": 0.6119789788490295, + "grad_norm": 1580.456265980805, + "learning_rate": 6.6699629524267114e-06, + "loss": 246.2566, + "step": 31820 + }, + { + "epoch": 0.61217130410951, + "grad_norm": 1525.056289089862, + "learning_rate": 6.664208764077507e-06, + "loss": 253.225, + "step": 31830 + }, + { + "epoch": 0.6123636293699906, + "grad_norm": 1649.851850165884, + "learning_rate": 6.658455818255445e-06, + "loss": 243.1506, + "step": 31840 + }, + { + "epoch": 0.6125559546304711, + "grad_norm": 1597.9424416727297, + "learning_rate": 6.652704117103401e-06, + "loss": 244.7898, + "step": 31850 + }, + { + "epoch": 0.6127482798909516, + "grad_norm": 1602.6126692859373, + "learning_rate": 6.646953662763796e-06, + "loss": 261.2959, + "step": 31860 + }, + { + "epoch": 0.6129406051514321, + "grad_norm": 1549.4641554031066, + "learning_rate": 6.6412044573785725e-06, + "loss": 250.3392, + "step": 31870 + }, + { + "epoch": 0.6131329304119126, + "grad_norm": 1582.955431582562, + "learning_rate": 6.635456503089217e-06, + "loss": 249.266, + "step": 31880 + }, + { + "epoch": 0.6133252556723932, + "grad_norm": 1792.818896541898, + "learning_rate": 6.6297098020367435e-06, + "loss": 265.8789, + "step": 31890 + }, + { + "epoch": 0.6135175809328737, + "grad_norm": 1719.069564256674, + "learning_rate": 6.623964356361707e-06, + "loss": 264.3555, + "step": 31900 + }, + { + "epoch": 0.6137099061933542, + "grad_norm": 1726.9317400608413, + "learning_rate": 6.618220168204193e-06, + "loss": 247.0801, + "step": 31910 + }, + { + "epoch": 0.6139022314538347, + "grad_norm": 1510.7470892740862, + "learning_rate": 6.6124772397038115e-06, + "loss": 250.4055, + "step": 31920 + }, + { + "epoch": 0.6140945567143152, + "grad_norm": 1613.3241614756466, + "learning_rate": 6.606735572999714e-06, + "loss": 256.8448, + "step": 31930 + }, + { + "epoch": 0.6142868819747958, + "grad_norm": 1454.3553455606593, + "learning_rate": 6.600995170230575e-06, + "loss": 239.996, + "step": 31940 + }, + { + "epoch": 0.6144792072352763, + "grad_norm": 1589.8753590643007, + "learning_rate": 6.595256033534598e-06, + "loss": 245.4918, + "step": 31950 + }, + { + "epoch": 0.6146715324957568, + "grad_norm": 1788.2335614252272, + "learning_rate": 6.589518165049514e-06, + "loss": 246.8885, + "step": 31960 + }, + { + "epoch": 0.6148638577562373, + "grad_norm": 1608.8035911286963, + "learning_rate": 6.5837815669125906e-06, + "loss": 252.2777, + "step": 31970 + }, + { + "epoch": 0.6150561830167178, + "grad_norm": 1471.079507708029, + "learning_rate": 6.5780462412606124e-06, + "loss": 238.6989, + "step": 31980 + }, + { + "epoch": 0.6152485082771983, + "grad_norm": 1511.0155152253, + "learning_rate": 6.572312190229895e-06, + "loss": 247.5496, + "step": 31990 + }, + { + "epoch": 0.615440833537679, + "grad_norm": 1614.8713740986534, + "learning_rate": 6.5665794159562734e-06, + "loss": 246.021, + "step": 32000 + }, + { + "epoch": 0.6156331587981595, + "grad_norm": 1623.9992160658344, + "learning_rate": 6.560847920575118e-06, + "loss": 242.5339, + "step": 32010 + }, + { + "epoch": 0.61582548405864, + "grad_norm": 1594.1458627122008, + "learning_rate": 6.5551177062213126e-06, + "loss": 255.3999, + "step": 32020 + }, + { + "epoch": 0.6160178093191205, + "grad_norm": 1628.471693623422, + "learning_rate": 6.5493887750292616e-06, + "loss": 246.7218, + "step": 32030 + }, + { + "epoch": 0.616210134579601, + "grad_norm": 1476.5325397406718, + "learning_rate": 6.54366112913291e-06, + "loss": 257.9446, + "step": 32040 + }, + { + "epoch": 0.6164024598400816, + "grad_norm": 1857.1994670994266, + "learning_rate": 6.537934770665701e-06, + "loss": 256.731, + "step": 32050 + }, + { + "epoch": 0.6165947851005621, + "grad_norm": 1629.5665638543069, + "learning_rate": 6.532209701760615e-06, + "loss": 260.7297, + "step": 32060 + }, + { + "epoch": 0.6167871103610426, + "grad_norm": 1568.8003140103235, + "learning_rate": 6.526485924550138e-06, + "loss": 254.8337, + "step": 32070 + }, + { + "epoch": 0.6169794356215231, + "grad_norm": 2038.3809150575157, + "learning_rate": 6.520763441166291e-06, + "loss": 253.361, + "step": 32080 + }, + { + "epoch": 0.6171717608820037, + "grad_norm": 1476.8358441094674, + "learning_rate": 6.515042253740601e-06, + "loss": 237.9503, + "step": 32090 + }, + { + "epoch": 0.6173640861424842, + "grad_norm": 1765.9492939359543, + "learning_rate": 6.509322364404112e-06, + "loss": 252.383, + "step": 32100 + }, + { + "epoch": 0.6175564114029647, + "grad_norm": 1488.644221621674, + "learning_rate": 6.503603775287395e-06, + "loss": 249.6691, + "step": 32110 + }, + { + "epoch": 0.6177487366634452, + "grad_norm": 1583.3130400077239, + "learning_rate": 6.497886488520524e-06, + "loss": 245.5295, + "step": 32120 + }, + { + "epoch": 0.6179410619239257, + "grad_norm": 1683.3685894768932, + "learning_rate": 6.4921705062331e-06, + "loss": 257.5813, + "step": 32130 + }, + { + "epoch": 0.6181333871844062, + "grad_norm": 1682.7045364901157, + "learning_rate": 6.486455830554224e-06, + "loss": 244.7124, + "step": 32140 + }, + { + "epoch": 0.6183257124448868, + "grad_norm": 1877.0490159193469, + "learning_rate": 6.4807424636125285e-06, + "loss": 249.3643, + "step": 32150 + }, + { + "epoch": 0.6185180377053673, + "grad_norm": 1700.0408400252504, + "learning_rate": 6.475030407536141e-06, + "loss": 251.4053, + "step": 32160 + }, + { + "epoch": 0.6187103629658478, + "grad_norm": 1493.8667077487155, + "learning_rate": 6.469319664452709e-06, + "loss": 249.9095, + "step": 32170 + }, + { + "epoch": 0.6189026882263283, + "grad_norm": 1520.7891904512587, + "learning_rate": 6.463610236489391e-06, + "loss": 242.9274, + "step": 32180 + }, + { + "epoch": 0.6190950134868088, + "grad_norm": 1751.6989459226484, + "learning_rate": 6.457902125772854e-06, + "loss": 249.4639, + "step": 32190 + }, + { + "epoch": 0.6192873387472894, + "grad_norm": 1928.0840012032722, + "learning_rate": 6.452195334429277e-06, + "loss": 252.8523, + "step": 32200 + }, + { + "epoch": 0.61947966400777, + "grad_norm": 1546.1189550341815, + "learning_rate": 6.446489864584341e-06, + "loss": 246.3375, + "step": 32210 + }, + { + "epoch": 0.6196719892682505, + "grad_norm": 1570.5477599560918, + "learning_rate": 6.440785718363245e-06, + "loss": 245.6131, + "step": 32220 + }, + { + "epoch": 0.619864314528731, + "grad_norm": 1739.4972611254748, + "learning_rate": 6.435082897890688e-06, + "loss": 246.1498, + "step": 32230 + }, + { + "epoch": 0.6200566397892115, + "grad_norm": 1678.4469029802467, + "learning_rate": 6.429381405290873e-06, + "loss": 246.2843, + "step": 32240 + }, + { + "epoch": 0.6202489650496921, + "grad_norm": 1634.775324897167, + "learning_rate": 6.4236812426875124e-06, + "loss": 250.7838, + "step": 32250 + }, + { + "epoch": 0.6204412903101726, + "grad_norm": 1589.5214058834304, + "learning_rate": 6.4179824122038244e-06, + "loss": 249.8279, + "step": 32260 + }, + { + "epoch": 0.6206336155706531, + "grad_norm": 1758.1687057063032, + "learning_rate": 6.412284915962532e-06, + "loss": 250.6428, + "step": 32270 + }, + { + "epoch": 0.6208259408311336, + "grad_norm": 1590.9530138235439, + "learning_rate": 6.406588756085849e-06, + "loss": 244.9568, + "step": 32280 + }, + { + "epoch": 0.6210182660916141, + "grad_norm": 1608.8235312338588, + "learning_rate": 6.400893934695514e-06, + "loss": 247.0817, + "step": 32290 + }, + { + "epoch": 0.6212105913520947, + "grad_norm": 1598.3996115572095, + "learning_rate": 6.395200453912747e-06, + "loss": 253.0403, + "step": 32300 + }, + { + "epoch": 0.6214029166125752, + "grad_norm": 1485.0043715057932, + "learning_rate": 6.389508315858272e-06, + "loss": 250.6169, + "step": 32310 + }, + { + "epoch": 0.6215952418730557, + "grad_norm": 1706.0132389886498, + "learning_rate": 6.38381752265232e-06, + "loss": 247.6966, + "step": 32320 + }, + { + "epoch": 0.6217875671335362, + "grad_norm": 1940.961766426073, + "learning_rate": 6.378128076414619e-06, + "loss": 252.8314, + "step": 32330 + }, + { + "epoch": 0.6219798923940167, + "grad_norm": 1751.9731911783938, + "learning_rate": 6.372439979264393e-06, + "loss": 253.346, + "step": 32340 + }, + { + "epoch": 0.6221722176544973, + "grad_norm": 1580.5134141952267, + "learning_rate": 6.3667532333203655e-06, + "loss": 243.3838, + "step": 32350 + }, + { + "epoch": 0.6223645429149778, + "grad_norm": 1534.8565407098354, + "learning_rate": 6.361067840700747e-06, + "loss": 245.5675, + "step": 32360 + }, + { + "epoch": 0.6225568681754583, + "grad_norm": 1535.1015959866318, + "learning_rate": 6.355383803523265e-06, + "loss": 246.4744, + "step": 32370 + }, + { + "epoch": 0.6227491934359388, + "grad_norm": 1521.4844756247937, + "learning_rate": 6.349701123905123e-06, + "loss": 248.2752, + "step": 32380 + }, + { + "epoch": 0.6229415186964193, + "grad_norm": 1585.2650509553055, + "learning_rate": 6.344019803963021e-06, + "loss": 250.1241, + "step": 32390 + }, + { + "epoch": 0.6231338439568999, + "grad_norm": 1618.6602149788982, + "learning_rate": 6.338339845813164e-06, + "loss": 245.747, + "step": 32400 + }, + { + "epoch": 0.6233261692173805, + "grad_norm": 1601.0337287187801, + "learning_rate": 6.332661251571241e-06, + "loss": 245.6743, + "step": 32410 + }, + { + "epoch": 0.623518494477861, + "grad_norm": 1538.1357091455461, + "learning_rate": 6.326984023352435e-06, + "loss": 252.085, + "step": 32420 + }, + { + "epoch": 0.6237108197383415, + "grad_norm": 1689.5525904868528, + "learning_rate": 6.321308163271413e-06, + "loss": 247.09, + "step": 32430 + }, + { + "epoch": 0.623903144998822, + "grad_norm": 1607.63037700626, + "learning_rate": 6.315633673442349e-06, + "loss": 248.4497, + "step": 32440 + }, + { + "epoch": 0.6240954702593026, + "grad_norm": 1688.4707478131659, + "learning_rate": 6.309960555978894e-06, + "loss": 243.1952, + "step": 32450 + }, + { + "epoch": 0.6242877955197831, + "grad_norm": 1589.6763783257345, + "learning_rate": 6.304288812994183e-06, + "loss": 245.1268, + "step": 32460 + }, + { + "epoch": 0.6244801207802636, + "grad_norm": 1567.0129232821203, + "learning_rate": 6.298618446600856e-06, + "loss": 256.4447, + "step": 32470 + }, + { + "epoch": 0.6246724460407441, + "grad_norm": 1567.832361528433, + "learning_rate": 6.292949458911029e-06, + "loss": 248.8332, + "step": 32480 + }, + { + "epoch": 0.6248647713012246, + "grad_norm": 1622.7786841280042, + "learning_rate": 6.287281852036304e-06, + "loss": 242.8957, + "step": 32490 + }, + { + "epoch": 0.6250570965617052, + "grad_norm": 1574.65614154125, + "learning_rate": 6.2816156280877675e-06, + "loss": 245.2473, + "step": 32500 + }, + { + "epoch": 0.6252494218221857, + "grad_norm": 1450.0720051573194, + "learning_rate": 6.275950789176002e-06, + "loss": 248.8614, + "step": 32510 + }, + { + "epoch": 0.6254417470826662, + "grad_norm": 2114.7601680932303, + "learning_rate": 6.270287337411064e-06, + "loss": 244.607, + "step": 32520 + }, + { + "epoch": 0.6256340723431467, + "grad_norm": 1648.1336613282983, + "learning_rate": 6.264625274902492e-06, + "loss": 251.1316, + "step": 32530 + }, + { + "epoch": 0.6258263976036272, + "grad_norm": 1705.3451392061618, + "learning_rate": 6.258964603759311e-06, + "loss": 253.2709, + "step": 32540 + }, + { + "epoch": 0.6260187228641078, + "grad_norm": 1757.0387141401993, + "learning_rate": 6.2533053260900345e-06, + "loss": 245.0642, + "step": 32550 + }, + { + "epoch": 0.6262110481245883, + "grad_norm": 1727.526741065685, + "learning_rate": 6.247647444002644e-06, + "loss": 248.3847, + "step": 32560 + }, + { + "epoch": 0.6264033733850688, + "grad_norm": 1566.666573390075, + "learning_rate": 6.241990959604607e-06, + "loss": 254.7402, + "step": 32570 + }, + { + "epoch": 0.6265956986455493, + "grad_norm": 1579.8128116168036, + "learning_rate": 6.2363358750028745e-06, + "loss": 246.6831, + "step": 32580 + }, + { + "epoch": 0.6267880239060298, + "grad_norm": 1567.6092283979997, + "learning_rate": 6.23068219230387e-06, + "loss": 242.8022, + "step": 32590 + }, + { + "epoch": 0.6269803491665104, + "grad_norm": 1494.8954769744298, + "learning_rate": 6.2250299136134925e-06, + "loss": 242.8416, + "step": 32600 + }, + { + "epoch": 0.6271726744269909, + "grad_norm": 1507.4813117435835, + "learning_rate": 6.219379041037128e-06, + "loss": 248.8065, + "step": 32610 + }, + { + "epoch": 0.6273649996874715, + "grad_norm": 1506.6896920838308, + "learning_rate": 6.213729576679632e-06, + "loss": 246.772, + "step": 32620 + }, + { + "epoch": 0.627557324947952, + "grad_norm": 1626.2599369080062, + "learning_rate": 6.208081522645339e-06, + "loss": 241.8009, + "step": 32630 + }, + { + "epoch": 0.6277496502084325, + "grad_norm": 1647.035360325793, + "learning_rate": 6.202434881038048e-06, + "loss": 245.0505, + "step": 32640 + }, + { + "epoch": 0.6279419754689131, + "grad_norm": 1507.5611481247427, + "learning_rate": 6.196789653961048e-06, + "loss": 243.504, + "step": 32650 + }, + { + "epoch": 0.6281343007293936, + "grad_norm": 1442.3763540100265, + "learning_rate": 6.191145843517093e-06, + "loss": 244.2661, + "step": 32660 + }, + { + "epoch": 0.6283266259898741, + "grad_norm": 1768.6358707894049, + "learning_rate": 6.185503451808401e-06, + "loss": 248.468, + "step": 32670 + }, + { + "epoch": 0.6285189512503546, + "grad_norm": 1537.5944360458418, + "learning_rate": 6.1798624809366755e-06, + "loss": 254.1811, + "step": 32680 + }, + { + "epoch": 0.6287112765108351, + "grad_norm": 1708.2752417297472, + "learning_rate": 6.174222933003084e-06, + "loss": 248.4348, + "step": 32690 + }, + { + "epoch": 0.6289036017713157, + "grad_norm": 1619.463492778449, + "learning_rate": 6.168584810108269e-06, + "loss": 249.2467, + "step": 32700 + }, + { + "epoch": 0.6290959270317962, + "grad_norm": 1816.4470533788578, + "learning_rate": 6.162948114352328e-06, + "loss": 254.6911, + "step": 32710 + }, + { + "epoch": 0.6292882522922767, + "grad_norm": 1624.2993518406572, + "learning_rate": 6.157312847834848e-06, + "loss": 248.102, + "step": 32720 + }, + { + "epoch": 0.6294805775527572, + "grad_norm": 1599.3784062367943, + "learning_rate": 6.1516790126548695e-06, + "loss": 245.2853, + "step": 32730 + }, + { + "epoch": 0.6296729028132377, + "grad_norm": 1589.5099275240586, + "learning_rate": 6.146046610910899e-06, + "loss": 248.4512, + "step": 32740 + }, + { + "epoch": 0.6298652280737183, + "grad_norm": 1536.989171143397, + "learning_rate": 6.140415644700915e-06, + "loss": 257.6639, + "step": 32750 + }, + { + "epoch": 0.6300575533341988, + "grad_norm": 1837.6683537179802, + "learning_rate": 6.13478611612236e-06, + "loss": 249.9978, + "step": 32760 + }, + { + "epoch": 0.6302498785946793, + "grad_norm": 1539.785434588748, + "learning_rate": 6.129158027272144e-06, + "loss": 246.3008, + "step": 32770 + }, + { + "epoch": 0.6304422038551598, + "grad_norm": 1828.6763877484193, + "learning_rate": 6.123531380246632e-06, + "loss": 243.6701, + "step": 32780 + }, + { + "epoch": 0.6306345291156403, + "grad_norm": 1518.9369019170892, + "learning_rate": 6.117906177141657e-06, + "loss": 250.4036, + "step": 32790 + }, + { + "epoch": 0.6308268543761208, + "grad_norm": 1770.785546728867, + "learning_rate": 6.112282420052518e-06, + "loss": 243.2777, + "step": 32800 + }, + { + "epoch": 0.6310191796366014, + "grad_norm": 1607.8479079552997, + "learning_rate": 6.106660111073969e-06, + "loss": 247.6805, + "step": 32810 + }, + { + "epoch": 0.631211504897082, + "grad_norm": 1532.8465484716394, + "learning_rate": 6.101039252300227e-06, + "loss": 250.477, + "step": 32820 + }, + { + "epoch": 0.6314038301575625, + "grad_norm": 1583.6556477459974, + "learning_rate": 6.095419845824971e-06, + "loss": 245.806, + "step": 32830 + }, + { + "epoch": 0.631596155418043, + "grad_norm": 1565.7516005574798, + "learning_rate": 6.089801893741338e-06, + "loss": 247.4883, + "step": 32840 + }, + { + "epoch": 0.6317884806785236, + "grad_norm": 1578.300650843023, + "learning_rate": 6.084185398141921e-06, + "loss": 246.5214, + "step": 32850 + }, + { + "epoch": 0.6319808059390041, + "grad_norm": 1534.5244133975293, + "learning_rate": 6.078570361118768e-06, + "loss": 249.1956, + "step": 32860 + }, + { + "epoch": 0.6321731311994846, + "grad_norm": 1595.3661320643646, + "learning_rate": 6.072956784763393e-06, + "loss": 246.858, + "step": 32870 + }, + { + "epoch": 0.6323654564599651, + "grad_norm": 1477.1977708577524, + "learning_rate": 6.067344671166757e-06, + "loss": 249.2065, + "step": 32880 + }, + { + "epoch": 0.6325577817204456, + "grad_norm": 1663.6308689361103, + "learning_rate": 6.061734022419281e-06, + "loss": 246.4378, + "step": 32890 + }, + { + "epoch": 0.6327501069809262, + "grad_norm": 1576.8311054849837, + "learning_rate": 6.056124840610839e-06, + "loss": 236.941, + "step": 32900 + }, + { + "epoch": 0.6329424322414067, + "grad_norm": 1718.6030339996457, + "learning_rate": 6.050517127830761e-06, + "loss": 248.3223, + "step": 32910 + }, + { + "epoch": 0.6331347575018872, + "grad_norm": 1720.5924619205668, + "learning_rate": 6.044910886167825e-06, + "loss": 255.572, + "step": 32920 + }, + { + "epoch": 0.6333270827623677, + "grad_norm": 1711.6160043458594, + "learning_rate": 6.03930611771026e-06, + "loss": 243.622, + "step": 32930 + }, + { + "epoch": 0.6335194080228482, + "grad_norm": 1727.5870155952293, + "learning_rate": 6.033702824545755e-06, + "loss": 240.5427, + "step": 32940 + }, + { + "epoch": 0.6337117332833287, + "grad_norm": 1638.667753915611, + "learning_rate": 6.028101008761445e-06, + "loss": 236.3573, + "step": 32950 + }, + { + "epoch": 0.6339040585438093, + "grad_norm": 1629.6333799236143, + "learning_rate": 6.022500672443907e-06, + "loss": 244.3131, + "step": 32960 + }, + { + "epoch": 0.6340963838042898, + "grad_norm": 1663.290947145034, + "learning_rate": 6.016901817679177e-06, + "loss": 252.1628, + "step": 32970 + }, + { + "epoch": 0.6342887090647703, + "grad_norm": 1412.7271375235703, + "learning_rate": 6.011304446552741e-06, + "loss": 241.2882, + "step": 32980 + }, + { + "epoch": 0.6344810343252508, + "grad_norm": 1594.4454825745604, + "learning_rate": 6.005708561149523e-06, + "loss": 246.0481, + "step": 32990 + }, + { + "epoch": 0.6346733595857313, + "grad_norm": 1707.5138202281103, + "learning_rate": 6.000114163553894e-06, + "loss": 249.547, + "step": 33000 + }, + { + "epoch": 0.6348656848462119, + "grad_norm": 1528.2134605789608, + "learning_rate": 5.994521255849684e-06, + "loss": 251.3665, + "step": 33010 + }, + { + "epoch": 0.6350580101066924, + "grad_norm": 1478.1877711859377, + "learning_rate": 5.988929840120151e-06, + "loss": 248.4078, + "step": 33020 + }, + { + "epoch": 0.635250335367173, + "grad_norm": 1769.7989304924388, + "learning_rate": 5.983339918448008e-06, + "loss": 246.3821, + "step": 33030 + }, + { + "epoch": 0.6354426606276535, + "grad_norm": 1623.5398650186996, + "learning_rate": 5.977751492915404e-06, + "loss": 241.6955, + "step": 33040 + }, + { + "epoch": 0.635634985888134, + "grad_norm": 1568.3239354920047, + "learning_rate": 5.972164565603944e-06, + "loss": 246.2752, + "step": 33050 + }, + { + "epoch": 0.6358273111486146, + "grad_norm": 1620.8237772235327, + "learning_rate": 5.966579138594661e-06, + "loss": 253.7861, + "step": 33060 + }, + { + "epoch": 0.6360196364090951, + "grad_norm": 1539.9819747996405, + "learning_rate": 5.960995213968033e-06, + "loss": 248.4565, + "step": 33070 + }, + { + "epoch": 0.6362119616695756, + "grad_norm": 1596.4702876001725, + "learning_rate": 5.955412793803983e-06, + "loss": 242.4913, + "step": 33080 + }, + { + "epoch": 0.6364042869300561, + "grad_norm": 1980.138686396594, + "learning_rate": 5.949831880181869e-06, + "loss": 259.1184, + "step": 33090 + }, + { + "epoch": 0.6365966121905366, + "grad_norm": 1562.2473903526125, + "learning_rate": 5.944252475180487e-06, + "loss": 256.8562, + "step": 33100 + }, + { + "epoch": 0.6367889374510172, + "grad_norm": 1515.1407736467438, + "learning_rate": 5.938674580878077e-06, + "loss": 246.3608, + "step": 33110 + }, + { + "epoch": 0.6369812627114977, + "grad_norm": 1441.905515966365, + "learning_rate": 5.933098199352307e-06, + "loss": 250.6053, + "step": 33120 + }, + { + "epoch": 0.6371735879719782, + "grad_norm": 1629.1783300601244, + "learning_rate": 5.927523332680296e-06, + "loss": 250.9615, + "step": 33130 + }, + { + "epoch": 0.6373659132324587, + "grad_norm": 1641.1701206727084, + "learning_rate": 5.921949982938583e-06, + "loss": 245.559, + "step": 33140 + }, + { + "epoch": 0.6375582384929392, + "grad_norm": 1596.1086023443413, + "learning_rate": 5.916378152203146e-06, + "loss": 249.9641, + "step": 33150 + }, + { + "epoch": 0.6377505637534198, + "grad_norm": 1427.0600192349104, + "learning_rate": 5.910807842549409e-06, + "loss": 244.6634, + "step": 33160 + }, + { + "epoch": 0.6379428890139003, + "grad_norm": 1662.3631962510083, + "learning_rate": 5.905239056052212e-06, + "loss": 242.7421, + "step": 33170 + }, + { + "epoch": 0.6381352142743808, + "grad_norm": 1665.114998134074, + "learning_rate": 5.8996717947858395e-06, + "loss": 245.7042, + "step": 33180 + }, + { + "epoch": 0.6383275395348613, + "grad_norm": 1519.6548917706268, + "learning_rate": 5.894106060824005e-06, + "loss": 245.6331, + "step": 33190 + }, + { + "epoch": 0.6385198647953418, + "grad_norm": 1549.21597966117, + "learning_rate": 5.888541856239854e-06, + "loss": 241.5423, + "step": 33200 + }, + { + "epoch": 0.6387121900558224, + "grad_norm": 1652.5725857712496, + "learning_rate": 5.882979183105959e-06, + "loss": 245.7441, + "step": 33210 + }, + { + "epoch": 0.6389045153163029, + "grad_norm": 1384.0270437995769, + "learning_rate": 5.8774180434943184e-06, + "loss": 242.0473, + "step": 33220 + }, + { + "epoch": 0.6390968405767835, + "grad_norm": 1653.192009309887, + "learning_rate": 5.871858439476374e-06, + "loss": 245.5735, + "step": 33230 + }, + { + "epoch": 0.639289165837264, + "grad_norm": 1547.3408590604622, + "learning_rate": 5.86630037312298e-06, + "loss": 244.2901, + "step": 33240 + }, + { + "epoch": 0.6394814910977445, + "grad_norm": 1613.1810826595165, + "learning_rate": 5.860743846504429e-06, + "loss": 249.6994, + "step": 33250 + }, + { + "epoch": 0.6396738163582251, + "grad_norm": 1473.0402502223385, + "learning_rate": 5.8551888616904305e-06, + "loss": 246.9429, + "step": 33260 + }, + { + "epoch": 0.6398661416187056, + "grad_norm": 1517.465889778779, + "learning_rate": 5.849635420750131e-06, + "loss": 255.199, + "step": 33270 + }, + { + "epoch": 0.6400584668791861, + "grad_norm": 1592.9883626456392, + "learning_rate": 5.84408352575209e-06, + "loss": 245.4556, + "step": 33280 + }, + { + "epoch": 0.6402507921396666, + "grad_norm": 1729.6262112804789, + "learning_rate": 5.8385331787642956e-06, + "loss": 238.5987, + "step": 33290 + }, + { + "epoch": 0.6404431174001471, + "grad_norm": 1580.4064431986308, + "learning_rate": 5.8329843818541665e-06, + "loss": 247.3982, + "step": 33300 + }, + { + "epoch": 0.6406354426606277, + "grad_norm": 1785.1346877032258, + "learning_rate": 5.827437137088535e-06, + "loss": 236.1925, + "step": 33310 + }, + { + "epoch": 0.6408277679211082, + "grad_norm": 1765.173873381017, + "learning_rate": 5.8218914465336585e-06, + "loss": 244.7444, + "step": 33320 + }, + { + "epoch": 0.6410200931815887, + "grad_norm": 1687.537795474892, + "learning_rate": 5.816347312255209e-06, + "loss": 242.8977, + "step": 33330 + }, + { + "epoch": 0.6412124184420692, + "grad_norm": 1580.8500233371835, + "learning_rate": 5.810804736318295e-06, + "loss": 239.8719, + "step": 33340 + }, + { + "epoch": 0.6414047437025497, + "grad_norm": 1704.6797554911557, + "learning_rate": 5.805263720787426e-06, + "loss": 235.9653, + "step": 33350 + }, + { + "epoch": 0.6415970689630303, + "grad_norm": 1534.0302203848453, + "learning_rate": 5.799724267726547e-06, + "loss": 252.2206, + "step": 33360 + }, + { + "epoch": 0.6417893942235108, + "grad_norm": 1427.7885876304517, + "learning_rate": 5.794186379199004e-06, + "loss": 245.9651, + "step": 33370 + }, + { + "epoch": 0.6419817194839913, + "grad_norm": 1441.674886182865, + "learning_rate": 5.7886500572675774e-06, + "loss": 251.7738, + "step": 33380 + }, + { + "epoch": 0.6421740447444718, + "grad_norm": 1562.9216476791923, + "learning_rate": 5.783115303994451e-06, + "loss": 250.4023, + "step": 33390 + }, + { + "epoch": 0.6423663700049523, + "grad_norm": 1920.4788053768436, + "learning_rate": 5.777582121441227e-06, + "loss": 251.1658, + "step": 33400 + }, + { + "epoch": 0.6425586952654329, + "grad_norm": 1941.7802402947027, + "learning_rate": 5.772050511668931e-06, + "loss": 246.5123, + "step": 33410 + }, + { + "epoch": 0.6427510205259134, + "grad_norm": 1568.5024031214668, + "learning_rate": 5.766520476737993e-06, + "loss": 249.1046, + "step": 33420 + }, + { + "epoch": 0.642943345786394, + "grad_norm": 1567.923840090729, + "learning_rate": 5.760992018708253e-06, + "loss": 243.7918, + "step": 33430 + }, + { + "epoch": 0.6431356710468745, + "grad_norm": 1486.9123075394398, + "learning_rate": 5.755465139638983e-06, + "loss": 232.4583, + "step": 33440 + }, + { + "epoch": 0.643327996307355, + "grad_norm": 1570.8365556836536, + "learning_rate": 5.749939841588846e-06, + "loss": 255.1207, + "step": 33450 + }, + { + "epoch": 0.6435203215678356, + "grad_norm": 1407.0351624994057, + "learning_rate": 5.744416126615926e-06, + "loss": 243.4232, + "step": 33460 + }, + { + "epoch": 0.6437126468283161, + "grad_norm": 1580.1842310231264, + "learning_rate": 5.738893996777713e-06, + "loss": 249.9519, + "step": 33470 + }, + { + "epoch": 0.6439049720887966, + "grad_norm": 1593.3669439472326, + "learning_rate": 5.7333734541311144e-06, + "loss": 240.081, + "step": 33480 + }, + { + "epoch": 0.6440972973492771, + "grad_norm": 2612.285670751348, + "learning_rate": 5.727854500732435e-06, + "loss": 250.6562, + "step": 33490 + }, + { + "epoch": 0.6442896226097576, + "grad_norm": 1539.7385637260627, + "learning_rate": 5.722337138637402e-06, + "loss": 240.3438, + "step": 33500 + }, + { + "epoch": 0.6444819478702382, + "grad_norm": 1403.7126688946553, + "learning_rate": 5.716821369901131e-06, + "loss": 243.7391, + "step": 33510 + }, + { + "epoch": 0.6446742731307187, + "grad_norm": 1528.1058627803038, + "learning_rate": 5.711307196578166e-06, + "loss": 238.9891, + "step": 33520 + }, + { + "epoch": 0.6448665983911992, + "grad_norm": 1563.8369730986801, + "learning_rate": 5.70579462072244e-06, + "loss": 241.4954, + "step": 33530 + }, + { + "epoch": 0.6450589236516797, + "grad_norm": 1537.7667829895724, + "learning_rate": 5.700283644387292e-06, + "loss": 245.0168, + "step": 33540 + }, + { + "epoch": 0.6452512489121602, + "grad_norm": 1595.3647465891213, + "learning_rate": 5.6947742696254794e-06, + "loss": 249.0262, + "step": 33550 + }, + { + "epoch": 0.6454435741726408, + "grad_norm": 1668.3365718955092, + "learning_rate": 5.689266498489148e-06, + "loss": 245.9052, + "step": 33560 + }, + { + "epoch": 0.6456358994331213, + "grad_norm": 1498.947090389856, + "learning_rate": 5.683760333029851e-06, + "loss": 248.3906, + "step": 33570 + }, + { + "epoch": 0.6458282246936018, + "grad_norm": 1701.087014436955, + "learning_rate": 5.678255775298542e-06, + "loss": 240.099, + "step": 33580 + }, + { + "epoch": 0.6460205499540823, + "grad_norm": 1896.5405180718585, + "learning_rate": 5.672752827345584e-06, + "loss": 241.0282, + "step": 33590 + }, + { + "epoch": 0.6462128752145628, + "grad_norm": 1743.6078933233418, + "learning_rate": 5.667251491220731e-06, + "loss": 237.3629, + "step": 33600 + }, + { + "epoch": 0.6464052004750434, + "grad_norm": 1681.7415906054146, + "learning_rate": 5.661751768973136e-06, + "loss": 241.7321, + "step": 33610 + }, + { + "epoch": 0.6465975257355239, + "grad_norm": 1721.9831247416032, + "learning_rate": 5.656253662651362e-06, + "loss": 248.6104, + "step": 33620 + }, + { + "epoch": 0.6467898509960044, + "grad_norm": 1580.6107418236597, + "learning_rate": 5.650757174303356e-06, + "loss": 237.5195, + "step": 33630 + }, + { + "epoch": 0.646982176256485, + "grad_norm": 1564.6415974446613, + "learning_rate": 5.645262305976476e-06, + "loss": 245.6259, + "step": 33640 + }, + { + "epoch": 0.6471745015169655, + "grad_norm": 1659.3163505740938, + "learning_rate": 5.639769059717462e-06, + "loss": 240.3887, + "step": 33650 + }, + { + "epoch": 0.6473668267774461, + "grad_norm": 1782.4986364043527, + "learning_rate": 5.634277437572466e-06, + "loss": 243.8076, + "step": 33660 + }, + { + "epoch": 0.6475591520379266, + "grad_norm": 1543.5231035426991, + "learning_rate": 5.6287874415870225e-06, + "loss": 243.0699, + "step": 33670 + }, + { + "epoch": 0.6477514772984071, + "grad_norm": 1716.2955970962194, + "learning_rate": 5.623299073806063e-06, + "loss": 240.1385, + "step": 33680 + }, + { + "epoch": 0.6479438025588876, + "grad_norm": 1688.011132883271, + "learning_rate": 5.617812336273912e-06, + "loss": 244.3975, + "step": 33690 + }, + { + "epoch": 0.6481361278193681, + "grad_norm": 1563.549686292041, + "learning_rate": 5.612327231034296e-06, + "loss": 239.5934, + "step": 33700 + }, + { + "epoch": 0.6483284530798487, + "grad_norm": 1600.1830411580322, + "learning_rate": 5.606843760130321e-06, + "loss": 240.8768, + "step": 33710 + }, + { + "epoch": 0.6485207783403292, + "grad_norm": 1822.9876700064274, + "learning_rate": 5.601361925604485e-06, + "loss": 243.2996, + "step": 33720 + }, + { + "epoch": 0.6487131036008097, + "grad_norm": 1646.714053356861, + "learning_rate": 5.595881729498691e-06, + "loss": 238.8543, + "step": 33730 + }, + { + "epoch": 0.6489054288612902, + "grad_norm": 1663.792240115043, + "learning_rate": 5.590403173854215e-06, + "loss": 246.3007, + "step": 33740 + }, + { + "epoch": 0.6490977541217707, + "grad_norm": 1544.8035930567348, + "learning_rate": 5.584926260711732e-06, + "loss": 245.8514, + "step": 33750 + }, + { + "epoch": 0.6492900793822513, + "grad_norm": 1449.8989005270469, + "learning_rate": 5.579450992111294e-06, + "loss": 246.5289, + "step": 33760 + }, + { + "epoch": 0.6494824046427318, + "grad_norm": 1516.2265236731903, + "learning_rate": 5.573977370092358e-06, + "loss": 237.2555, + "step": 33770 + }, + { + "epoch": 0.6496747299032123, + "grad_norm": 1598.6996037292274, + "learning_rate": 5.568505396693749e-06, + "loss": 246.269, + "step": 33780 + }, + { + "epoch": 0.6498670551636928, + "grad_norm": 1643.7361251387388, + "learning_rate": 5.563035073953691e-06, + "loss": 249.9002, + "step": 33790 + }, + { + "epoch": 0.6500593804241733, + "grad_norm": 1534.8969275845736, + "learning_rate": 5.557566403909794e-06, + "loss": 236.6114, + "step": 33800 + }, + { + "epoch": 0.6502517056846538, + "grad_norm": 1583.1704136875312, + "learning_rate": 5.552099388599042e-06, + "loss": 245.5881, + "step": 33810 + }, + { + "epoch": 0.6504440309451344, + "grad_norm": 1628.260049827327, + "learning_rate": 5.5466340300578095e-06, + "loss": 237.9173, + "step": 33820 + }, + { + "epoch": 0.6506363562056149, + "grad_norm": 1594.8747911753942, + "learning_rate": 5.541170330321845e-06, + "loss": 241.4152, + "step": 33830 + }, + { + "epoch": 0.6508286814660955, + "grad_norm": 1535.6191317398784, + "learning_rate": 5.535708291426297e-06, + "loss": 244.6761, + "step": 33840 + }, + { + "epoch": 0.651021006726576, + "grad_norm": 1552.608541969007, + "learning_rate": 5.53024791540568e-06, + "loss": 239.3801, + "step": 33850 + }, + { + "epoch": 0.6512133319870566, + "grad_norm": 1543.8663426498124, + "learning_rate": 5.5247892042938944e-06, + "loss": 236.4334, + "step": 33860 + }, + { + "epoch": 0.6514056572475371, + "grad_norm": 1540.959945513679, + "learning_rate": 5.519332160124215e-06, + "loss": 242.037, + "step": 33870 + }, + { + "epoch": 0.6515979825080176, + "grad_norm": 1607.608344707393, + "learning_rate": 5.513876784929311e-06, + "loss": 237.8388, + "step": 33880 + }, + { + "epoch": 0.6517903077684981, + "grad_norm": 1655.9616162185432, + "learning_rate": 5.5084230807412135e-06, + "loss": 236.6411, + "step": 33890 + }, + { + "epoch": 0.6519826330289786, + "grad_norm": 1994.690495480855, + "learning_rate": 5.502971049591332e-06, + "loss": 249.3606, + "step": 33900 + }, + { + "epoch": 0.6521749582894592, + "grad_norm": 1486.854638150478, + "learning_rate": 5.497520693510469e-06, + "loss": 242.6046, + "step": 33910 + }, + { + "epoch": 0.6523672835499397, + "grad_norm": 1488.9397508013353, + "learning_rate": 5.492072014528783e-06, + "loss": 245.5518, + "step": 33920 + }, + { + "epoch": 0.6525596088104202, + "grad_norm": 1577.3181891064764, + "learning_rate": 5.4866250146758235e-06, + "loss": 243.3775, + "step": 33930 + }, + { + "epoch": 0.6527519340709007, + "grad_norm": 1440.5795182481272, + "learning_rate": 5.481179695980503e-06, + "loss": 243.203, + "step": 33940 + }, + { + "epoch": 0.6529442593313812, + "grad_norm": 1368.608328644047, + "learning_rate": 5.475736060471117e-06, + "loss": 247.5795, + "step": 33950 + }, + { + "epoch": 0.6531365845918617, + "grad_norm": 1500.5440019994876, + "learning_rate": 5.470294110175329e-06, + "loss": 242.9794, + "step": 33960 + }, + { + "epoch": 0.6533289098523423, + "grad_norm": 1505.3898057259446, + "learning_rate": 5.464853847120169e-06, + "loss": 237.1644, + "step": 33970 + }, + { + "epoch": 0.6535212351128228, + "grad_norm": 1565.3298648135217, + "learning_rate": 5.459415273332056e-06, + "loss": 244.4879, + "step": 33980 + }, + { + "epoch": 0.6537135603733033, + "grad_norm": 1558.1025582137224, + "learning_rate": 5.453978390836763e-06, + "loss": 236.0704, + "step": 33990 + }, + { + "epoch": 0.6539058856337838, + "grad_norm": 1480.863230972188, + "learning_rate": 5.44854320165944e-06, + "loss": 248.3781, + "step": 34000 + }, + { + "epoch": 0.6540982108942643, + "grad_norm": 1624.917563891583, + "learning_rate": 5.443109707824599e-06, + "loss": 249.1022, + "step": 34010 + }, + { + "epoch": 0.6542905361547449, + "grad_norm": 1540.8689369688175, + "learning_rate": 5.437677911356137e-06, + "loss": 241.2371, + "step": 34020 + }, + { + "epoch": 0.6544828614152254, + "grad_norm": 1661.2790109400378, + "learning_rate": 5.432247814277305e-06, + "loss": 236.8804, + "step": 34030 + }, + { + "epoch": 0.6546751866757059, + "grad_norm": 1910.3639870614024, + "learning_rate": 5.426819418610718e-06, + "loss": 248.7827, + "step": 34040 + }, + { + "epoch": 0.6548675119361865, + "grad_norm": 1558.603472536026, + "learning_rate": 5.4213927263783725e-06, + "loss": 236.9976, + "step": 34050 + }, + { + "epoch": 0.655059837196667, + "grad_norm": 1421.7470280716952, + "learning_rate": 5.415967739601616e-06, + "loss": 234.0622, + "step": 34060 + }, + { + "epoch": 0.6552521624571476, + "grad_norm": 1553.646499313457, + "learning_rate": 5.41054446030117e-06, + "loss": 241.8123, + "step": 34070 + }, + { + "epoch": 0.6554444877176281, + "grad_norm": 1851.6982961388287, + "learning_rate": 5.405122890497114e-06, + "loss": 250.3723, + "step": 34080 + }, + { + "epoch": 0.6556368129781086, + "grad_norm": 1561.6896288143896, + "learning_rate": 5.399703032208896e-06, + "loss": 243.1483, + "step": 34090 + }, + { + "epoch": 0.6558291382385891, + "grad_norm": 1577.121166693554, + "learning_rate": 5.3942848874553235e-06, + "loss": 240.305, + "step": 34100 + }, + { + "epoch": 0.6560214634990696, + "grad_norm": 1475.4204529224933, + "learning_rate": 5.388868458254565e-06, + "loss": 233.7801, + "step": 34110 + }, + { + "epoch": 0.6562137887595502, + "grad_norm": 1507.669425403277, + "learning_rate": 5.3834537466241455e-06, + "loss": 238.5951, + "step": 34120 + }, + { + "epoch": 0.6564061140200307, + "grad_norm": 1383.2948893470773, + "learning_rate": 5.378040754580964e-06, + "loss": 234.3419, + "step": 34130 + }, + { + "epoch": 0.6565984392805112, + "grad_norm": 1587.0554629770497, + "learning_rate": 5.37262948414127e-06, + "loss": 244.7607, + "step": 34140 + }, + { + "epoch": 0.6567907645409917, + "grad_norm": 1728.0160670652772, + "learning_rate": 5.367219937320663e-06, + "loss": 236.5773, + "step": 34150 + }, + { + "epoch": 0.6569830898014722, + "grad_norm": 1497.7001376726575, + "learning_rate": 5.361812116134122e-06, + "loss": 240.3799, + "step": 34160 + }, + { + "epoch": 0.6571754150619528, + "grad_norm": 1649.5838296742736, + "learning_rate": 5.356406022595963e-06, + "loss": 245.3542, + "step": 34170 + }, + { + "epoch": 0.6573677403224333, + "grad_norm": 1558.1627200630019, + "learning_rate": 5.351001658719872e-06, + "loss": 244.3351, + "step": 34180 + }, + { + "epoch": 0.6575600655829138, + "grad_norm": 1353.5702789873073, + "learning_rate": 5.345599026518877e-06, + "loss": 242.9741, + "step": 34190 + }, + { + "epoch": 0.6577523908433943, + "grad_norm": 1537.4491165065745, + "learning_rate": 5.3401981280053745e-06, + "loss": 250.0492, + "step": 34200 + }, + { + "epoch": 0.6579447161038748, + "grad_norm": 1556.1483851977846, + "learning_rate": 5.334798965191115e-06, + "loss": 241.9707, + "step": 34210 + }, + { + "epoch": 0.6581370413643554, + "grad_norm": 1591.265777770099, + "learning_rate": 5.329401540087188e-06, + "loss": 237.3961, + "step": 34220 + }, + { + "epoch": 0.6583293666248359, + "grad_norm": 1527.0948515868372, + "learning_rate": 5.3240058547040525e-06, + "loss": 234.2487, + "step": 34230 + }, + { + "epoch": 0.6585216918853164, + "grad_norm": 1463.990784982395, + "learning_rate": 5.318611911051512e-06, + "loss": 239.1191, + "step": 34240 + }, + { + "epoch": 0.658714017145797, + "grad_norm": 1430.5563353294062, + "learning_rate": 5.313219711138717e-06, + "loss": 240.6146, + "step": 34250 + }, + { + "epoch": 0.6589063424062775, + "grad_norm": 1605.3487600450017, + "learning_rate": 5.307829256974173e-06, + "loss": 235.8148, + "step": 34260 + }, + { + "epoch": 0.6590986676667581, + "grad_norm": 1535.623240763071, + "learning_rate": 5.302440550565739e-06, + "loss": 230.1232, + "step": 34270 + }, + { + "epoch": 0.6592909929272386, + "grad_norm": 1586.308704450949, + "learning_rate": 5.297053593920618e-06, + "loss": 232.7758, + "step": 34280 + }, + { + "epoch": 0.6594833181877191, + "grad_norm": 1455.5176649254452, + "learning_rate": 5.291668389045362e-06, + "loss": 238.7155, + "step": 34290 + }, + { + "epoch": 0.6596756434481996, + "grad_norm": 1566.9981409277634, + "learning_rate": 5.286284937945866e-06, + "loss": 241.0195, + "step": 34300 + }, + { + "epoch": 0.6598679687086801, + "grad_norm": 1591.0879362389355, + "learning_rate": 5.280903242627384e-06, + "loss": 244.9684, + "step": 34310 + }, + { + "epoch": 0.6600602939691607, + "grad_norm": 3271.3285393791975, + "learning_rate": 5.2755233050945076e-06, + "loss": 245.5639, + "step": 34320 + }, + { + "epoch": 0.6602526192296412, + "grad_norm": 1682.0916508380708, + "learning_rate": 5.270145127351168e-06, + "loss": 247.4215, + "step": 34330 + }, + { + "epoch": 0.6604449444901217, + "grad_norm": 1789.31363571357, + "learning_rate": 5.264768711400656e-06, + "loss": 241.0164, + "step": 34340 + }, + { + "epoch": 0.6606372697506022, + "grad_norm": 1719.6523963782524, + "learning_rate": 5.25939405924559e-06, + "loss": 242.2833, + "step": 34350 + }, + { + "epoch": 0.6608295950110827, + "grad_norm": 1601.5005012523118, + "learning_rate": 5.254021172887947e-06, + "loss": 247.5436, + "step": 34360 + }, + { + "epoch": 0.6610219202715633, + "grad_norm": 1612.383745768123, + "learning_rate": 5.248650054329032e-06, + "loss": 247.6403, + "step": 34370 + }, + { + "epoch": 0.6612142455320438, + "grad_norm": 1623.4026519042443, + "learning_rate": 5.2432807055695035e-06, + "loss": 247.4154, + "step": 34380 + }, + { + "epoch": 0.6614065707925243, + "grad_norm": 1579.6288849252182, + "learning_rate": 5.237913128609352e-06, + "loss": 243.2603, + "step": 34390 + }, + { + "epoch": 0.6615988960530048, + "grad_norm": 1455.270238227104, + "learning_rate": 5.232547325447908e-06, + "loss": 250.4422, + "step": 34400 + }, + { + "epoch": 0.6617912213134853, + "grad_norm": 1630.009850769735, + "learning_rate": 5.227183298083854e-06, + "loss": 239.656, + "step": 34410 + }, + { + "epoch": 0.6619835465739659, + "grad_norm": 1751.6551799830158, + "learning_rate": 5.221821048515193e-06, + "loss": 242.0374, + "step": 34420 + }, + { + "epoch": 0.6621758718344464, + "grad_norm": 1514.6136013261407, + "learning_rate": 5.216460578739278e-06, + "loss": 236.949, + "step": 34430 + }, + { + "epoch": 0.6623681970949269, + "grad_norm": 1515.4167284499051, + "learning_rate": 5.211101890752792e-06, + "loss": 236.9444, + "step": 34440 + }, + { + "epoch": 0.6625605223554074, + "grad_norm": 1554.3141051753453, + "learning_rate": 5.205744986551763e-06, + "loss": 240.2105, + "step": 34450 + }, + { + "epoch": 0.662752847615888, + "grad_norm": 1677.5415540828621, + "learning_rate": 5.200389868131547e-06, + "loss": 248.5601, + "step": 34460 + }, + { + "epoch": 0.6629451728763686, + "grad_norm": 1559.733496865752, + "learning_rate": 5.195036537486833e-06, + "loss": 247.374, + "step": 34470 + }, + { + "epoch": 0.6631374981368491, + "grad_norm": 1550.7657091462142, + "learning_rate": 5.189684996611657e-06, + "loss": 246.8185, + "step": 34480 + }, + { + "epoch": 0.6633298233973296, + "grad_norm": 1428.9433311814219, + "learning_rate": 5.18433524749937e-06, + "loss": 235.8814, + "step": 34490 + }, + { + "epoch": 0.6635221486578101, + "grad_norm": 1799.0210163625984, + "learning_rate": 5.178987292142674e-06, + "loss": 252.3958, + "step": 34500 + }, + { + "epoch": 0.6637144739182906, + "grad_norm": 1443.1877247379198, + "learning_rate": 5.173641132533586e-06, + "loss": 240.2568, + "step": 34510 + }, + { + "epoch": 0.6639067991787712, + "grad_norm": 1635.034512203725, + "learning_rate": 5.16829677066347e-06, + "loss": 247.8541, + "step": 34520 + }, + { + "epoch": 0.6640991244392517, + "grad_norm": 1674.1642518045078, + "learning_rate": 5.16295420852301e-06, + "loss": 238.8359, + "step": 34530 + }, + { + "epoch": 0.6642914496997322, + "grad_norm": 1542.1724582160432, + "learning_rate": 5.157613448102219e-06, + "loss": 243.5318, + "step": 34540 + }, + { + "epoch": 0.6644837749602127, + "grad_norm": 1359.6359175561622, + "learning_rate": 5.152274491390441e-06, + "loss": 241.1148, + "step": 34550 + }, + { + "epoch": 0.6646761002206932, + "grad_norm": 1633.8622693225025, + "learning_rate": 5.1469373403763555e-06, + "loss": 243.5518, + "step": 34560 + }, + { + "epoch": 0.6648684254811738, + "grad_norm": 1550.9344748639865, + "learning_rate": 5.14160199704796e-06, + "loss": 238.6767, + "step": 34570 + }, + { + "epoch": 0.6650607507416543, + "grad_norm": 1499.9267239020364, + "learning_rate": 5.136268463392578e-06, + "loss": 237.2557, + "step": 34580 + }, + { + "epoch": 0.6652530760021348, + "grad_norm": 1665.5736182747498, + "learning_rate": 5.13093674139687e-06, + "loss": 239.5459, + "step": 34590 + }, + { + "epoch": 0.6654454012626153, + "grad_norm": 1485.474032002765, + "learning_rate": 5.12560683304681e-06, + "loss": 235.1012, + "step": 34600 + }, + { + "epoch": 0.6656377265230958, + "grad_norm": 1669.3176931646353, + "learning_rate": 5.120278740327702e-06, + "loss": 238.2485, + "step": 34610 + }, + { + "epoch": 0.6658300517835763, + "grad_norm": 1429.0662090165415, + "learning_rate": 5.114952465224168e-06, + "loss": 240.7864, + "step": 34620 + }, + { + "epoch": 0.6660223770440569, + "grad_norm": 1574.384026635983, + "learning_rate": 5.109628009720162e-06, + "loss": 247.8457, + "step": 34630 + }, + { + "epoch": 0.6662147023045374, + "grad_norm": 1974.0278155291112, + "learning_rate": 5.104305375798958e-06, + "loss": 242.7548, + "step": 34640 + }, + { + "epoch": 0.6664070275650179, + "grad_norm": 1865.3875014337664, + "learning_rate": 5.098984565443148e-06, + "loss": 244.976, + "step": 34650 + }, + { + "epoch": 0.6665993528254985, + "grad_norm": 1799.3689359589089, + "learning_rate": 5.093665580634639e-06, + "loss": 236.0072, + "step": 34660 + }, + { + "epoch": 0.6667916780859791, + "grad_norm": 1586.830044361748, + "learning_rate": 5.088348423354674e-06, + "loss": 236.3255, + "step": 34670 + }, + { + "epoch": 0.6669840033464596, + "grad_norm": 1477.1281301542635, + "learning_rate": 5.0830330955838045e-06, + "loss": 244.4166, + "step": 34680 + }, + { + "epoch": 0.6671763286069401, + "grad_norm": 1453.389875571511, + "learning_rate": 5.077719599301895e-06, + "loss": 241.1291, + "step": 34690 + }, + { + "epoch": 0.6673686538674206, + "grad_norm": 1741.008433516878, + "learning_rate": 5.072407936488145e-06, + "loss": 254.7444, + "step": 34700 + }, + { + "epoch": 0.6675609791279011, + "grad_norm": 1450.1987424526549, + "learning_rate": 5.067098109121058e-06, + "loss": 240.3621, + "step": 34710 + }, + { + "epoch": 0.6677533043883817, + "grad_norm": 1656.220009503437, + "learning_rate": 5.0617901191784536e-06, + "loss": 240.9367, + "step": 34720 + }, + { + "epoch": 0.6679456296488622, + "grad_norm": 1482.6727507809046, + "learning_rate": 5.05648396863747e-06, + "loss": 240.2776, + "step": 34730 + }, + { + "epoch": 0.6681379549093427, + "grad_norm": 1811.330848051615, + "learning_rate": 5.051179659474568e-06, + "loss": 242.9302, + "step": 34740 + }, + { + "epoch": 0.6683302801698232, + "grad_norm": 1593.0864925162757, + "learning_rate": 5.045877193665508e-06, + "loss": 243.2492, + "step": 34750 + }, + { + "epoch": 0.6685226054303037, + "grad_norm": 1649.0097673598555, + "learning_rate": 5.040576573185372e-06, + "loss": 240.529, + "step": 34760 + }, + { + "epoch": 0.6687149306907842, + "grad_norm": 1683.9299016641, + "learning_rate": 5.0352778000085565e-06, + "loss": 238.4384, + "step": 34770 + }, + { + "epoch": 0.6689072559512648, + "grad_norm": 1576.440869984676, + "learning_rate": 5.029980876108762e-06, + "loss": 242.4429, + "step": 34780 + }, + { + "epoch": 0.6690995812117453, + "grad_norm": 1422.2642081678534, + "learning_rate": 5.024685803459013e-06, + "loss": 234.8015, + "step": 34790 + }, + { + "epoch": 0.6692919064722258, + "grad_norm": 1499.545862262694, + "learning_rate": 5.019392584031628e-06, + "loss": 232.897, + "step": 34800 + }, + { + "epoch": 0.6694842317327063, + "grad_norm": 1664.45033227328, + "learning_rate": 5.0141012197982534e-06, + "loss": 248.728, + "step": 34810 + }, + { + "epoch": 0.6696765569931868, + "grad_norm": 1612.1865810594797, + "learning_rate": 5.0088117127298285e-06, + "loss": 237.6479, + "step": 34820 + }, + { + "epoch": 0.6698688822536674, + "grad_norm": 1462.8324785069049, + "learning_rate": 5.003524064796608e-06, + "loss": 240.4195, + "step": 34830 + }, + { + "epoch": 0.6700612075141479, + "grad_norm": 1560.2940839790592, + "learning_rate": 4.998238277968149e-06, + "loss": 242.2532, + "step": 34840 + }, + { + "epoch": 0.6702535327746284, + "grad_norm": 1653.3102655389425, + "learning_rate": 4.992954354213331e-06, + "loss": 236.3106, + "step": 34850 + }, + { + "epoch": 0.6704458580351089, + "grad_norm": 1717.3012319069778, + "learning_rate": 4.98767229550032e-06, + "loss": 245.5809, + "step": 34860 + }, + { + "epoch": 0.6706381832955896, + "grad_norm": 1649.3152282098256, + "learning_rate": 4.982392103796595e-06, + "loss": 236.3104, + "step": 34870 + }, + { + "epoch": 0.6708305085560701, + "grad_norm": 1592.2440026780055, + "learning_rate": 4.977113781068945e-06, + "loss": 245.7599, + "step": 34880 + }, + { + "epoch": 0.6710228338165506, + "grad_norm": 1477.387494514049, + "learning_rate": 4.971837329283458e-06, + "loss": 243.1606, + "step": 34890 + }, + { + "epoch": 0.6712151590770311, + "grad_norm": 1510.9082515170664, + "learning_rate": 4.966562750405517e-06, + "loss": 249.1312, + "step": 34900 + }, + { + "epoch": 0.6714074843375116, + "grad_norm": 1667.1164304552096, + "learning_rate": 4.9612900463998274e-06, + "loss": 239.2116, + "step": 34910 + }, + { + "epoch": 0.6715998095979921, + "grad_norm": 1596.9511155995442, + "learning_rate": 4.9560192192303735e-06, + "loss": 250.2845, + "step": 34920 + }, + { + "epoch": 0.6717921348584727, + "grad_norm": 1638.6961634633678, + "learning_rate": 4.95075027086046e-06, + "loss": 236.8568, + "step": 34930 + }, + { + "epoch": 0.6719844601189532, + "grad_norm": 1707.5169579675846, + "learning_rate": 4.9454832032526755e-06, + "loss": 247.0217, + "step": 34940 + }, + { + "epoch": 0.6721767853794337, + "grad_norm": 1534.7959342305119, + "learning_rate": 4.940218018368924e-06, + "loss": 240.9309, + "step": 34950 + }, + { + "epoch": 0.6723691106399142, + "grad_norm": 1477.2685584523297, + "learning_rate": 4.934954718170396e-06, + "loss": 238.7597, + "step": 34960 + }, + { + "epoch": 0.6725614359003947, + "grad_norm": 1573.6164713650567, + "learning_rate": 4.9296933046175834e-06, + "loss": 234.4981, + "step": 34970 + }, + { + "epoch": 0.6727537611608753, + "grad_norm": 1660.8753066222212, + "learning_rate": 4.924433779670271e-06, + "loss": 234.8812, + "step": 34980 + }, + { + "epoch": 0.6729460864213558, + "grad_norm": 1472.2716227435594, + "learning_rate": 4.9191761452875554e-06, + "loss": 248.6342, + "step": 34990 + }, + { + "epoch": 0.6731384116818363, + "grad_norm": 1549.4541004053892, + "learning_rate": 4.913920403427812e-06, + "loss": 238.4336, + "step": 35000 + }, + { + "epoch": 0.6733307369423168, + "grad_norm": 1985.7517243036866, + "learning_rate": 4.908666556048719e-06, + "loss": 236.0099, + "step": 35010 + }, + { + "epoch": 0.6735230622027973, + "grad_norm": 1546.281868327463, + "learning_rate": 4.903414605107244e-06, + "loss": 237.7881, + "step": 35020 + }, + { + "epoch": 0.6737153874632779, + "grad_norm": 1556.804223861094, + "learning_rate": 4.89816455255966e-06, + "loss": 239.3844, + "step": 35030 + }, + { + "epoch": 0.6739077127237584, + "grad_norm": 1660.4441269248118, + "learning_rate": 4.892916400361516e-06, + "loss": 241.0597, + "step": 35040 + }, + { + "epoch": 0.6741000379842389, + "grad_norm": 1537.1815918548596, + "learning_rate": 4.887670150467671e-06, + "loss": 240.1067, + "step": 35050 + }, + { + "epoch": 0.6742923632447194, + "grad_norm": 1374.8726925474718, + "learning_rate": 4.882425804832258e-06, + "loss": 239.3176, + "step": 35060 + }, + { + "epoch": 0.6744846885052, + "grad_norm": 13431.556008525862, + "learning_rate": 4.8771833654087165e-06, + "loss": 241.3402, + "step": 35070 + }, + { + "epoch": 0.6746770137656806, + "grad_norm": 1681.119131350578, + "learning_rate": 4.8719428341497665e-06, + "loss": 240.1362, + "step": 35080 + }, + { + "epoch": 0.6748693390261611, + "grad_norm": 1565.9377754503635, + "learning_rate": 4.866704213007413e-06, + "loss": 243.6768, + "step": 35090 + }, + { + "epoch": 0.6750616642866416, + "grad_norm": 1595.1903327164287, + "learning_rate": 4.861467503932965e-06, + "loss": 242.1631, + "step": 35100 + }, + { + "epoch": 0.6752539895471221, + "grad_norm": 1430.9472111458415, + "learning_rate": 4.856232708877007e-06, + "loss": 234.3875, + "step": 35110 + }, + { + "epoch": 0.6754463148076026, + "grad_norm": 1757.7523260708806, + "learning_rate": 4.8509998297894075e-06, + "loss": 245.6894, + "step": 35120 + }, + { + "epoch": 0.6756386400680832, + "grad_norm": 1709.9388251090809, + "learning_rate": 4.845768868619338e-06, + "loss": 239.7329, + "step": 35130 + }, + { + "epoch": 0.6758309653285637, + "grad_norm": 1790.13059566027, + "learning_rate": 4.8405398273152405e-06, + "loss": 260.7546, + "step": 35140 + }, + { + "epoch": 0.6760232905890442, + "grad_norm": 1695.99320182758, + "learning_rate": 4.8353127078248455e-06, + "loss": 250.915, + "step": 35150 + }, + { + "epoch": 0.6762156158495247, + "grad_norm": 1654.7286934425988, + "learning_rate": 4.830087512095164e-06, + "loss": 236.3527, + "step": 35160 + }, + { + "epoch": 0.6764079411100052, + "grad_norm": 1518.8809802475955, + "learning_rate": 4.824864242072506e-06, + "loss": 236.217, + "step": 35170 + }, + { + "epoch": 0.6766002663704858, + "grad_norm": 1676.1492450616627, + "learning_rate": 4.819642899702447e-06, + "loss": 239.2257, + "step": 35180 + }, + { + "epoch": 0.6767925916309663, + "grad_norm": 1604.8433463634062, + "learning_rate": 4.814423486929846e-06, + "loss": 238.3168, + "step": 35190 + }, + { + "epoch": 0.6769849168914468, + "grad_norm": 1555.6284985922198, + "learning_rate": 4.809206005698856e-06, + "loss": 236.2358, + "step": 35200 + }, + { + "epoch": 0.6771772421519273, + "grad_norm": 1501.771519891142, + "learning_rate": 4.803990457952903e-06, + "loss": 250.7805, + "step": 35210 + }, + { + "epoch": 0.6773695674124078, + "grad_norm": 1598.6267349997, + "learning_rate": 4.798776845634688e-06, + "loss": 235.7079, + "step": 35220 + }, + { + "epoch": 0.6775618926728884, + "grad_norm": 1610.666340493888, + "learning_rate": 4.793565170686196e-06, + "loss": 242.6579, + "step": 35230 + }, + { + "epoch": 0.6777542179333689, + "grad_norm": 1514.145068771424, + "learning_rate": 4.7883554350486925e-06, + "loss": 246.1071, + "step": 35240 + }, + { + "epoch": 0.6779465431938494, + "grad_norm": 1569.7645123417265, + "learning_rate": 4.783147640662716e-06, + "loss": 234.9182, + "step": 35250 + }, + { + "epoch": 0.6781388684543299, + "grad_norm": 1843.8223095579276, + "learning_rate": 4.777941789468086e-06, + "loss": 238.5698, + "step": 35260 + }, + { + "epoch": 0.6783311937148104, + "grad_norm": 1702.281904489595, + "learning_rate": 4.772737883403889e-06, + "loss": 247.3601, + "step": 35270 + }, + { + "epoch": 0.6785235189752911, + "grad_norm": 1628.4594832928703, + "learning_rate": 4.767535924408504e-06, + "loss": 242.4072, + "step": 35280 + }, + { + "epoch": 0.6787158442357716, + "grad_norm": 1548.370651253724, + "learning_rate": 4.76233591441957e-06, + "loss": 241.6772, + "step": 35290 + }, + { + "epoch": 0.6789081694962521, + "grad_norm": 1793.823798929674, + "learning_rate": 4.757137855374001e-06, + "loss": 236.6288, + "step": 35300 + }, + { + "epoch": 0.6791004947567326, + "grad_norm": 1642.3865718885472, + "learning_rate": 4.751941749207996e-06, + "loss": 242.9599, + "step": 35310 + }, + { + "epoch": 0.6792928200172131, + "grad_norm": 1465.3930417500717, + "learning_rate": 4.746747597857014e-06, + "loss": 238.4086, + "step": 35320 + }, + { + "epoch": 0.6794851452776937, + "grad_norm": 1535.2361090234504, + "learning_rate": 4.74155540325579e-06, + "loss": 241.5043, + "step": 35330 + }, + { + "epoch": 0.6796774705381742, + "grad_norm": 1792.7497882019732, + "learning_rate": 4.736365167338333e-06, + "loss": 247.4346, + "step": 35340 + }, + { + "epoch": 0.6798697957986547, + "grad_norm": 1669.936825913557, + "learning_rate": 4.731176892037918e-06, + "loss": 235.9599, + "step": 35350 + }, + { + "epoch": 0.6800621210591352, + "grad_norm": 1473.0361533499533, + "learning_rate": 4.725990579287097e-06, + "loss": 233.579, + "step": 35360 + }, + { + "epoch": 0.6802544463196157, + "grad_norm": 2305.0392250606014, + "learning_rate": 4.720806231017676e-06, + "loss": 242.3682, + "step": 35370 + }, + { + "epoch": 0.6804467715800963, + "grad_norm": 1739.121755322293, + "learning_rate": 4.7156238491607506e-06, + "loss": 241.138, + "step": 35380 + }, + { + "epoch": 0.6806390968405768, + "grad_norm": 1638.4784515221852, + "learning_rate": 4.710443435646666e-06, + "loss": 237.0229, + "step": 35390 + }, + { + "epoch": 0.6808314221010573, + "grad_norm": 1481.2497186672954, + "learning_rate": 4.705264992405043e-06, + "loss": 236.662, + "step": 35400 + }, + { + "epoch": 0.6810237473615378, + "grad_norm": 1684.8178681389418, + "learning_rate": 4.700088521364761e-06, + "loss": 236.6188, + "step": 35410 + }, + { + "epoch": 0.6812160726220183, + "grad_norm": 1562.625843975324, + "learning_rate": 4.694914024453977e-06, + "loss": 241.3342, + "step": 35420 + }, + { + "epoch": 0.6814083978824989, + "grad_norm": 1727.2125847675445, + "learning_rate": 4.689741503600103e-06, + "loss": 240.6263, + "step": 35430 + }, + { + "epoch": 0.6816007231429794, + "grad_norm": 1902.1493212950902, + "learning_rate": 4.684570960729818e-06, + "loss": 240.4563, + "step": 35440 + }, + { + "epoch": 0.6817930484034599, + "grad_norm": 1510.626143137195, + "learning_rate": 4.679402397769057e-06, + "loss": 234.0074, + "step": 35450 + }, + { + "epoch": 0.6819853736639404, + "grad_norm": 1834.008118031445, + "learning_rate": 4.674235816643035e-06, + "loss": 251.336, + "step": 35460 + }, + { + "epoch": 0.6821776989244209, + "grad_norm": 1391.0513980372607, + "learning_rate": 4.66907121927621e-06, + "loss": 237.735, + "step": 35470 + }, + { + "epoch": 0.6823700241849016, + "grad_norm": 1525.4357475208928, + "learning_rate": 4.663908607592316e-06, + "loss": 235.0554, + "step": 35480 + }, + { + "epoch": 0.6825623494453821, + "grad_norm": 1417.3135561199801, + "learning_rate": 4.658747983514334e-06, + "loss": 229.8492, + "step": 35490 + }, + { + "epoch": 0.6827546747058626, + "grad_norm": 1576.8775379958165, + "learning_rate": 4.653589348964517e-06, + "loss": 238.249, + "step": 35500 + }, + { + "epoch": 0.6829469999663431, + "grad_norm": 1602.8624964794892, + "learning_rate": 4.648432705864369e-06, + "loss": 259.1102, + "step": 35510 + }, + { + "epoch": 0.6831393252268236, + "grad_norm": 1517.6239958002438, + "learning_rate": 4.64327805613465e-06, + "loss": 246.4283, + "step": 35520 + }, + { + "epoch": 0.6833316504873042, + "grad_norm": 1534.8169871231958, + "learning_rate": 4.638125401695391e-06, + "loss": 236.2565, + "step": 35530 + }, + { + "epoch": 0.6835239757477847, + "grad_norm": 1584.166090396211, + "learning_rate": 4.632974744465865e-06, + "loss": 244.4494, + "step": 35540 + }, + { + "epoch": 0.6837163010082652, + "grad_norm": 1473.9590014564478, + "learning_rate": 4.627826086364603e-06, + "loss": 246.8307, + "step": 35550 + }, + { + "epoch": 0.6839086262687457, + "grad_norm": 2086.6172954712015, + "learning_rate": 4.622679429309404e-06, + "loss": 240.9162, + "step": 35560 + }, + { + "epoch": 0.6841009515292262, + "grad_norm": 1533.2570787807176, + "learning_rate": 4.617534775217307e-06, + "loss": 229.8946, + "step": 35570 + }, + { + "epoch": 0.6842932767897068, + "grad_norm": 1578.873240031328, + "learning_rate": 4.6123921260046135e-06, + "loss": 234.8505, + "step": 35580 + }, + { + "epoch": 0.6844856020501873, + "grad_norm": 1577.3051897353623, + "learning_rate": 4.607251483586869e-06, + "loss": 244.1948, + "step": 35590 + }, + { + "epoch": 0.6846779273106678, + "grad_norm": 1467.8752814913016, + "learning_rate": 4.6021128498788855e-06, + "loss": 231.7717, + "step": 35600 + }, + { + "epoch": 0.6848702525711483, + "grad_norm": 1424.6796093895823, + "learning_rate": 4.596976226794718e-06, + "loss": 232.1213, + "step": 35610 + }, + { + "epoch": 0.6850625778316288, + "grad_norm": 1571.2834264316803, + "learning_rate": 4.591841616247669e-06, + "loss": 245.3532, + "step": 35620 + }, + { + "epoch": 0.6852549030921093, + "grad_norm": 1676.7221917131023, + "learning_rate": 4.586709020150297e-06, + "loss": 236.3314, + "step": 35630 + }, + { + "epoch": 0.6854472283525899, + "grad_norm": 1692.7840442568893, + "learning_rate": 4.581578440414417e-06, + "loss": 235.2596, + "step": 35640 + }, + { + "epoch": 0.6856395536130704, + "grad_norm": 1527.4220045571374, + "learning_rate": 4.576449878951079e-06, + "loss": 241.827, + "step": 35650 + }, + { + "epoch": 0.6858318788735509, + "grad_norm": 1589.0963502807458, + "learning_rate": 4.571323337670584e-06, + "loss": 238.6202, + "step": 35660 + }, + { + "epoch": 0.6860242041340314, + "grad_norm": 1476.2895786090012, + "learning_rate": 4.566198818482494e-06, + "loss": 240.941, + "step": 35670 + }, + { + "epoch": 0.686216529394512, + "grad_norm": 1486.6620378889293, + "learning_rate": 4.5610763232956e-06, + "loss": 241.7036, + "step": 35680 + }, + { + "epoch": 0.6864088546549926, + "grad_norm": 1613.6380638201672, + "learning_rate": 4.55595585401795e-06, + "loss": 248.1098, + "step": 35690 + }, + { + "epoch": 0.6866011799154731, + "grad_norm": 1677.7797662336263, + "learning_rate": 4.550837412556828e-06, + "loss": 249.7874, + "step": 35700 + }, + { + "epoch": 0.6867935051759536, + "grad_norm": 1636.632598513363, + "learning_rate": 4.545721000818778e-06, + "loss": 247.96, + "step": 35710 + }, + { + "epoch": 0.6869858304364341, + "grad_norm": 1561.3114680973345, + "learning_rate": 4.540606620709575e-06, + "loss": 255.0879, + "step": 35720 + }, + { + "epoch": 0.6871781556969146, + "grad_norm": 1508.6229173893246, + "learning_rate": 4.535494274134236e-06, + "loss": 245.9509, + "step": 35730 + }, + { + "epoch": 0.6873704809573952, + "grad_norm": 1597.9553692072259, + "learning_rate": 4.530383962997036e-06, + "loss": 240.9111, + "step": 35740 + }, + { + "epoch": 0.6875628062178757, + "grad_norm": 1715.5095172633003, + "learning_rate": 4.525275689201476e-06, + "loss": 250.077, + "step": 35750 + }, + { + "epoch": 0.6877551314783562, + "grad_norm": 1799.9672307505061, + "learning_rate": 4.520169454650299e-06, + "loss": 242.9612, + "step": 35760 + }, + { + "epoch": 0.6879474567388367, + "grad_norm": 1682.473233411806, + "learning_rate": 4.515065261245498e-06, + "loss": 244.1876, + "step": 35770 + }, + { + "epoch": 0.6881397819993172, + "grad_norm": 1641.163603961195, + "learning_rate": 4.509963110888305e-06, + "loss": 239.5544, + "step": 35780 + }, + { + "epoch": 0.6883321072597978, + "grad_norm": 1595.9848110961755, + "learning_rate": 4.504863005479182e-06, + "loss": 234.0018, + "step": 35790 + }, + { + "epoch": 0.6885244325202783, + "grad_norm": 1766.9205349420956, + "learning_rate": 4.499764946917834e-06, + "loss": 243.1572, + "step": 35800 + }, + { + "epoch": 0.6887167577807588, + "grad_norm": 1565.7807598365366, + "learning_rate": 4.494668937103201e-06, + "loss": 240.1978, + "step": 35810 + }, + { + "epoch": 0.6889090830412393, + "grad_norm": 1578.7619336539742, + "learning_rate": 4.489574977933467e-06, + "loss": 235.9885, + "step": 35820 + }, + { + "epoch": 0.6891014083017198, + "grad_norm": 1560.615096373814, + "learning_rate": 4.484483071306048e-06, + "loss": 246.358, + "step": 35830 + }, + { + "epoch": 0.6892937335622004, + "grad_norm": 1433.1483361624723, + "learning_rate": 4.479393219117588e-06, + "loss": 235.0705, + "step": 35840 + }, + { + "epoch": 0.6894860588226809, + "grad_norm": 1432.4922794531635, + "learning_rate": 4.474305423263984e-06, + "loss": 236.9843, + "step": 35850 + }, + { + "epoch": 0.6896783840831614, + "grad_norm": 1669.7486936920873, + "learning_rate": 4.469219685640348e-06, + "loss": 232.8931, + "step": 35860 + }, + { + "epoch": 0.6898707093436419, + "grad_norm": 1670.3213881608501, + "learning_rate": 4.4641360081410356e-06, + "loss": 239.7855, + "step": 35870 + }, + { + "epoch": 0.6900630346041224, + "grad_norm": 1769.9006754087213, + "learning_rate": 4.4590543926596285e-06, + "loss": 243.5827, + "step": 35880 + }, + { + "epoch": 0.6902553598646031, + "grad_norm": 1546.2195805647068, + "learning_rate": 4.453974841088953e-06, + "loss": 244.6879, + "step": 35890 + }, + { + "epoch": 0.6904476851250836, + "grad_norm": 1529.8960950734172, + "learning_rate": 4.448897355321049e-06, + "loss": 238.6065, + "step": 35900 + }, + { + "epoch": 0.6906400103855641, + "grad_norm": 1515.8719221340143, + "learning_rate": 4.443821937247205e-06, + "loss": 237.8188, + "step": 35910 + }, + { + "epoch": 0.6908323356460446, + "grad_norm": 1620.5687062007185, + "learning_rate": 4.438748588757922e-06, + "loss": 238.8739, + "step": 35920 + }, + { + "epoch": 0.6910246609065251, + "grad_norm": 1553.720992753021, + "learning_rate": 4.433677311742945e-06, + "loss": 236.6553, + "step": 35930 + }, + { + "epoch": 0.6912169861670057, + "grad_norm": 1592.7902512353996, + "learning_rate": 4.428608108091241e-06, + "loss": 236.2994, + "step": 35940 + }, + { + "epoch": 0.6914093114274862, + "grad_norm": 1512.5315467280025, + "learning_rate": 4.4235409796909965e-06, + "loss": 237.001, + "step": 35950 + }, + { + "epoch": 0.6916016366879667, + "grad_norm": 1592.8641353374699, + "learning_rate": 4.418475928429644e-06, + "loss": 237.5281, + "step": 35960 + }, + { + "epoch": 0.6917939619484472, + "grad_norm": 1673.5055085300598, + "learning_rate": 4.413412956193826e-06, + "loss": 247.5375, + "step": 35970 + }, + { + "epoch": 0.6919862872089277, + "grad_norm": 1658.7893733343092, + "learning_rate": 4.4083520648694156e-06, + "loss": 230.0056, + "step": 35980 + }, + { + "epoch": 0.6921786124694083, + "grad_norm": 1564.9546440812223, + "learning_rate": 4.4032932563415075e-06, + "loss": 236.3262, + "step": 35990 + }, + { + "epoch": 0.6923709377298888, + "grad_norm": 1537.2178303272024, + "learning_rate": 4.398236532494434e-06, + "loss": 237.2992, + "step": 36000 + }, + { + "epoch": 0.6925632629903693, + "grad_norm": 1657.592525169498, + "learning_rate": 4.393181895211735e-06, + "loss": 248.6368, + "step": 36010 + }, + { + "epoch": 0.6927555882508498, + "grad_norm": 1578.9140177466093, + "learning_rate": 4.388129346376177e-06, + "loss": 236.5844, + "step": 36020 + }, + { + "epoch": 0.6929479135113303, + "grad_norm": 1603.4710988208183, + "learning_rate": 4.383078887869759e-06, + "loss": 234.7896, + "step": 36030 + }, + { + "epoch": 0.6931402387718109, + "grad_norm": 1451.0021624587962, + "learning_rate": 4.378030521573683e-06, + "loss": 233.4693, + "step": 36040 + }, + { + "epoch": 0.6933325640322914, + "grad_norm": 1572.0228397647154, + "learning_rate": 4.372984249368393e-06, + "loss": 237.8678, + "step": 36050 + }, + { + "epoch": 0.6935248892927719, + "grad_norm": 1603.752642575329, + "learning_rate": 4.367940073133533e-06, + "loss": 235.3691, + "step": 36060 + }, + { + "epoch": 0.6937172145532524, + "grad_norm": 1522.568548592612, + "learning_rate": 4.362897994747982e-06, + "loss": 238.5645, + "step": 36070 + }, + { + "epoch": 0.6939095398137329, + "grad_norm": 1468.6477682731438, + "learning_rate": 4.35785801608983e-06, + "loss": 230.4714, + "step": 36080 + }, + { + "epoch": 0.6941018650742136, + "grad_norm": 1834.847687325395, + "learning_rate": 4.352820139036379e-06, + "loss": 238.1109, + "step": 36090 + }, + { + "epoch": 0.6942941903346941, + "grad_norm": 1462.0342093144627, + "learning_rate": 4.347784365464163e-06, + "loss": 237.521, + "step": 36100 + }, + { + "epoch": 0.6944865155951746, + "grad_norm": 1418.0825557405747, + "learning_rate": 4.342750697248922e-06, + "loss": 240.2118, + "step": 36110 + }, + { + "epoch": 0.6946788408556551, + "grad_norm": 1559.7967771585688, + "learning_rate": 4.337719136265614e-06, + "loss": 230.327, + "step": 36120 + }, + { + "epoch": 0.6948711661161356, + "grad_norm": 1417.2288115778736, + "learning_rate": 4.332689684388408e-06, + "loss": 233.4392, + "step": 36130 + }, + { + "epoch": 0.6950634913766162, + "grad_norm": 1523.724144053179, + "learning_rate": 4.327662343490701e-06, + "loss": 237.2281, + "step": 36140 + }, + { + "epoch": 0.6952558166370967, + "grad_norm": 1581.0526726482626, + "learning_rate": 4.322637115445088e-06, + "loss": 244.6629, + "step": 36150 + }, + { + "epoch": 0.6954481418975772, + "grad_norm": 1550.2562505480962, + "learning_rate": 4.3176140021233845e-06, + "loss": 236.3199, + "step": 36160 + }, + { + "epoch": 0.6956404671580577, + "grad_norm": 1579.3722451659926, + "learning_rate": 4.312593005396615e-06, + "loss": 239.1987, + "step": 36170 + }, + { + "epoch": 0.6958327924185382, + "grad_norm": 1474.3929174394623, + "learning_rate": 4.307574127135022e-06, + "loss": 233.4023, + "step": 36180 + }, + { + "epoch": 0.6960251176790188, + "grad_norm": 1748.2313424856814, + "learning_rate": 4.3025573692080516e-06, + "loss": 239.5724, + "step": 36190 + }, + { + "epoch": 0.6962174429394993, + "grad_norm": 1668.808792585717, + "learning_rate": 4.297542733484364e-06, + "loss": 234.4606, + "step": 36200 + }, + { + "epoch": 0.6964097681999798, + "grad_norm": 1461.0731753006285, + "learning_rate": 4.292530221831832e-06, + "loss": 240.304, + "step": 36210 + }, + { + "epoch": 0.6966020934604603, + "grad_norm": 1615.1909924772508, + "learning_rate": 4.2875198361175305e-06, + "loss": 237.9068, + "step": 36220 + }, + { + "epoch": 0.6967944187209408, + "grad_norm": 1383.8775109463547, + "learning_rate": 4.282511578207746e-06, + "loss": 234.1359, + "step": 36230 + }, + { + "epoch": 0.6969867439814214, + "grad_norm": 1466.7274005129375, + "learning_rate": 4.277505449967967e-06, + "loss": 240.9818, + "step": 36240 + }, + { + "epoch": 0.6971790692419019, + "grad_norm": 1392.4392078728338, + "learning_rate": 4.2725014532629015e-06, + "loss": 239.9398, + "step": 36250 + }, + { + "epoch": 0.6973713945023824, + "grad_norm": 1472.6089944240452, + "learning_rate": 4.267499589956453e-06, + "loss": 237.7319, + "step": 36260 + }, + { + "epoch": 0.6975637197628629, + "grad_norm": 1523.7424218507942, + "learning_rate": 4.262499861911727e-06, + "loss": 243.8434, + "step": 36270 + }, + { + "epoch": 0.6977560450233434, + "grad_norm": 1652.6107732906125, + "learning_rate": 4.257502270991048e-06, + "loss": 239.0709, + "step": 36280 + }, + { + "epoch": 0.697948370283824, + "grad_norm": 1577.8605099156312, + "learning_rate": 4.252506819055934e-06, + "loss": 242.0587, + "step": 36290 + }, + { + "epoch": 0.6981406955443046, + "grad_norm": 1610.7692035032255, + "learning_rate": 4.2475135079671045e-06, + "loss": 237.3633, + "step": 36300 + }, + { + "epoch": 0.6983330208047851, + "grad_norm": 1413.6707835153559, + "learning_rate": 4.242522339584486e-06, + "loss": 233.5362, + "step": 36310 + }, + { + "epoch": 0.6985253460652656, + "grad_norm": 1373.494526053818, + "learning_rate": 4.2375333157672114e-06, + "loss": 238.8478, + "step": 36320 + }, + { + "epoch": 0.6987176713257461, + "grad_norm": 1474.978860781716, + "learning_rate": 4.232546438373604e-06, + "loss": 237.1909, + "step": 36330 + }, + { + "epoch": 0.6989099965862267, + "grad_norm": 1685.7446852498474, + "learning_rate": 4.227561709261198e-06, + "loss": 230.6656, + "step": 36340 + }, + { + "epoch": 0.6991023218467072, + "grad_norm": 1444.990274603899, + "learning_rate": 4.222579130286716e-06, + "loss": 239.9439, + "step": 36350 + }, + { + "epoch": 0.6992946471071877, + "grad_norm": 1440.9177629702247, + "learning_rate": 4.217598703306095e-06, + "loss": 242.8563, + "step": 36360 + }, + { + "epoch": 0.6994869723676682, + "grad_norm": 1482.7561204465965, + "learning_rate": 4.212620430174457e-06, + "loss": 242.8523, + "step": 36370 + }, + { + "epoch": 0.6996792976281487, + "grad_norm": 1684.3337713894173, + "learning_rate": 4.207644312746124e-06, + "loss": 236.7045, + "step": 36380 + }, + { + "epoch": 0.6998716228886293, + "grad_norm": 1570.0048378372696, + "learning_rate": 4.202670352874625e-06, + "loss": 234.627, + "step": 36390 + }, + { + "epoch": 0.7000639481491098, + "grad_norm": 1551.606464965818, + "learning_rate": 4.197698552412672e-06, + "loss": 245.2033, + "step": 36400 + }, + { + "epoch": 0.7002562734095903, + "grad_norm": 1571.8202653053443, + "learning_rate": 4.192728913212181e-06, + "loss": 239.5426, + "step": 36410 + }, + { + "epoch": 0.7004485986700708, + "grad_norm": 1429.5459380760037, + "learning_rate": 4.187761437124256e-06, + "loss": 230.2199, + "step": 36420 + }, + { + "epoch": 0.7006409239305513, + "grad_norm": 1998.7112756520833, + "learning_rate": 4.182796125999207e-06, + "loss": 242.5665, + "step": 36430 + }, + { + "epoch": 0.7008332491910318, + "grad_norm": 1859.073618336908, + "learning_rate": 4.177832981686526e-06, + "loss": 242.1965, + "step": 36440 + }, + { + "epoch": 0.7010255744515124, + "grad_norm": 1619.6290495473081, + "learning_rate": 4.172872006034899e-06, + "loss": 238.2478, + "step": 36450 + }, + { + "epoch": 0.7012178997119929, + "grad_norm": 1620.2636076833003, + "learning_rate": 4.167913200892217e-06, + "loss": 233.8886, + "step": 36460 + }, + { + "epoch": 0.7014102249724734, + "grad_norm": 1422.7864452436954, + "learning_rate": 4.162956568105543e-06, + "loss": 245.792, + "step": 36470 + }, + { + "epoch": 0.7016025502329539, + "grad_norm": 1540.3041101411256, + "learning_rate": 4.158002109521149e-06, + "loss": 233.4258, + "step": 36480 + }, + { + "epoch": 0.7017948754934344, + "grad_norm": 1590.403755164191, + "learning_rate": 4.153049826984482e-06, + "loss": 238.6875, + "step": 36490 + }, + { + "epoch": 0.7019872007539151, + "grad_norm": 1486.442548055403, + "learning_rate": 4.148099722340192e-06, + "loss": 245.4797, + "step": 36500 + }, + { + "epoch": 0.7021795260143956, + "grad_norm": 1573.1200138846857, + "learning_rate": 4.143151797432109e-06, + "loss": 236.4589, + "step": 36510 + }, + { + "epoch": 0.7023718512748761, + "grad_norm": 1465.6549467487025, + "learning_rate": 4.1382060541032505e-06, + "loss": 239.0211, + "step": 36520 + }, + { + "epoch": 0.7025641765353566, + "grad_norm": 1486.1555173082197, + "learning_rate": 4.133262494195824e-06, + "loss": 238.1677, + "step": 36530 + }, + { + "epoch": 0.7027565017958372, + "grad_norm": 1468.431071874715, + "learning_rate": 4.12832111955123e-06, + "loss": 228.7636, + "step": 36540 + }, + { + "epoch": 0.7029488270563177, + "grad_norm": 1635.3959797153484, + "learning_rate": 4.123381932010044e-06, + "loss": 240.603, + "step": 36550 + }, + { + "epoch": 0.7031411523167982, + "grad_norm": 1513.0296072196882, + "learning_rate": 4.11844493341203e-06, + "loss": 238.3765, + "step": 36560 + }, + { + "epoch": 0.7033334775772787, + "grad_norm": 1420.5802294866357, + "learning_rate": 4.113510125596145e-06, + "loss": 240.6861, + "step": 36570 + }, + { + "epoch": 0.7035258028377592, + "grad_norm": 1433.1778599123843, + "learning_rate": 4.1085775104005186e-06, + "loss": 229.3261, + "step": 36580 + }, + { + "epoch": 0.7037181280982397, + "grad_norm": 1581.9472138163, + "learning_rate": 4.10364708966247e-06, + "loss": 239.3117, + "step": 36590 + }, + { + "epoch": 0.7039104533587203, + "grad_norm": 1431.5859957338066, + "learning_rate": 4.098718865218496e-06, + "loss": 229.8606, + "step": 36600 + }, + { + "epoch": 0.7041027786192008, + "grad_norm": 1592.5707742720695, + "learning_rate": 4.0937928389042815e-06, + "loss": 245.4869, + "step": 36610 + }, + { + "epoch": 0.7042951038796813, + "grad_norm": 1541.539229887895, + "learning_rate": 4.088869012554694e-06, + "loss": 232.0533, + "step": 36620 + }, + { + "epoch": 0.7044874291401618, + "grad_norm": 1655.9729420262363, + "learning_rate": 4.08394738800377e-06, + "loss": 236.9838, + "step": 36630 + }, + { + "epoch": 0.7046797544006423, + "grad_norm": 1548.9981635393065, + "learning_rate": 4.07902796708474e-06, + "loss": 231.4997, + "step": 36640 + }, + { + "epoch": 0.7048720796611229, + "grad_norm": 1533.6145908623128, + "learning_rate": 4.074110751630005e-06, + "loss": 243.34, + "step": 36650 + }, + { + "epoch": 0.7050644049216034, + "grad_norm": 1534.2479666720096, + "learning_rate": 4.0691957434711446e-06, + "loss": 233.8075, + "step": 36660 + }, + { + "epoch": 0.7052567301820839, + "grad_norm": 1466.9178573390122, + "learning_rate": 4.0642829444389165e-06, + "loss": 235.0018, + "step": 36670 + }, + { + "epoch": 0.7054490554425644, + "grad_norm": 1371.9886259276007, + "learning_rate": 4.059372356363263e-06, + "loss": 244.473, + "step": 36680 + }, + { + "epoch": 0.7056413807030449, + "grad_norm": 1395.573012847938, + "learning_rate": 4.054463981073296e-06, + "loss": 236.107, + "step": 36690 + }, + { + "epoch": 0.7058337059635255, + "grad_norm": 1460.9548441821817, + "learning_rate": 4.049557820397297e-06, + "loss": 238.7327, + "step": 36700 + }, + { + "epoch": 0.7060260312240061, + "grad_norm": 1556.3728261556753, + "learning_rate": 4.044653876162738e-06, + "loss": 228.712, + "step": 36710 + }, + { + "epoch": 0.7062183564844866, + "grad_norm": 1532.8862849101301, + "learning_rate": 4.039752150196257e-06, + "loss": 243.1279, + "step": 36720 + }, + { + "epoch": 0.7064106817449671, + "grad_norm": 1510.087979493412, + "learning_rate": 4.034852644323661e-06, + "loss": 240.6138, + "step": 36730 + }, + { + "epoch": 0.7066030070054476, + "grad_norm": 1488.1642748194001, + "learning_rate": 4.029955360369935e-06, + "loss": 231.6899, + "step": 36740 + }, + { + "epoch": 0.7067953322659282, + "grad_norm": 1668.262096692243, + "learning_rate": 4.0250603001592416e-06, + "loss": 228.8245, + "step": 36750 + }, + { + "epoch": 0.7069876575264087, + "grad_norm": 1449.0467594894396, + "learning_rate": 4.020167465514903e-06, + "loss": 232.0522, + "step": 36760 + }, + { + "epoch": 0.7071799827868892, + "grad_norm": 1732.14284558293, + "learning_rate": 4.015276858259427e-06, + "loss": 236.3973, + "step": 36770 + }, + { + "epoch": 0.7073723080473697, + "grad_norm": 1620.0365120255449, + "learning_rate": 4.0103884802144775e-06, + "loss": 237.6175, + "step": 36780 + }, + { + "epoch": 0.7075646333078502, + "grad_norm": 1435.9687837395886, + "learning_rate": 4.0055023332009e-06, + "loss": 237.5677, + "step": 36790 + }, + { + "epoch": 0.7077569585683308, + "grad_norm": 1430.424433405911, + "learning_rate": 4.000618419038702e-06, + "loss": 224.1724, + "step": 36800 + }, + { + "epoch": 0.7079492838288113, + "grad_norm": 1539.9769017012263, + "learning_rate": 3.9957367395470555e-06, + "loss": 240.6433, + "step": 36810 + }, + { + "epoch": 0.7081416090892918, + "grad_norm": 1497.8846945501607, + "learning_rate": 3.990857296544315e-06, + "loss": 229.3633, + "step": 36820 + }, + { + "epoch": 0.7083339343497723, + "grad_norm": 1478.890507936883, + "learning_rate": 3.985980091847985e-06, + "loss": 232.3452, + "step": 36830 + }, + { + "epoch": 0.7085262596102528, + "grad_norm": 1664.6373127632035, + "learning_rate": 3.981105127274748e-06, + "loss": 237.4508, + "step": 36840 + }, + { + "epoch": 0.7087185848707334, + "grad_norm": 1746.158692369701, + "learning_rate": 3.976232404640441e-06, + "loss": 227.9658, + "step": 36850 + }, + { + "epoch": 0.7089109101312139, + "grad_norm": 1567.4040581206111, + "learning_rate": 3.971361925760081e-06, + "loss": 236.1024, + "step": 36860 + }, + { + "epoch": 0.7091032353916944, + "grad_norm": 1722.1054415455667, + "learning_rate": 3.966493692447838e-06, + "loss": 237.7352, + "step": 36870 + }, + { + "epoch": 0.7092955606521749, + "grad_norm": 1566.9968636591109, + "learning_rate": 3.961627706517044e-06, + "loss": 233.6227, + "step": 36880 + }, + { + "epoch": 0.7094878859126554, + "grad_norm": 1365.377334290341, + "learning_rate": 3.956763969780206e-06, + "loss": 234.3301, + "step": 36890 + }, + { + "epoch": 0.709680211173136, + "grad_norm": 1445.511445977611, + "learning_rate": 3.951902484048978e-06, + "loss": 231.3383, + "step": 36900 + }, + { + "epoch": 0.7098725364336166, + "grad_norm": 1663.3247427441008, + "learning_rate": 3.94704325113419e-06, + "loss": 232.236, + "step": 36910 + }, + { + "epoch": 0.7100648616940971, + "grad_norm": 1400.0997349840698, + "learning_rate": 3.942186272845821e-06, + "loss": 232.9099, + "step": 36920 + }, + { + "epoch": 0.7102571869545776, + "grad_norm": 1363.7355472352858, + "learning_rate": 3.937331550993021e-06, + "loss": 235.6557, + "step": 36930 + }, + { + "epoch": 0.7104495122150581, + "grad_norm": 1670.4160699662211, + "learning_rate": 3.932479087384089e-06, + "loss": 240.1717, + "step": 36940 + }, + { + "epoch": 0.7106418374755387, + "grad_norm": 1505.6961227861493, + "learning_rate": 3.927628883826488e-06, + "loss": 234.9185, + "step": 36950 + }, + { + "epoch": 0.7108341627360192, + "grad_norm": 1435.1502078133735, + "learning_rate": 3.922780942126837e-06, + "loss": 234.7055, + "step": 36960 + }, + { + "epoch": 0.7110264879964997, + "grad_norm": 1376.20604381166, + "learning_rate": 3.91793526409092e-06, + "loss": 235.231, + "step": 36970 + }, + { + "epoch": 0.7112188132569802, + "grad_norm": 1573.3670881740359, + "learning_rate": 3.913091851523667e-06, + "loss": 236.8275, + "step": 36980 + }, + { + "epoch": 0.7114111385174607, + "grad_norm": 1453.3263390209486, + "learning_rate": 3.908250706229168e-06, + "loss": 230.027, + "step": 36990 + }, + { + "epoch": 0.7116034637779413, + "grad_norm": 1485.769996440751, + "learning_rate": 3.903411830010676e-06, + "loss": 242.0607, + "step": 37000 + }, + { + "epoch": 0.7117957890384218, + "grad_norm": 1532.2970997159234, + "learning_rate": 3.8985752246705885e-06, + "loss": 234.7675, + "step": 37010 + }, + { + "epoch": 0.7119881142989023, + "grad_norm": 1547.6265843219417, + "learning_rate": 3.893740892010463e-06, + "loss": 236.3145, + "step": 37020 + }, + { + "epoch": 0.7121804395593828, + "grad_norm": 1561.630207264149, + "learning_rate": 3.888908833831002e-06, + "loss": 239.4003, + "step": 37030 + }, + { + "epoch": 0.7123727648198633, + "grad_norm": 1510.605240861088, + "learning_rate": 3.884079051932073e-06, + "loss": 236.1792, + "step": 37040 + }, + { + "epoch": 0.7125650900803439, + "grad_norm": 1534.3632009540188, + "learning_rate": 3.879251548112692e-06, + "loss": 241.3086, + "step": 37050 + }, + { + "epoch": 0.7127574153408244, + "grad_norm": 1806.6796395736437, + "learning_rate": 3.874426324171019e-06, + "loss": 249.0332, + "step": 37060 + }, + { + "epoch": 0.7129497406013049, + "grad_norm": 2016.3750466670258, + "learning_rate": 3.869603381904377e-06, + "loss": 236.7132, + "step": 37070 + }, + { + "epoch": 0.7131420658617854, + "grad_norm": 1566.106222686981, + "learning_rate": 3.864782723109227e-06, + "loss": 231.7482, + "step": 37080 + }, + { + "epoch": 0.7133343911222659, + "grad_norm": 1560.2987007657275, + "learning_rate": 3.859964349581187e-06, + "loss": 233.1552, + "step": 37090 + }, + { + "epoch": 0.7135267163827465, + "grad_norm": 1769.2260892394934, + "learning_rate": 3.855148263115017e-06, + "loss": 234.646, + "step": 37100 + }, + { + "epoch": 0.713719041643227, + "grad_norm": 1698.0319735982089, + "learning_rate": 3.850334465504637e-06, + "loss": 236.5427, + "step": 37110 + }, + { + "epoch": 0.7139113669037076, + "grad_norm": 1684.4199983771625, + "learning_rate": 3.845522958543104e-06, + "loss": 241.887, + "step": 37120 + }, + { + "epoch": 0.7141036921641881, + "grad_norm": 1844.9808840557555, + "learning_rate": 3.840713744022624e-06, + "loss": 236.2804, + "step": 37130 + }, + { + "epoch": 0.7142960174246686, + "grad_norm": 1541.3606644378585, + "learning_rate": 3.835906823734548e-06, + "loss": 240.3451, + "step": 37140 + }, + { + "epoch": 0.7144883426851492, + "grad_norm": 1627.873940031945, + "learning_rate": 3.831102199469379e-06, + "loss": 249.0218, + "step": 37150 + }, + { + "epoch": 0.7146806679456297, + "grad_norm": 1448.4906422092638, + "learning_rate": 3.826299873016758e-06, + "loss": 238.2984, + "step": 37160 + }, + { + "epoch": 0.7148729932061102, + "grad_norm": 1481.0101010630947, + "learning_rate": 3.821499846165468e-06, + "loss": 227.4023, + "step": 37170 + }, + { + "epoch": 0.7150653184665907, + "grad_norm": 1511.9525245861967, + "learning_rate": 3.816702120703449e-06, + "loss": 234.5009, + "step": 37180 + }, + { + "epoch": 0.7152576437270712, + "grad_norm": 1434.3699061079233, + "learning_rate": 3.8119066984177654e-06, + "loss": 231.3417, + "step": 37190 + }, + { + "epoch": 0.7154499689875518, + "grad_norm": 1602.5048808493707, + "learning_rate": 3.8071135810946415e-06, + "loss": 237.2059, + "step": 37200 + }, + { + "epoch": 0.7156422942480323, + "grad_norm": 1609.5907307870593, + "learning_rate": 3.802322770519424e-06, + "loss": 230.2667, + "step": 37210 + }, + { + "epoch": 0.7158346195085128, + "grad_norm": 1462.1144772868236, + "learning_rate": 3.7975342684766215e-06, + "loss": 229.9174, + "step": 37220 + }, + { + "epoch": 0.7160269447689933, + "grad_norm": 1556.4489530243522, + "learning_rate": 3.792748076749867e-06, + "loss": 236.9995, + "step": 37230 + }, + { + "epoch": 0.7162192700294738, + "grad_norm": 1606.6375549844663, + "learning_rate": 3.787964197121934e-06, + "loss": 237.1684, + "step": 37240 + }, + { + "epoch": 0.7164115952899544, + "grad_norm": 1461.1124908639335, + "learning_rate": 3.7831826313747454e-06, + "loss": 228.4254, + "step": 37250 + }, + { + "epoch": 0.7166039205504349, + "grad_norm": 1624.4875748797022, + "learning_rate": 3.778403381289353e-06, + "loss": 229.4599, + "step": 37260 + }, + { + "epoch": 0.7167962458109154, + "grad_norm": 1461.1178928515196, + "learning_rate": 3.7736264486459486e-06, + "loss": 231.6607, + "step": 37270 + }, + { + "epoch": 0.7169885710713959, + "grad_norm": 1557.728353339117, + "learning_rate": 3.7688518352238555e-06, + "loss": 232.5229, + "step": 37280 + }, + { + "epoch": 0.7171808963318764, + "grad_norm": 1741.0367463683694, + "learning_rate": 3.7640795428015462e-06, + "loss": 237.2978, + "step": 37290 + }, + { + "epoch": 0.717373221592357, + "grad_norm": 1444.1362216994314, + "learning_rate": 3.7593095731566186e-06, + "loss": 232.3923, + "step": 37300 + }, + { + "epoch": 0.7175655468528375, + "grad_norm": 1559.3365992915112, + "learning_rate": 3.7545419280658025e-06, + "loss": 235.3234, + "step": 37310 + }, + { + "epoch": 0.7177578721133181, + "grad_norm": 1553.1545499996446, + "learning_rate": 3.749776609304975e-06, + "loss": 234.207, + "step": 37320 + }, + { + "epoch": 0.7179501973737986, + "grad_norm": 1575.052944165723, + "learning_rate": 3.7450136186491315e-06, + "loss": 237.8557, + "step": 37330 + }, + { + "epoch": 0.7181425226342791, + "grad_norm": 1544.7978492163777, + "learning_rate": 3.7402529578724134e-06, + "loss": 234.4243, + "step": 37340 + }, + { + "epoch": 0.7183348478947597, + "grad_norm": 1504.0280172511, + "learning_rate": 3.735494628748082e-06, + "loss": 234.0796, + "step": 37350 + }, + { + "epoch": 0.7185271731552402, + "grad_norm": 1542.6179104680355, + "learning_rate": 3.730738633048543e-06, + "loss": 230.7367, + "step": 37360 + }, + { + "epoch": 0.7187194984157207, + "grad_norm": 1503.465130204258, + "learning_rate": 3.7259849725453225e-06, + "loss": 238.3747, + "step": 37370 + }, + { + "epoch": 0.7189118236762012, + "grad_norm": 1560.8548193230586, + "learning_rate": 3.7212336490090815e-06, + "loss": 231.5233, + "step": 37380 + }, + { + "epoch": 0.7191041489366817, + "grad_norm": 1625.0760171848426, + "learning_rate": 3.7164846642096053e-06, + "loss": 235.499, + "step": 37390 + }, + { + "epoch": 0.7192964741971622, + "grad_norm": 1611.7693898449481, + "learning_rate": 3.7117380199158204e-06, + "loss": 241.0182, + "step": 37400 + }, + { + "epoch": 0.7194887994576428, + "grad_norm": 1525.2995827557488, + "learning_rate": 3.706993717895768e-06, + "loss": 231.1189, + "step": 37410 + }, + { + "epoch": 0.7196811247181233, + "grad_norm": 1662.461814996555, + "learning_rate": 3.70225175991662e-06, + "loss": 233.8253, + "step": 37420 + }, + { + "epoch": 0.7198734499786038, + "grad_norm": 1627.8829357689497, + "learning_rate": 3.697512147744684e-06, + "loss": 239.7928, + "step": 37430 + }, + { + "epoch": 0.7200657752390843, + "grad_norm": 1580.84535546553, + "learning_rate": 3.6927748831453835e-06, + "loss": 229.8675, + "step": 37440 + }, + { + "epoch": 0.7202581004995648, + "grad_norm": 1691.9899522622936, + "learning_rate": 3.688039967883269e-06, + "loss": 233.9928, + "step": 37450 + }, + { + "epoch": 0.7204504257600454, + "grad_norm": 1646.4956034451407, + "learning_rate": 3.683307403722025e-06, + "loss": 231.9454, + "step": 37460 + }, + { + "epoch": 0.7206427510205259, + "grad_norm": 1452.9406305908544, + "learning_rate": 3.678577192424445e-06, + "loss": 230.8371, + "step": 37470 + }, + { + "epoch": 0.7208350762810064, + "grad_norm": 1909.9190232044855, + "learning_rate": 3.6738493357524628e-06, + "loss": 231.7844, + "step": 37480 + }, + { + "epoch": 0.7210274015414869, + "grad_norm": 1427.9012348817284, + "learning_rate": 3.6691238354671233e-06, + "loss": 234.9695, + "step": 37490 + }, + { + "epoch": 0.7212197268019674, + "grad_norm": 1784.1848292523393, + "learning_rate": 3.664400693328595e-06, + "loss": 235.5823, + "step": 37500 + }, + { + "epoch": 0.721412052062448, + "grad_norm": 1416.093029582277, + "learning_rate": 3.6596799110961746e-06, + "loss": 228.8743, + "step": 37510 + }, + { + "epoch": 0.7216043773229286, + "grad_norm": 1659.8501820012216, + "learning_rate": 3.6549614905282724e-06, + "loss": 239.8036, + "step": 37520 + }, + { + "epoch": 0.7217967025834091, + "grad_norm": 1456.5211880780953, + "learning_rate": 3.6502454333824224e-06, + "loss": 236.1289, + "step": 37530 + }, + { + "epoch": 0.7219890278438896, + "grad_norm": 1852.734474841436, + "learning_rate": 3.6455317414152803e-06, + "loss": 234.7422, + "step": 37540 + }, + { + "epoch": 0.7221813531043701, + "grad_norm": 1547.2671143298355, + "learning_rate": 3.640820416382618e-06, + "loss": 235.2092, + "step": 37550 + }, + { + "epoch": 0.7223736783648507, + "grad_norm": 1453.7809208511683, + "learning_rate": 3.6361114600393242e-06, + "loss": 225.6464, + "step": 37560 + }, + { + "epoch": 0.7225660036253312, + "grad_norm": 1542.9814103822732, + "learning_rate": 3.6314048741394057e-06, + "loss": 237.9018, + "step": 37570 + }, + { + "epoch": 0.7227583288858117, + "grad_norm": 1491.2889064386443, + "learning_rate": 3.6267006604359943e-06, + "loss": 236.3528, + "step": 37580 + }, + { + "epoch": 0.7229506541462922, + "grad_norm": 1678.0753334877468, + "learning_rate": 3.6219988206813285e-06, + "loss": 240.7572, + "step": 37590 + }, + { + "epoch": 0.7231429794067727, + "grad_norm": 1511.7530309298397, + "learning_rate": 3.6172993566267623e-06, + "loss": 229.9834, + "step": 37600 + }, + { + "epoch": 0.7233353046672533, + "grad_norm": 1706.6628385502697, + "learning_rate": 3.6126022700227715e-06, + "loss": 235.563, + "step": 37610 + }, + { + "epoch": 0.7235276299277338, + "grad_norm": 1621.9463661496866, + "learning_rate": 3.6079075626189476e-06, + "loss": 235.9628, + "step": 37620 + }, + { + "epoch": 0.7237199551882143, + "grad_norm": 1541.8512749723611, + "learning_rate": 3.603215236163987e-06, + "loss": 228.3153, + "step": 37630 + }, + { + "epoch": 0.7239122804486948, + "grad_norm": 1430.7869807013765, + "learning_rate": 3.5985252924057023e-06, + "loss": 229.3261, + "step": 37640 + }, + { + "epoch": 0.7241046057091753, + "grad_norm": 1416.9922622696242, + "learning_rate": 3.5938377330910245e-06, + "loss": 232.6925, + "step": 37650 + }, + { + "epoch": 0.7242969309696559, + "grad_norm": 1488.252286117843, + "learning_rate": 3.5891525599659905e-06, + "loss": 233.1205, + "step": 37660 + }, + { + "epoch": 0.7244892562301364, + "grad_norm": 1617.1437070836214, + "learning_rate": 3.5844697747757496e-06, + "loss": 229.868, + "step": 37670 + }, + { + "epoch": 0.7246815814906169, + "grad_norm": 1574.1878686033965, + "learning_rate": 3.5797893792645577e-06, + "loss": 234.2606, + "step": 37680 + }, + { + "epoch": 0.7248739067510974, + "grad_norm": 1689.432210523899, + "learning_rate": 3.5751113751757925e-06, + "loss": 236.0605, + "step": 37690 + }, + { + "epoch": 0.7250662320115779, + "grad_norm": 1479.4623468117168, + "learning_rate": 3.5704357642519295e-06, + "loss": 230.437, + "step": 37700 + }, + { + "epoch": 0.7252585572720585, + "grad_norm": 1619.7816764119484, + "learning_rate": 3.5657625482345526e-06, + "loss": 231.8185, + "step": 37710 + }, + { + "epoch": 0.725450882532539, + "grad_norm": 1693.3841506888232, + "learning_rate": 3.5610917288643655e-06, + "loss": 232.6709, + "step": 37720 + }, + { + "epoch": 0.7256432077930196, + "grad_norm": 1801.9910353301125, + "learning_rate": 3.556423307881167e-06, + "loss": 235.0963, + "step": 37730 + }, + { + "epoch": 0.7258355330535001, + "grad_norm": 1668.7726223905188, + "learning_rate": 3.551757287023865e-06, + "loss": 235.075, + "step": 37740 + }, + { + "epoch": 0.7260278583139806, + "grad_norm": 1388.884080230714, + "learning_rate": 3.547093668030479e-06, + "loss": 235.9934, + "step": 37750 + }, + { + "epoch": 0.7262201835744612, + "grad_norm": 1677.8991278417686, + "learning_rate": 3.542432452638126e-06, + "loss": 234.0942, + "step": 37760 + }, + { + "epoch": 0.7264125088349417, + "grad_norm": 1544.1650085195756, + "learning_rate": 3.5377736425830366e-06, + "loss": 239.614, + "step": 37770 + }, + { + "epoch": 0.7266048340954222, + "grad_norm": 1812.3429776276469, + "learning_rate": 3.5331172396005354e-06, + "loss": 229.4545, + "step": 37780 + }, + { + "epoch": 0.7267971593559027, + "grad_norm": 1603.1230736647542, + "learning_rate": 3.528463245425062e-06, + "loss": 240.1801, + "step": 37790 + }, + { + "epoch": 0.7269894846163832, + "grad_norm": 1443.434335743417, + "learning_rate": 3.5238116617901486e-06, + "loss": 232.0958, + "step": 37800 + }, + { + "epoch": 0.7271818098768638, + "grad_norm": 1605.3742844790415, + "learning_rate": 3.519162490428433e-06, + "loss": 239.055, + "step": 37810 + }, + { + "epoch": 0.7273741351373443, + "grad_norm": 1527.088012355998, + "learning_rate": 3.5145157330716516e-06, + "loss": 229.7837, + "step": 37820 + }, + { + "epoch": 0.7275664603978248, + "grad_norm": 1480.5399344921368, + "learning_rate": 3.509871391450652e-06, + "loss": 241.1656, + "step": 37830 + }, + { + "epoch": 0.7277587856583053, + "grad_norm": 1386.8784457822, + "learning_rate": 3.505229467295371e-06, + "loss": 231.1997, + "step": 37840 + }, + { + "epoch": 0.7279511109187858, + "grad_norm": 1498.2244943293506, + "learning_rate": 3.5005899623348493e-06, + "loss": 244.6487, + "step": 37850 + }, + { + "epoch": 0.7281434361792664, + "grad_norm": 1469.4204061317337, + "learning_rate": 3.495952878297221e-06, + "loss": 235.9334, + "step": 37860 + }, + { + "epoch": 0.7283357614397469, + "grad_norm": 1351.2079624777139, + "learning_rate": 3.4913182169097315e-06, + "loss": 230.4485, + "step": 37870 + }, + { + "epoch": 0.7285280867002274, + "grad_norm": 1502.1192196783863, + "learning_rate": 3.4866859798987084e-06, + "loss": 231.663, + "step": 37880 + }, + { + "epoch": 0.7287204119607079, + "grad_norm": 1759.390405141497, + "learning_rate": 3.4820561689895906e-06, + "loss": 235.0598, + "step": 37890 + }, + { + "epoch": 0.7289127372211884, + "grad_norm": 1577.3634302237792, + "learning_rate": 3.4774287859068988e-06, + "loss": 229.306, + "step": 37900 + }, + { + "epoch": 0.729105062481669, + "grad_norm": 9423.990269975693, + "learning_rate": 3.472803832374263e-06, + "loss": 226.7137, + "step": 37910 + }, + { + "epoch": 0.7292973877421495, + "grad_norm": 1492.2728316103485, + "learning_rate": 3.4681813101144e-06, + "loss": 236.0989, + "step": 37920 + }, + { + "epoch": 0.7294897130026301, + "grad_norm": 1497.4115488474138, + "learning_rate": 3.4635612208491197e-06, + "loss": 237.5444, + "step": 37930 + }, + { + "epoch": 0.7296820382631106, + "grad_norm": 1424.5715929030805, + "learning_rate": 3.458943566299334e-06, + "loss": 224.4871, + "step": 37940 + }, + { + "epoch": 0.7298743635235911, + "grad_norm": 1602.2442483422503, + "learning_rate": 3.454328348185042e-06, + "loss": 226.0011, + "step": 37950 + }, + { + "epoch": 0.7300666887840717, + "grad_norm": 1625.1770808516565, + "learning_rate": 3.4497155682253314e-06, + "loss": 240.1945, + "step": 37960 + }, + { + "epoch": 0.7302590140445522, + "grad_norm": 1535.8323367872435, + "learning_rate": 3.4451052281383922e-06, + "loss": 227.0235, + "step": 37970 + }, + { + "epoch": 0.7304513393050327, + "grad_norm": 1579.472889542994, + "learning_rate": 3.440497329641499e-06, + "loss": 236.794, + "step": 37980 + }, + { + "epoch": 0.7306436645655132, + "grad_norm": 1580.954188423651, + "learning_rate": 3.435891874451017e-06, + "loss": 230.2277, + "step": 37990 + }, + { + "epoch": 0.7308359898259937, + "grad_norm": 1469.7377768417768, + "learning_rate": 3.431288864282398e-06, + "loss": 230.226, + "step": 38000 + }, + { + "epoch": 0.7310283150864743, + "grad_norm": 1421.2548757450052, + "learning_rate": 3.4266883008501937e-06, + "loss": 228.2719, + "step": 38010 + }, + { + "epoch": 0.7312206403469548, + "grad_norm": 1538.2825198599721, + "learning_rate": 3.4220901858680365e-06, + "loss": 238.3946, + "step": 38020 + }, + { + "epoch": 0.7314129656074353, + "grad_norm": 1521.3598967828464, + "learning_rate": 3.4174945210486445e-06, + "loss": 235.642, + "step": 38030 + }, + { + "epoch": 0.7316052908679158, + "grad_norm": 1366.2607252731727, + "learning_rate": 3.4129013081038285e-06, + "loss": 235.8387, + "step": 38040 + }, + { + "epoch": 0.7317976161283963, + "grad_norm": 1469.340021318117, + "learning_rate": 3.40831054874449e-06, + "loss": 230.4746, + "step": 38050 + }, + { + "epoch": 0.7319899413888769, + "grad_norm": 1518.1402450302116, + "learning_rate": 3.403722244680606e-06, + "loss": 230.0023, + "step": 38060 + }, + { + "epoch": 0.7321822666493574, + "grad_norm": 1588.654904140331, + "learning_rate": 3.3991363976212423e-06, + "loss": 241.2521, + "step": 38070 + }, + { + "epoch": 0.7323745919098379, + "grad_norm": 1495.2090854258015, + "learning_rate": 3.394553009274556e-06, + "loss": 227.6466, + "step": 38080 + }, + { + "epoch": 0.7325669171703184, + "grad_norm": 1490.171408351072, + "learning_rate": 3.389972081347782e-06, + "loss": 229.939, + "step": 38090 + }, + { + "epoch": 0.7327592424307989, + "grad_norm": 1565.1137289771389, + "learning_rate": 3.385393615547239e-06, + "loss": 239.5606, + "step": 38100 + }, + { + "epoch": 0.7329515676912794, + "grad_norm": 1555.2400904353592, + "learning_rate": 3.3808176135783276e-06, + "loss": 245.8193, + "step": 38110 + }, + { + "epoch": 0.73314389295176, + "grad_norm": 1563.5939510084056, + "learning_rate": 3.3762440771455386e-06, + "loss": 236.9324, + "step": 38120 + }, + { + "epoch": 0.7333362182122405, + "grad_norm": 1372.7472298470218, + "learning_rate": 3.371673007952435e-06, + "loss": 229.9706, + "step": 38130 + }, + { + "epoch": 0.7335285434727211, + "grad_norm": 1989.4626982753248, + "learning_rate": 3.3671044077016634e-06, + "loss": 244.1696, + "step": 38140 + }, + { + "epoch": 0.7337208687332016, + "grad_norm": 1382.315746654657, + "learning_rate": 3.3625382780949576e-06, + "loss": 236.8861, + "step": 38150 + }, + { + "epoch": 0.7339131939936822, + "grad_norm": 2013.7536739456355, + "learning_rate": 3.357974620833121e-06, + "loss": 236.7099, + "step": 38160 + }, + { + "epoch": 0.7341055192541627, + "grad_norm": 1480.908347081604, + "learning_rate": 3.353413437616039e-06, + "loss": 237.05, + "step": 38170 + }, + { + "epoch": 0.7342978445146432, + "grad_norm": 1609.1857888718864, + "learning_rate": 3.3488547301426786e-06, + "loss": 221.6165, + "step": 38180 + }, + { + "epoch": 0.7344901697751237, + "grad_norm": 1616.1171745614897, + "learning_rate": 3.344298500111087e-06, + "loss": 233.9652, + "step": 38190 + }, + { + "epoch": 0.7346824950356042, + "grad_norm": 1548.548677522442, + "learning_rate": 3.3397447492183833e-06, + "loss": 232.3867, + "step": 38200 + }, + { + "epoch": 0.7348748202960848, + "grad_norm": 1720.5928973720063, + "learning_rate": 3.3351934791607576e-06, + "loss": 234.0296, + "step": 38210 + }, + { + "epoch": 0.7350671455565653, + "grad_norm": 1396.6493745825644, + "learning_rate": 3.330644691633492e-06, + "loss": 231.8759, + "step": 38220 + }, + { + "epoch": 0.7352594708170458, + "grad_norm": 1605.4773960245195, + "learning_rate": 3.3260983883309306e-06, + "loss": 240.6725, + "step": 38230 + }, + { + "epoch": 0.7354517960775263, + "grad_norm": 1500.612684089531, + "learning_rate": 3.321554570946497e-06, + "loss": 228.6631, + "step": 38240 + }, + { + "epoch": 0.7356441213380068, + "grad_norm": 1553.948986128039, + "learning_rate": 3.317013241172684e-06, + "loss": 233.0377, + "step": 38250 + }, + { + "epoch": 0.7358364465984873, + "grad_norm": 1547.010115429101, + "learning_rate": 3.3124744007010688e-06, + "loss": 230.8883, + "step": 38260 + }, + { + "epoch": 0.7360287718589679, + "grad_norm": 1664.7985164728855, + "learning_rate": 3.3079380512222904e-06, + "loss": 233.1191, + "step": 38270 + }, + { + "epoch": 0.7362210971194484, + "grad_norm": 1515.2440430941901, + "learning_rate": 3.3034041944260654e-06, + "loss": 234.5917, + "step": 38280 + }, + { + "epoch": 0.7364134223799289, + "grad_norm": 2032.3442517176109, + "learning_rate": 3.2988728320011774e-06, + "loss": 231.3146, + "step": 38290 + }, + { + "epoch": 0.7366057476404094, + "grad_norm": 1527.9186762694744, + "learning_rate": 3.294343965635489e-06, + "loss": 232.4708, + "step": 38300 + }, + { + "epoch": 0.7367980729008899, + "grad_norm": 1376.744789862261, + "learning_rate": 3.289817597015923e-06, + "loss": 224.7349, + "step": 38310 + }, + { + "epoch": 0.7369903981613705, + "grad_norm": 1724.8007360933777, + "learning_rate": 3.2852937278284837e-06, + "loss": 236.0009, + "step": 38320 + }, + { + "epoch": 0.737182723421851, + "grad_norm": 1583.083827873174, + "learning_rate": 3.28077235975823e-06, + "loss": 230.9427, + "step": 38330 + }, + { + "epoch": 0.7373750486823316, + "grad_norm": 1382.2351877769281, + "learning_rate": 3.2762534944893033e-06, + "loss": 235.0521, + "step": 38340 + }, + { + "epoch": 0.7375673739428121, + "grad_norm": 1465.7386454187583, + "learning_rate": 3.271737133704904e-06, + "loss": 227.5069, + "step": 38350 + }, + { + "epoch": 0.7377596992032927, + "grad_norm": 1548.1191185165078, + "learning_rate": 3.2672232790872983e-06, + "loss": 232.8356, + "step": 38360 + }, + { + "epoch": 0.7379520244637732, + "grad_norm": 1532.9614833623862, + "learning_rate": 3.262711932317828e-06, + "loss": 234.831, + "step": 38370 + }, + { + "epoch": 0.7381443497242537, + "grad_norm": 1546.0956541191586, + "learning_rate": 3.258203095076894e-06, + "loss": 231.3097, + "step": 38380 + }, + { + "epoch": 0.7383366749847342, + "grad_norm": 1593.369081047998, + "learning_rate": 3.2536967690439592e-06, + "loss": 241.5989, + "step": 38390 + }, + { + "epoch": 0.7385290002452147, + "grad_norm": 1391.2124942627154, + "learning_rate": 3.249192955897562e-06, + "loss": 232.1474, + "step": 38400 + }, + { + "epoch": 0.7387213255056952, + "grad_norm": 1770.44050331133, + "learning_rate": 3.2446916573152955e-06, + "loss": 233.9994, + "step": 38410 + }, + { + "epoch": 0.7389136507661758, + "grad_norm": 1363.0946982386781, + "learning_rate": 3.24019287497382e-06, + "loss": 227.8752, + "step": 38420 + }, + { + "epoch": 0.7391059760266563, + "grad_norm": 1524.4093907439187, + "learning_rate": 3.235696610548852e-06, + "loss": 225.0379, + "step": 38430 + }, + { + "epoch": 0.7392983012871368, + "grad_norm": 1511.6932896158028, + "learning_rate": 3.231202865715184e-06, + "loss": 234.9341, + "step": 38440 + }, + { + "epoch": 0.7394906265476173, + "grad_norm": 1431.2248540907865, + "learning_rate": 3.226711642146655e-06, + "loss": 240.6318, + "step": 38450 + }, + { + "epoch": 0.7396829518080978, + "grad_norm": 1439.2073383165941, + "learning_rate": 3.222222941516179e-06, + "loss": 230.4061, + "step": 38460 + }, + { + "epoch": 0.7398752770685784, + "grad_norm": 1403.5401126611653, + "learning_rate": 3.2177367654957137e-06, + "loss": 223.8518, + "step": 38470 + }, + { + "epoch": 0.7400676023290589, + "grad_norm": 1559.0925481611444, + "learning_rate": 3.213253115756295e-06, + "loss": 232.6755, + "step": 38480 + }, + { + "epoch": 0.7402599275895394, + "grad_norm": 1480.5121393484953, + "learning_rate": 3.208771993968003e-06, + "loss": 232.3182, + "step": 38490 + }, + { + "epoch": 0.7404522528500199, + "grad_norm": 1539.960055255188, + "learning_rate": 3.2042934017999795e-06, + "loss": 228.5095, + "step": 38500 + }, + { + "epoch": 0.7406445781105004, + "grad_norm": 1354.0956620780714, + "learning_rate": 3.1998173409204326e-06, + "loss": 229.5166, + "step": 38510 + }, + { + "epoch": 0.740836903370981, + "grad_norm": 1508.161123468857, + "learning_rate": 3.1953438129966175e-06, + "loss": 241.1044, + "step": 38520 + }, + { + "epoch": 0.7410292286314615, + "grad_norm": 1475.1107350472876, + "learning_rate": 3.190872819694849e-06, + "loss": 239.4382, + "step": 38530 + }, + { + "epoch": 0.741221553891942, + "grad_norm": 1391.2570241695958, + "learning_rate": 3.1864043626804953e-06, + "loss": 225.6426, + "step": 38540 + }, + { + "epoch": 0.7414138791524226, + "grad_norm": 1414.0084050629603, + "learning_rate": 3.18193844361799e-06, + "loss": 224.4675, + "step": 38550 + }, + { + "epoch": 0.7416062044129031, + "grad_norm": 1607.8175152907313, + "learning_rate": 3.1774750641708095e-06, + "loss": 228.3828, + "step": 38560 + }, + { + "epoch": 0.7417985296733837, + "grad_norm": 1577.781339626044, + "learning_rate": 3.1730142260014875e-06, + "loss": 231.7078, + "step": 38570 + }, + { + "epoch": 0.7419908549338642, + "grad_norm": 1523.5309648075292, + "learning_rate": 3.1685559307716187e-06, + "loss": 229.4399, + "step": 38580 + }, + { + "epoch": 0.7421831801943447, + "grad_norm": 1486.2068497748805, + "learning_rate": 3.164100180141839e-06, + "loss": 231.492, + "step": 38590 + }, + { + "epoch": 0.7423755054548252, + "grad_norm": 1519.7079700238107, + "learning_rate": 3.159646975771842e-06, + "loss": 235.6418, + "step": 38600 + }, + { + "epoch": 0.7425678307153057, + "grad_norm": 1722.8987081581117, + "learning_rate": 3.155196319320374e-06, + "loss": 237.3484, + "step": 38610 + }, + { + "epoch": 0.7427601559757863, + "grad_norm": 1659.9162008450585, + "learning_rate": 3.1507482124452337e-06, + "loss": 236.7098, + "step": 38620 + }, + { + "epoch": 0.7429524812362668, + "grad_norm": 1494.1360038720754, + "learning_rate": 3.146302656803266e-06, + "loss": 228.4177, + "step": 38630 + }, + { + "epoch": 0.7431448064967473, + "grad_norm": 1514.5651028015404, + "learning_rate": 3.1418596540503653e-06, + "loss": 228.4572, + "step": 38640 + }, + { + "epoch": 0.7433371317572278, + "grad_norm": 1577.7417549788806, + "learning_rate": 3.1374192058414755e-06, + "loss": 240.3857, + "step": 38650 + }, + { + "epoch": 0.7435294570177083, + "grad_norm": 1483.4734957474009, + "learning_rate": 3.1329813138305944e-06, + "loss": 235.6175, + "step": 38660 + }, + { + "epoch": 0.7437217822781889, + "grad_norm": 1517.817491423927, + "learning_rate": 3.128545979670762e-06, + "loss": 224.0461, + "step": 38670 + }, + { + "epoch": 0.7439141075386694, + "grad_norm": 1472.852040987415, + "learning_rate": 3.124113205014063e-06, + "loss": 227.5622, + "step": 38680 + }, + { + "epoch": 0.7441064327991499, + "grad_norm": 1647.6381240449646, + "learning_rate": 3.119682991511639e-06, + "loss": 233.6358, + "step": 38690 + }, + { + "epoch": 0.7442987580596304, + "grad_norm": 1636.7000026118264, + "learning_rate": 3.1152553408136686e-06, + "loss": 229.2395, + "step": 38700 + }, + { + "epoch": 0.7444910833201109, + "grad_norm": 1626.5114430746166, + "learning_rate": 3.110830254569378e-06, + "loss": 233.4884, + "step": 38710 + }, + { + "epoch": 0.7446834085805915, + "grad_norm": 1588.9076680875623, + "learning_rate": 3.106407734427037e-06, + "loss": 233.4497, + "step": 38720 + }, + { + "epoch": 0.744875733841072, + "grad_norm": 1546.1459562253833, + "learning_rate": 3.101987782033966e-06, + "loss": 234.1466, + "step": 38730 + }, + { + "epoch": 0.7450680591015525, + "grad_norm": 1536.6085840720723, + "learning_rate": 3.097570399036519e-06, + "loss": 232.2633, + "step": 38740 + }, + { + "epoch": 0.7452603843620331, + "grad_norm": 1477.7490614590422, + "learning_rate": 3.0931555870801033e-06, + "loss": 224.3291, + "step": 38750 + }, + { + "epoch": 0.7454527096225136, + "grad_norm": 1354.5010656531415, + "learning_rate": 3.0887433478091587e-06, + "loss": 228.2884, + "step": 38760 + }, + { + "epoch": 0.7456450348829942, + "grad_norm": 1465.9569984578495, + "learning_rate": 3.0843336828671765e-06, + "loss": 229.9976, + "step": 38770 + }, + { + "epoch": 0.7458373601434747, + "grad_norm": 1622.1769824796472, + "learning_rate": 3.079926593896683e-06, + "loss": 229.3872, + "step": 38780 + }, + { + "epoch": 0.7460296854039552, + "grad_norm": 1532.9826618181792, + "learning_rate": 3.0755220825392397e-06, + "loss": 226.9077, + "step": 38790 + }, + { + "epoch": 0.7462220106644357, + "grad_norm": 1627.3066234340752, + "learning_rate": 3.0711201504354628e-06, + "loss": 230.8235, + "step": 38800 + }, + { + "epoch": 0.7464143359249162, + "grad_norm": 1375.838055736071, + "learning_rate": 3.0667207992249948e-06, + "loss": 233.8211, + "step": 38810 + }, + { + "epoch": 0.7466066611853968, + "grad_norm": 1417.7960527685434, + "learning_rate": 3.062324030546523e-06, + "loss": 232.4826, + "step": 38820 + }, + { + "epoch": 0.7467989864458773, + "grad_norm": 1503.0315132252572, + "learning_rate": 3.057929846037767e-06, + "loss": 227.4745, + "step": 38830 + }, + { + "epoch": 0.7469913117063578, + "grad_norm": 1918.148383930101, + "learning_rate": 3.0535382473354945e-06, + "loss": 232.5834, + "step": 38840 + }, + { + "epoch": 0.7471836369668383, + "grad_norm": 1541.8218319319228, + "learning_rate": 3.0491492360755003e-06, + "loss": 232.8663, + "step": 38850 + }, + { + "epoch": 0.7473759622273188, + "grad_norm": 1619.755062191748, + "learning_rate": 3.0447628138926153e-06, + "loss": 223.3882, + "step": 38860 + }, + { + "epoch": 0.7475682874877994, + "grad_norm": 1552.9096540805451, + "learning_rate": 3.0403789824207165e-06, + "loss": 224.4841, + "step": 38870 + }, + { + "epoch": 0.7477606127482799, + "grad_norm": 1606.2692061301036, + "learning_rate": 3.0359977432927013e-06, + "loss": 228.7691, + "step": 38880 + }, + { + "epoch": 0.7479529380087604, + "grad_norm": 1493.6444073551415, + "learning_rate": 3.0316190981405147e-06, + "loss": 228.6588, + "step": 38890 + }, + { + "epoch": 0.7481452632692409, + "grad_norm": 1621.4724822994094, + "learning_rate": 3.0272430485951244e-06, + "loss": 226.9325, + "step": 38900 + }, + { + "epoch": 0.7483375885297214, + "grad_norm": 1529.2698704758027, + "learning_rate": 3.0228695962865438e-06, + "loss": 232.5227, + "step": 38910 + }, + { + "epoch": 0.748529913790202, + "grad_norm": 1611.6355212872631, + "learning_rate": 3.018498742843806e-06, + "loss": 233.6747, + "step": 38920 + }, + { + "epoch": 0.7487222390506825, + "grad_norm": 1534.9337549557347, + "learning_rate": 3.014130489894982e-06, + "loss": 230.3002, + "step": 38930 + }, + { + "epoch": 0.748914564311163, + "grad_norm": 1485.581125702763, + "learning_rate": 3.0097648390671765e-06, + "loss": 223.6374, + "step": 38940 + }, + { + "epoch": 0.7491068895716435, + "grad_norm": 1618.7800635046221, + "learning_rate": 3.005401791986522e-06, + "loss": 236.3089, + "step": 38950 + }, + { + "epoch": 0.7492992148321241, + "grad_norm": 1600.9724572405305, + "learning_rate": 3.00104135027818e-06, + "loss": 235.8808, + "step": 38960 + }, + { + "epoch": 0.7494915400926047, + "grad_norm": 1632.6882586007207, + "learning_rate": 2.99668351556634e-06, + "loss": 233.8207, + "step": 38970 + }, + { + "epoch": 0.7496838653530852, + "grad_norm": 1742.7987924364313, + "learning_rate": 2.99232828947423e-06, + "loss": 234.3095, + "step": 38980 + }, + { + "epoch": 0.7498761906135657, + "grad_norm": 1560.8204002034142, + "learning_rate": 2.987975673624096e-06, + "loss": 226.3867, + "step": 38990 + }, + { + "epoch": 0.7500685158740462, + "grad_norm": 1559.7950189364128, + "learning_rate": 2.9836256696372178e-06, + "loss": 236.0132, + "step": 39000 + }, + { + "epoch": 0.7502608411345267, + "grad_norm": 1461.7826027381584, + "learning_rate": 2.9792782791338936e-06, + "loss": 232.3441, + "step": 39010 + }, + { + "epoch": 0.7504531663950073, + "grad_norm": 1540.1667159474748, + "learning_rate": 2.9749335037334604e-06, + "loss": 233.2916, + "step": 39020 + }, + { + "epoch": 0.7506454916554878, + "grad_norm": 1493.4633613370054, + "learning_rate": 2.9705913450542777e-06, + "loss": 225.4259, + "step": 39030 + }, + { + "epoch": 0.7508378169159683, + "grad_norm": 1681.9987349814191, + "learning_rate": 2.9662518047137214e-06, + "loss": 227.9373, + "step": 39040 + }, + { + "epoch": 0.7510301421764488, + "grad_norm": 1551.8812699037337, + "learning_rate": 2.961914884328203e-06, + "loss": 232.0307, + "step": 39050 + }, + { + "epoch": 0.7512224674369293, + "grad_norm": 1577.1057744004038, + "learning_rate": 2.9575805855131546e-06, + "loss": 230.7319, + "step": 39060 + }, + { + "epoch": 0.7514147926974099, + "grad_norm": 1387.5241059839784, + "learning_rate": 2.9532489098830274e-06, + "loss": 222.7067, + "step": 39070 + }, + { + "epoch": 0.7516071179578904, + "grad_norm": 1618.4106613229749, + "learning_rate": 2.9489198590512967e-06, + "loss": 229.0409, + "step": 39080 + }, + { + "epoch": 0.7517994432183709, + "grad_norm": 1545.6290078684374, + "learning_rate": 2.9445934346304706e-06, + "loss": 231.7582, + "step": 39090 + }, + { + "epoch": 0.7519917684788514, + "grad_norm": 1498.1802867621911, + "learning_rate": 2.940269638232065e-06, + "loss": 229.0374, + "step": 39100 + }, + { + "epoch": 0.7521840937393319, + "grad_norm": 1484.7504843406407, + "learning_rate": 2.935948471466622e-06, + "loss": 227.3486, + "step": 39110 + }, + { + "epoch": 0.7523764189998124, + "grad_norm": 1406.5905899381798, + "learning_rate": 2.9316299359437085e-06, + "loss": 228.8529, + "step": 39120 + }, + { + "epoch": 0.752568744260293, + "grad_norm": 1468.5098236518395, + "learning_rate": 2.9273140332719064e-06, + "loss": 227.1665, + "step": 39130 + }, + { + "epoch": 0.7527610695207735, + "grad_norm": 1480.4347493309658, + "learning_rate": 2.923000765058818e-06, + "loss": 229.7315, + "step": 39140 + }, + { + "epoch": 0.752953394781254, + "grad_norm": 1459.8983937371859, + "learning_rate": 2.9186901329110605e-06, + "loss": 223.861, + "step": 39150 + }, + { + "epoch": 0.7531457200417346, + "grad_norm": 1630.1424199653043, + "learning_rate": 2.9143821384342808e-06, + "loss": 226.2624, + "step": 39160 + }, + { + "epoch": 0.7533380453022152, + "grad_norm": 1560.6245436710506, + "learning_rate": 2.9100767832331277e-06, + "loss": 227.4073, + "step": 39170 + }, + { + "epoch": 0.7535303705626957, + "grad_norm": 1501.8897167206867, + "learning_rate": 2.9057740689112822e-06, + "loss": 235.3969, + "step": 39180 + }, + { + "epoch": 0.7537226958231762, + "grad_norm": 1522.0067963644365, + "learning_rate": 2.901473997071428e-06, + "loss": 229.2319, + "step": 39190 + }, + { + "epoch": 0.7539150210836567, + "grad_norm": 1431.8775443390805, + "learning_rate": 2.8971765693152767e-06, + "loss": 228.2309, + "step": 39200 + }, + { + "epoch": 0.7541073463441372, + "grad_norm": 1341.6012616189569, + "learning_rate": 2.8928817872435465e-06, + "loss": 227.5759, + "step": 39210 + }, + { + "epoch": 0.7542996716046177, + "grad_norm": 1492.1599328719174, + "learning_rate": 2.8885896524559696e-06, + "loss": 232.1361, + "step": 39220 + }, + { + "epoch": 0.7544919968650983, + "grad_norm": 1633.6914536316206, + "learning_rate": 2.8843001665513016e-06, + "loss": 229.4284, + "step": 39230 + }, + { + "epoch": 0.7546843221255788, + "grad_norm": 1533.30097259094, + "learning_rate": 2.8800133311273016e-06, + "loss": 235.565, + "step": 39240 + }, + { + "epoch": 0.7548766473860593, + "grad_norm": 1737.6545061013549, + "learning_rate": 2.875729147780745e-06, + "loss": 233.1697, + "step": 39250 + }, + { + "epoch": 0.7550689726465398, + "grad_norm": 1712.2424769954955, + "learning_rate": 2.871447618107417e-06, + "loss": 229.7837, + "step": 39260 + }, + { + "epoch": 0.7552612979070203, + "grad_norm": 1560.7419996991025, + "learning_rate": 2.867168743702122e-06, + "loss": 224.7095, + "step": 39270 + }, + { + "epoch": 0.7554536231675009, + "grad_norm": 1702.4049797609039, + "learning_rate": 2.8628925261586683e-06, + "loss": 234.5142, + "step": 39280 + }, + { + "epoch": 0.7556459484279814, + "grad_norm": 2075.1287585968976, + "learning_rate": 2.8586189670698717e-06, + "loss": 231.4869, + "step": 39290 + }, + { + "epoch": 0.7558382736884619, + "grad_norm": 1428.5769655978036, + "learning_rate": 2.854348068027568e-06, + "loss": 227.7387, + "step": 39300 + }, + { + "epoch": 0.7560305989489424, + "grad_norm": 1553.079126384359, + "learning_rate": 2.850079830622593e-06, + "loss": 234.0741, + "step": 39310 + }, + { + "epoch": 0.7562229242094229, + "grad_norm": 1556.8255438387275, + "learning_rate": 2.845814256444799e-06, + "loss": 239.6045, + "step": 39320 + }, + { + "epoch": 0.7564152494699035, + "grad_norm": 1432.1019826302088, + "learning_rate": 2.8415513470830357e-06, + "loss": 236.7049, + "step": 39330 + }, + { + "epoch": 0.756607574730384, + "grad_norm": 1556.9228249657856, + "learning_rate": 2.837291104125174e-06, + "loss": 234.3567, + "step": 39340 + }, + { + "epoch": 0.7567998999908645, + "grad_norm": 1507.964162233076, + "learning_rate": 2.833033529158079e-06, + "loss": 226.55, + "step": 39350 + }, + { + "epoch": 0.7569922252513451, + "grad_norm": 1578.3682066255808, + "learning_rate": 2.8287786237676253e-06, + "loss": 238.0503, + "step": 39360 + }, + { + "epoch": 0.7571845505118256, + "grad_norm": 1609.993364757536, + "learning_rate": 2.824526389538701e-06, + "loss": 236.4321, + "step": 39370 + }, + { + "epoch": 0.7573768757723062, + "grad_norm": 1565.811264480137, + "learning_rate": 2.8202768280551894e-06, + "loss": 227.1677, + "step": 39380 + }, + { + "epoch": 0.7575692010327867, + "grad_norm": 1522.7060655441767, + "learning_rate": 2.8160299408999827e-06, + "loss": 238.6643, + "step": 39390 + }, + { + "epoch": 0.7577615262932672, + "grad_norm": 1525.0299837914583, + "learning_rate": 2.811785729654972e-06, + "loss": 232.3769, + "step": 39400 + }, + { + "epoch": 0.7579538515537477, + "grad_norm": 1830.8695918774251, + "learning_rate": 2.8075441959010628e-06, + "loss": 227.8009, + "step": 39410 + }, + { + "epoch": 0.7581461768142282, + "grad_norm": 1425.616184175756, + "learning_rate": 2.8033053412181543e-06, + "loss": 227.7011, + "step": 39420 + }, + { + "epoch": 0.7583385020747088, + "grad_norm": 1318.8439040850305, + "learning_rate": 2.799069167185148e-06, + "loss": 217.5075, + "step": 39430 + }, + { + "epoch": 0.7585308273351893, + "grad_norm": 1429.719327200252, + "learning_rate": 2.7948356753799466e-06, + "loss": 238.4496, + "step": 39440 + }, + { + "epoch": 0.7587231525956698, + "grad_norm": 1469.4435178137517, + "learning_rate": 2.7906048673794593e-06, + "loss": 227.6901, + "step": 39450 + }, + { + "epoch": 0.7589154778561503, + "grad_norm": 1493.9387265662892, + "learning_rate": 2.7863767447595946e-06, + "loss": 233.9194, + "step": 39460 + }, + { + "epoch": 0.7591078031166308, + "grad_norm": 1373.719482542066, + "learning_rate": 2.7821513090952523e-06, + "loss": 226.7143, + "step": 39470 + }, + { + "epoch": 0.7593001283771114, + "grad_norm": 1488.3352043271668, + "learning_rate": 2.7779285619603446e-06, + "loss": 229.5452, + "step": 39480 + }, + { + "epoch": 0.7594924536375919, + "grad_norm": 1457.1325948435813, + "learning_rate": 2.77370850492777e-06, + "loss": 241.8746, + "step": 39490 + }, + { + "epoch": 0.7596847788980724, + "grad_norm": 1524.312368437958, + "learning_rate": 2.7694911395694324e-06, + "loss": 230.2175, + "step": 39500 + }, + { + "epoch": 0.7598771041585529, + "grad_norm": 1497.1561124764387, + "learning_rate": 2.765276467456225e-06, + "loss": 233.6009, + "step": 39510 + }, + { + "epoch": 0.7600694294190334, + "grad_norm": 1840.2446602403843, + "learning_rate": 2.761064490158052e-06, + "loss": 231.5688, + "step": 39520 + }, + { + "epoch": 0.760261754679514, + "grad_norm": 1493.0030869830234, + "learning_rate": 2.7568552092438018e-06, + "loss": 229.5692, + "step": 39530 + }, + { + "epoch": 0.7604540799399945, + "grad_norm": 1458.218326324335, + "learning_rate": 2.7526486262813578e-06, + "loss": 227.9972, + "step": 39540 + }, + { + "epoch": 0.760646405200475, + "grad_norm": 1519.3327051377792, + "learning_rate": 2.7484447428376094e-06, + "loss": 223.1645, + "step": 39550 + }, + { + "epoch": 0.7608387304609555, + "grad_norm": 1544.6972057422195, + "learning_rate": 2.7442435604784313e-06, + "loss": 231.1909, + "step": 39560 + }, + { + "epoch": 0.7610310557214361, + "grad_norm": 1734.2695560302896, + "learning_rate": 2.740045080768694e-06, + "loss": 230.289, + "step": 39570 + }, + { + "epoch": 0.7612233809819167, + "grad_norm": 1345.318831638213, + "learning_rate": 2.7358493052722603e-06, + "loss": 227.446, + "step": 39580 + }, + { + "epoch": 0.7614157062423972, + "grad_norm": 1768.76847183003, + "learning_rate": 2.7316562355519904e-06, + "loss": 228.2119, + "step": 39590 + }, + { + "epoch": 0.7616080315028777, + "grad_norm": 1615.1567394760389, + "learning_rate": 2.727465873169729e-06, + "loss": 230.1732, + "step": 39600 + }, + { + "epoch": 0.7618003567633582, + "grad_norm": 1475.6891761370462, + "learning_rate": 2.723278219686324e-06, + "loss": 230.1224, + "step": 39610 + }, + { + "epoch": 0.7619926820238387, + "grad_norm": 1623.8386817986877, + "learning_rate": 2.7190932766615998e-06, + "loss": 230.9577, + "step": 39620 + }, + { + "epoch": 0.7621850072843193, + "grad_norm": 1456.9462614488823, + "learning_rate": 2.714911045654385e-06, + "loss": 224.0631, + "step": 39630 + }, + { + "epoch": 0.7623773325447998, + "grad_norm": 1709.329324322735, + "learning_rate": 2.7107315282224878e-06, + "loss": 226.8969, + "step": 39640 + }, + { + "epoch": 0.7625696578052803, + "grad_norm": 1550.3853116982405, + "learning_rate": 2.7065547259227078e-06, + "loss": 231.5462, + "step": 39650 + }, + { + "epoch": 0.7627619830657608, + "grad_norm": 1538.7643760089115, + "learning_rate": 2.7023806403108397e-06, + "loss": 233.9908, + "step": 39660 + }, + { + "epoch": 0.7629543083262413, + "grad_norm": 1502.0380791874352, + "learning_rate": 2.698209272941659e-06, + "loss": 223.9186, + "step": 39670 + }, + { + "epoch": 0.7631466335867219, + "grad_norm": 1770.0076136099817, + "learning_rate": 2.694040625368931e-06, + "loss": 240.11, + "step": 39680 + }, + { + "epoch": 0.7633389588472024, + "grad_norm": 1476.1021015682165, + "learning_rate": 2.689874699145405e-06, + "loss": 226.428, + "step": 39690 + }, + { + "epoch": 0.7635312841076829, + "grad_norm": 1479.3706646795874, + "learning_rate": 2.685711495822827e-06, + "loss": 226.8066, + "step": 39700 + }, + { + "epoch": 0.7637236093681634, + "grad_norm": 1485.3689278382758, + "learning_rate": 2.6815510169519164e-06, + "loss": 225.0812, + "step": 39710 + }, + { + "epoch": 0.7639159346286439, + "grad_norm": 1465.0505655110665, + "learning_rate": 2.677393264082381e-06, + "loss": 239.3931, + "step": 39720 + }, + { + "epoch": 0.7641082598891245, + "grad_norm": 1356.5680751083773, + "learning_rate": 2.673238238762921e-06, + "loss": 233.6438, + "step": 39730 + }, + { + "epoch": 0.764300585149605, + "grad_norm": 1532.2535572660252, + "learning_rate": 2.6690859425412075e-06, + "loss": 229.7484, + "step": 39740 + }, + { + "epoch": 0.7644929104100855, + "grad_norm": 1826.663160506281, + "learning_rate": 2.6649363769639103e-06, + "loss": 240.9609, + "step": 39750 + }, + { + "epoch": 0.764685235670566, + "grad_norm": 1619.1353528456075, + "learning_rate": 2.660789543576667e-06, + "loss": 230.513, + "step": 39760 + }, + { + "epoch": 0.7648775609310466, + "grad_norm": 1451.953162359556, + "learning_rate": 2.6566454439241107e-06, + "loss": 232.0023, + "step": 39770 + }, + { + "epoch": 0.7650698861915272, + "grad_norm": 1608.5655613210492, + "learning_rate": 2.652504079549848e-06, + "loss": 236.1966, + "step": 39780 + }, + { + "epoch": 0.7652622114520077, + "grad_norm": 1425.7565820048073, + "learning_rate": 2.648365451996466e-06, + "loss": 231.6341, + "step": 39790 + }, + { + "epoch": 0.7654545367124882, + "grad_norm": 1359.7013947533796, + "learning_rate": 2.6442295628055346e-06, + "loss": 222.9614, + "step": 39800 + }, + { + "epoch": 0.7656468619729687, + "grad_norm": 1571.4898704025986, + "learning_rate": 2.64009641351761e-06, + "loss": 226.5332, + "step": 39810 + }, + { + "epoch": 0.7658391872334492, + "grad_norm": 1475.7123169859588, + "learning_rate": 2.635966005672218e-06, + "loss": 231.0349, + "step": 39820 + }, + { + "epoch": 0.7660315124939298, + "grad_norm": 1564.6585313295045, + "learning_rate": 2.631838340807865e-06, + "loss": 232.6851, + "step": 39830 + }, + { + "epoch": 0.7662238377544103, + "grad_norm": 1456.3971795270434, + "learning_rate": 2.6277134204620436e-06, + "loss": 229.4299, + "step": 39840 + }, + { + "epoch": 0.7664161630148908, + "grad_norm": 1654.790642375442, + "learning_rate": 2.6235912461712167e-06, + "loss": 237.5063, + "step": 39850 + }, + { + "epoch": 0.7666084882753713, + "grad_norm": 1622.1438050190582, + "learning_rate": 2.619471819470821e-06, + "loss": 227.2596, + "step": 39860 + }, + { + "epoch": 0.7668008135358518, + "grad_norm": 1445.822420320407, + "learning_rate": 2.6153551418952827e-06, + "loss": 223.3597, + "step": 39870 + }, + { + "epoch": 0.7669931387963324, + "grad_norm": 1468.4502517005567, + "learning_rate": 2.6112412149779888e-06, + "loss": 231.4466, + "step": 39880 + }, + { + "epoch": 0.7671854640568129, + "grad_norm": 1624.240968786334, + "learning_rate": 2.6071300402513165e-06, + "loss": 232.0156, + "step": 39890 + }, + { + "epoch": 0.7673777893172934, + "grad_norm": 1592.8445640755456, + "learning_rate": 2.603021619246604e-06, + "loss": 232.5667, + "step": 39900 + }, + { + "epoch": 0.7675701145777739, + "grad_norm": 1364.0891676198787, + "learning_rate": 2.5989159534941768e-06, + "loss": 228.7056, + "step": 39910 + }, + { + "epoch": 0.7677624398382544, + "grad_norm": 1362.1823548520736, + "learning_rate": 2.5948130445233232e-06, + "loss": 228.5754, + "step": 39920 + }, + { + "epoch": 0.767954765098735, + "grad_norm": 1540.7993054253168, + "learning_rate": 2.5907128938623093e-06, + "loss": 230.0194, + "step": 39930 + }, + { + "epoch": 0.7681470903592155, + "grad_norm": 1729.8661181939376, + "learning_rate": 2.5866155030383722e-06, + "loss": 236.0014, + "step": 39940 + }, + { + "epoch": 0.768339415619696, + "grad_norm": 1680.22327267366, + "learning_rate": 2.582520873577726e-06, + "loss": 224.4291, + "step": 39950 + }, + { + "epoch": 0.7685317408801765, + "grad_norm": 1355.6570509368657, + "learning_rate": 2.578429007005552e-06, + "loss": 227.8073, + "step": 39960 + }, + { + "epoch": 0.768724066140657, + "grad_norm": 1517.5289082127358, + "learning_rate": 2.5743399048460004e-06, + "loss": 233.4162, + "step": 39970 + }, + { + "epoch": 0.7689163914011377, + "grad_norm": 1529.6247759705575, + "learning_rate": 2.570253568622193e-06, + "loss": 230.2485, + "step": 39980 + }, + { + "epoch": 0.7691087166616182, + "grad_norm": 1521.9041550375655, + "learning_rate": 2.5661699998562286e-06, + "loss": 229.9751, + "step": 39990 + }, + { + "epoch": 0.7693010419220987, + "grad_norm": 1592.7690722009593, + "learning_rate": 2.5620892000691643e-06, + "loss": 239.7344, + "step": 40000 + }, + { + "epoch": 0.7694933671825792, + "grad_norm": 1536.1139050369718, + "learning_rate": 2.5580111707810296e-06, + "loss": 223.93, + "step": 40010 + }, + { + "epoch": 0.7696856924430597, + "grad_norm": 1701.5655218724514, + "learning_rate": 2.5539359135108244e-06, + "loss": 230.5768, + "step": 40020 + }, + { + "epoch": 0.7698780177035403, + "grad_norm": 1373.3462386369006, + "learning_rate": 2.549863429776519e-06, + "loss": 225.1608, + "step": 40030 + }, + { + "epoch": 0.7700703429640208, + "grad_norm": 1556.2910638255069, + "learning_rate": 2.5457937210950433e-06, + "loss": 229.5541, + "step": 40040 + }, + { + "epoch": 0.7702626682245013, + "grad_norm": 1579.768718214408, + "learning_rate": 2.541726788982294e-06, + "loss": 227.8932, + "step": 40050 + }, + { + "epoch": 0.7704549934849818, + "grad_norm": 1434.5027060967066, + "learning_rate": 2.5376626349531395e-06, + "loss": 229.1317, + "step": 40060 + }, + { + "epoch": 0.7706473187454623, + "grad_norm": 1517.7401641889667, + "learning_rate": 2.53360126052141e-06, + "loss": 233.8522, + "step": 40070 + }, + { + "epoch": 0.7708396440059428, + "grad_norm": 1594.181864715632, + "learning_rate": 2.529542667199896e-06, + "loss": 233.9767, + "step": 40080 + }, + { + "epoch": 0.7710319692664234, + "grad_norm": 1532.3078184249018, + "learning_rate": 2.525486856500363e-06, + "loss": 225.3485, + "step": 40090 + }, + { + "epoch": 0.7712242945269039, + "grad_norm": 1350.3336512430171, + "learning_rate": 2.5214338299335306e-06, + "loss": 229.533, + "step": 40100 + }, + { + "epoch": 0.7714166197873844, + "grad_norm": 1488.1248068203115, + "learning_rate": 2.5173835890090826e-06, + "loss": 232.382, + "step": 40110 + }, + { + "epoch": 0.7716089450478649, + "grad_norm": 1620.2971711654454, + "learning_rate": 2.5133361352356666e-06, + "loss": 228.0891, + "step": 40120 + }, + { + "epoch": 0.7718012703083454, + "grad_norm": 1413.8491979953592, + "learning_rate": 2.5092914701208958e-06, + "loss": 221.1879, + "step": 40130 + }, + { + "epoch": 0.771993595568826, + "grad_norm": 1386.9575119558153, + "learning_rate": 2.5052495951713406e-06, + "loss": 229.0098, + "step": 40140 + }, + { + "epoch": 0.7721859208293065, + "grad_norm": 1411.5234756516775, + "learning_rate": 2.5012105118925267e-06, + "loss": 226.465, + "step": 40150 + }, + { + "epoch": 0.772378246089787, + "grad_norm": 1566.5085338791948, + "learning_rate": 2.497174221788955e-06, + "loss": 226.8515, + "step": 40160 + }, + { + "epoch": 0.7725705713502675, + "grad_norm": 1554.7575223229185, + "learning_rate": 2.4931407263640683e-06, + "loss": 228.7184, + "step": 40170 + }, + { + "epoch": 0.7727628966107482, + "grad_norm": 1498.1724529363096, + "learning_rate": 2.489110027120285e-06, + "loss": 231.9207, + "step": 40180 + }, + { + "epoch": 0.7729552218712287, + "grad_norm": 1369.6770834399115, + "learning_rate": 2.4850821255589664e-06, + "loss": 226.0766, + "step": 40190 + }, + { + "epoch": 0.7731475471317092, + "grad_norm": 1381.2827277417787, + "learning_rate": 2.4810570231804463e-06, + "loss": 235.2267, + "step": 40200 + }, + { + "epoch": 0.7733398723921897, + "grad_norm": 1441.4432966750805, + "learning_rate": 2.4770347214840063e-06, + "loss": 225.7035, + "step": 40210 + }, + { + "epoch": 0.7735321976526702, + "grad_norm": 1443.7836043588495, + "learning_rate": 2.473015221967886e-06, + "loss": 225.2694, + "step": 40220 + }, + { + "epoch": 0.7737245229131507, + "grad_norm": 1808.5225238707985, + "learning_rate": 2.4689985261292805e-06, + "loss": 226.0416, + "step": 40230 + }, + { + "epoch": 0.7739168481736313, + "grad_norm": 1478.8505085391268, + "learning_rate": 2.464984635464348e-06, + "loss": 226.1488, + "step": 40240 + }, + { + "epoch": 0.7741091734341118, + "grad_norm": 1355.2658061098073, + "learning_rate": 2.460973551468194e-06, + "loss": 215.6184, + "step": 40250 + }, + { + "epoch": 0.7743014986945923, + "grad_norm": 1484.8008634830073, + "learning_rate": 2.456965275634878e-06, + "loss": 232.3315, + "step": 40260 + }, + { + "epoch": 0.7744938239550728, + "grad_norm": 1617.43936160178, + "learning_rate": 2.4529598094574226e-06, + "loss": 231.874, + "step": 40270 + }, + { + "epoch": 0.7746861492155533, + "grad_norm": 1566.1125054785784, + "learning_rate": 2.4489571544277944e-06, + "loss": 225.6771, + "step": 40280 + }, + { + "epoch": 0.7748784744760339, + "grad_norm": 1418.8194346181263, + "learning_rate": 2.444957312036914e-06, + "loss": 235.0988, + "step": 40290 + }, + { + "epoch": 0.7750707997365144, + "grad_norm": 1379.16405744577, + "learning_rate": 2.4409602837746625e-06, + "loss": 223.6478, + "step": 40300 + }, + { + "epoch": 0.7752631249969949, + "grad_norm": 1341.2557067983473, + "learning_rate": 2.4369660711298603e-06, + "loss": 224.6993, + "step": 40310 + }, + { + "epoch": 0.7754554502574754, + "grad_norm": 1531.9208625184538, + "learning_rate": 2.4329746755902917e-06, + "loss": 235.4249, + "step": 40320 + }, + { + "epoch": 0.7756477755179559, + "grad_norm": 1540.8569156599892, + "learning_rate": 2.428986098642684e-06, + "loss": 224.8458, + "step": 40330 + }, + { + "epoch": 0.7758401007784365, + "grad_norm": 1434.9122551682985, + "learning_rate": 2.425000341772711e-06, + "loss": 228.2546, + "step": 40340 + }, + { + "epoch": 0.776032426038917, + "grad_norm": 1533.5765462473894, + "learning_rate": 2.4210174064650084e-06, + "loss": 221.0433, + "step": 40350 + }, + { + "epoch": 0.7762247512993975, + "grad_norm": 1519.58865568952, + "learning_rate": 2.4170372942031506e-06, + "loss": 227.4079, + "step": 40360 + }, + { + "epoch": 0.776417076559878, + "grad_norm": 1603.639785063924, + "learning_rate": 2.4130600064696618e-06, + "loss": 227.0689, + "step": 40370 + }, + { + "epoch": 0.7766094018203585, + "grad_norm": 1572.8152371465953, + "learning_rate": 2.4090855447460205e-06, + "loss": 231.0838, + "step": 40380 + }, + { + "epoch": 0.7768017270808392, + "grad_norm": 1516.5582203496074, + "learning_rate": 2.4051139105126463e-06, + "loss": 228.535, + "step": 40390 + }, + { + "epoch": 0.7769940523413197, + "grad_norm": 1600.033725151574, + "learning_rate": 2.4011451052489064e-06, + "loss": 233.3731, + "step": 40400 + }, + { + "epoch": 0.7771863776018002, + "grad_norm": 1543.3682251301354, + "learning_rate": 2.3971791304331125e-06, + "loss": 223.3933, + "step": 40410 + }, + { + "epoch": 0.7773787028622807, + "grad_norm": 1401.8375212049893, + "learning_rate": 2.393215987542531e-06, + "loss": 227.7518, + "step": 40420 + }, + { + "epoch": 0.7775710281227612, + "grad_norm": 1542.0076337547246, + "learning_rate": 2.3892556780533606e-06, + "loss": 242.05, + "step": 40430 + }, + { + "epoch": 0.7777633533832418, + "grad_norm": 1670.9464137610357, + "learning_rate": 2.385298203440758e-06, + "loss": 226.5708, + "step": 40440 + }, + { + "epoch": 0.7779556786437223, + "grad_norm": 1379.6058889042256, + "learning_rate": 2.3813435651788107e-06, + "loss": 226.8058, + "step": 40450 + }, + { + "epoch": 0.7781480039042028, + "grad_norm": 1411.6719599942162, + "learning_rate": 2.377391764740562e-06, + "loss": 223.2353, + "step": 40460 + }, + { + "epoch": 0.7783403291646833, + "grad_norm": 1503.4557370807229, + "learning_rate": 2.3734428035979883e-06, + "loss": 220.4783, + "step": 40470 + }, + { + "epoch": 0.7785326544251638, + "grad_norm": 1357.084009449347, + "learning_rate": 2.3694966832220123e-06, + "loss": 226.8251, + "step": 40480 + }, + { + "epoch": 0.7787249796856444, + "grad_norm": 1489.3730747509462, + "learning_rate": 2.365553405082501e-06, + "loss": 230.1492, + "step": 40490 + }, + { + "epoch": 0.7789173049461249, + "grad_norm": 1338.5946968965673, + "learning_rate": 2.3616129706482604e-06, + "loss": 229.6824, + "step": 40500 + }, + { + "epoch": 0.7791096302066054, + "grad_norm": 1552.2410985614727, + "learning_rate": 2.357675381387036e-06, + "loss": 229.3827, + "step": 40510 + }, + { + "epoch": 0.7793019554670859, + "grad_norm": 1508.2537415895647, + "learning_rate": 2.3537406387655114e-06, + "loss": 224.9, + "step": 40520 + }, + { + "epoch": 0.7794942807275664, + "grad_norm": 1526.085897457577, + "learning_rate": 2.349808744249321e-06, + "loss": 227.9886, + "step": 40530 + }, + { + "epoch": 0.779686605988047, + "grad_norm": 1323.7807537402425, + "learning_rate": 2.345879699303025e-06, + "loss": 227.4332, + "step": 40540 + }, + { + "epoch": 0.7798789312485275, + "grad_norm": 1533.2274887748467, + "learning_rate": 2.3419535053901264e-06, + "loss": 226.85, + "step": 40550 + }, + { + "epoch": 0.780071256509008, + "grad_norm": 1892.5537707451906, + "learning_rate": 2.338030163973073e-06, + "loss": 241.7282, + "step": 40560 + }, + { + "epoch": 0.7802635817694885, + "grad_norm": 2098.48051973417, + "learning_rate": 2.334109676513242e-06, + "loss": 227.7498, + "step": 40570 + }, + { + "epoch": 0.780455907029969, + "grad_norm": 1402.0562953995811, + "learning_rate": 2.330192044470948e-06, + "loss": 224.9712, + "step": 40580 + }, + { + "epoch": 0.7806482322904497, + "grad_norm": 1523.3168543907718, + "learning_rate": 2.3262772693054457e-06, + "loss": 234.5564, + "step": 40590 + }, + { + "epoch": 0.7808405575509302, + "grad_norm": 1526.1610008678756, + "learning_rate": 2.322365352474928e-06, + "loss": 226.7791, + "step": 40600 + }, + { + "epoch": 0.7810328828114107, + "grad_norm": 1627.713212806367, + "learning_rate": 2.3184562954365153e-06, + "loss": 233.5697, + "step": 40610 + }, + { + "epoch": 0.7812252080718912, + "grad_norm": 1536.8872791126494, + "learning_rate": 2.3145500996462656e-06, + "loss": 233.8028, + "step": 40620 + }, + { + "epoch": 0.7814175333323717, + "grad_norm": 1544.0005262375464, + "learning_rate": 2.310646766559177e-06, + "loss": 236.3372, + "step": 40630 + }, + { + "epoch": 0.7816098585928523, + "grad_norm": 1355.3349946567957, + "learning_rate": 2.3067462976291744e-06, + "loss": 239.5528, + "step": 40640 + }, + { + "epoch": 0.7818021838533328, + "grad_norm": 1393.7196373154275, + "learning_rate": 2.302848694309118e-06, + "loss": 232.6877, + "step": 40650 + }, + { + "epoch": 0.7819945091138133, + "grad_norm": 1521.018926923358, + "learning_rate": 2.2989539580507957e-06, + "loss": 229.9074, + "step": 40660 + }, + { + "epoch": 0.7821868343742938, + "grad_norm": 1393.9483678876109, + "learning_rate": 2.2950620903049414e-06, + "loss": 231.9409, + "step": 40670 + }, + { + "epoch": 0.7823791596347743, + "grad_norm": 1449.210680831741, + "learning_rate": 2.2911730925212073e-06, + "loss": 231.3504, + "step": 40680 + }, + { + "epoch": 0.7825714848952549, + "grad_norm": 1644.9880550694936, + "learning_rate": 2.2872869661481766e-06, + "loss": 222.984, + "step": 40690 + }, + { + "epoch": 0.7827638101557354, + "grad_norm": 1471.3705734924645, + "learning_rate": 2.283403712633375e-06, + "loss": 223.9134, + "step": 40700 + }, + { + "epoch": 0.7829561354162159, + "grad_norm": 1535.5573364192828, + "learning_rate": 2.279523333423247e-06, + "loss": 235.2469, + "step": 40710 + }, + { + "epoch": 0.7831484606766964, + "grad_norm": 1510.987186257855, + "learning_rate": 2.2756458299631667e-06, + "loss": 220.6051, + "step": 40720 + }, + { + "epoch": 0.7833407859371769, + "grad_norm": 1562.9839076985177, + "learning_rate": 2.271771203697445e-06, + "loss": 225.6615, + "step": 40730 + }, + { + "epoch": 0.7835331111976575, + "grad_norm": 1642.8036388046785, + "learning_rate": 2.267899456069311e-06, + "loss": 229.096, + "step": 40740 + }, + { + "epoch": 0.783725436458138, + "grad_norm": 1514.0328472822666, + "learning_rate": 2.2640305885209336e-06, + "loss": 233.7914, + "step": 40750 + }, + { + "epoch": 0.7839177617186185, + "grad_norm": 1863.5990877946347, + "learning_rate": 2.2601646024933976e-06, + "loss": 235.72, + "step": 40760 + }, + { + "epoch": 0.784110086979099, + "grad_norm": 1425.6558101053422, + "learning_rate": 2.256301499426716e-06, + "loss": 226.4633, + "step": 40770 + }, + { + "epoch": 0.7843024122395795, + "grad_norm": 1472.4617980259707, + "learning_rate": 2.252441280759838e-06, + "loss": 226.7071, + "step": 40780 + }, + { + "epoch": 0.78449473750006, + "grad_norm": 1528.3722409226293, + "learning_rate": 2.248583947930628e-06, + "loss": 221.2189, + "step": 40790 + }, + { + "epoch": 0.7846870627605407, + "grad_norm": 1374.7888939546492, + "learning_rate": 2.2447295023758755e-06, + "loss": 225.7388, + "step": 40800 + }, + { + "epoch": 0.7848793880210212, + "grad_norm": 1620.8582955241884, + "learning_rate": 2.2408779455313035e-06, + "loss": 228.7956, + "step": 40810 + }, + { + "epoch": 0.7850717132815017, + "grad_norm": 1490.526283775315, + "learning_rate": 2.2370292788315505e-06, + "loss": 229.4782, + "step": 40820 + }, + { + "epoch": 0.7852640385419822, + "grad_norm": 1460.5091473901052, + "learning_rate": 2.2331835037101825e-06, + "loss": 220.2695, + "step": 40830 + }, + { + "epoch": 0.7854563638024628, + "grad_norm": 1465.1085778814581, + "learning_rate": 2.2293406215996814e-06, + "loss": 226.2115, + "step": 40840 + }, + { + "epoch": 0.7856486890629433, + "grad_norm": 1412.0950540608337, + "learning_rate": 2.2255006339314667e-06, + "loss": 227.4971, + "step": 40850 + }, + { + "epoch": 0.7858410143234238, + "grad_norm": 1447.6390826221968, + "learning_rate": 2.2216635421358623e-06, + "loss": 222.4478, + "step": 40860 + }, + { + "epoch": 0.7860333395839043, + "grad_norm": 1461.774153868207, + "learning_rate": 2.2178293476421276e-06, + "loss": 223.5899, + "step": 40870 + }, + { + "epoch": 0.7862256648443848, + "grad_norm": 1848.7914057486973, + "learning_rate": 2.213998051878431e-06, + "loss": 224.4984, + "step": 40880 + }, + { + "epoch": 0.7864179901048653, + "grad_norm": 1575.747483253397, + "learning_rate": 2.2101696562718735e-06, + "loss": 220.7876, + "step": 40890 + }, + { + "epoch": 0.7866103153653459, + "grad_norm": 1645.0734697340743, + "learning_rate": 2.206344162248466e-06, + "loss": 234.4414, + "step": 40900 + }, + { + "epoch": 0.7868026406258264, + "grad_norm": 1441.4290721070868, + "learning_rate": 2.2025215712331383e-06, + "loss": 227.5106, + "step": 40910 + }, + { + "epoch": 0.7869949658863069, + "grad_norm": 1643.0677555218856, + "learning_rate": 2.1987018846497487e-06, + "loss": 223.9698, + "step": 40920 + }, + { + "epoch": 0.7871872911467874, + "grad_norm": 1439.6237334511682, + "learning_rate": 2.194885103921064e-06, + "loss": 228.259, + "step": 40930 + }, + { + "epoch": 0.787379616407268, + "grad_norm": 1467.4516329863982, + "learning_rate": 2.191071230468772e-06, + "loss": 230.5067, + "step": 40940 + }, + { + "epoch": 0.7875719416677485, + "grad_norm": 1759.973294282919, + "learning_rate": 2.1872602657134757e-06, + "loss": 231.1099, + "step": 40950 + }, + { + "epoch": 0.787764266928229, + "grad_norm": 1485.6441736705547, + "learning_rate": 2.1834522110747014e-06, + "loss": 227.9294, + "step": 40960 + }, + { + "epoch": 0.7879565921887095, + "grad_norm": 1450.771930508663, + "learning_rate": 2.179647067970885e-06, + "loss": 225.9228, + "step": 40970 + }, + { + "epoch": 0.78814891744919, + "grad_norm": 1638.076217006254, + "learning_rate": 2.1758448378193743e-06, + "loss": 232.0011, + "step": 40980 + }, + { + "epoch": 0.7883412427096705, + "grad_norm": 1523.1982805396844, + "learning_rate": 2.1720455220364443e-06, + "loss": 225.8148, + "step": 40990 + }, + { + "epoch": 0.7885335679701512, + "grad_norm": 1652.9155537154872, + "learning_rate": 2.168249122037275e-06, + "loss": 223.9595, + "step": 41000 + }, + { + "epoch": 0.7887258932306317, + "grad_norm": 1451.996061054277, + "learning_rate": 2.1644556392359583e-06, + "loss": 225.9462, + "step": 41010 + }, + { + "epoch": 0.7889182184911122, + "grad_norm": 1420.226376785837, + "learning_rate": 2.160665075045508e-06, + "loss": 217.4813, + "step": 41020 + }, + { + "epoch": 0.7891105437515927, + "grad_norm": 1481.5532658731017, + "learning_rate": 2.1568774308778494e-06, + "loss": 229.8827, + "step": 41030 + }, + { + "epoch": 0.7893028690120732, + "grad_norm": 1565.4603306776348, + "learning_rate": 2.1530927081438148e-06, + "loss": 239.1232, + "step": 41040 + }, + { + "epoch": 0.7894951942725538, + "grad_norm": 1486.631631394588, + "learning_rate": 2.1493109082531473e-06, + "loss": 217.7836, + "step": 41050 + }, + { + "epoch": 0.7896875195330343, + "grad_norm": 1412.8829691386582, + "learning_rate": 2.1455320326145103e-06, + "loss": 221.3021, + "step": 41060 + }, + { + "epoch": 0.7898798447935148, + "grad_norm": 1387.1226392915962, + "learning_rate": 2.141756082635471e-06, + "loss": 224.7436, + "step": 41070 + }, + { + "epoch": 0.7900721700539953, + "grad_norm": 1453.4667795473927, + "learning_rate": 2.137983059722507e-06, + "loss": 218.7273, + "step": 41080 + }, + { + "epoch": 0.7902644953144758, + "grad_norm": 1596.264832146378, + "learning_rate": 2.1342129652810063e-06, + "loss": 225.6246, + "step": 41090 + }, + { + "epoch": 0.7904568205749564, + "grad_norm": 1658.0471103267905, + "learning_rate": 2.1304458007152694e-06, + "loss": 222.6364, + "step": 41100 + }, + { + "epoch": 0.7906491458354369, + "grad_norm": 1593.536466513159, + "learning_rate": 2.1266815674285026e-06, + "loss": 234.3179, + "step": 41110 + }, + { + "epoch": 0.7908414710959174, + "grad_norm": 1875.4041549400936, + "learning_rate": 2.1229202668228197e-06, + "loss": 237.6631, + "step": 41120 + }, + { + "epoch": 0.7910337963563979, + "grad_norm": 1421.251489425682, + "learning_rate": 2.1191619002992405e-06, + "loss": 218.0321, + "step": 41130 + }, + { + "epoch": 0.7912261216168784, + "grad_norm": 1855.742910843341, + "learning_rate": 2.1154064692577e-06, + "loss": 225.4099, + "step": 41140 + }, + { + "epoch": 0.791418446877359, + "grad_norm": 1457.0500531867021, + "learning_rate": 2.111653975097029e-06, + "loss": 223.2658, + "step": 41150 + }, + { + "epoch": 0.7916107721378395, + "grad_norm": 1588.1258031117427, + "learning_rate": 2.1079044192149713e-06, + "loss": 226.5197, + "step": 41160 + }, + { + "epoch": 0.79180309739832, + "grad_norm": 1478.0282946570778, + "learning_rate": 2.1041578030081777e-06, + "loss": 225.7318, + "step": 41170 + }, + { + "epoch": 0.7919954226588005, + "grad_norm": 1437.370431691596, + "learning_rate": 2.100414127872198e-06, + "loss": 221.009, + "step": 41180 + }, + { + "epoch": 0.792187747919281, + "grad_norm": 1615.315421894461, + "learning_rate": 2.0966733952014904e-06, + "loss": 233.6552, + "step": 41190 + }, + { + "epoch": 0.7923800731797617, + "grad_norm": 1333.4924681749596, + "learning_rate": 2.0929356063894125e-06, + "loss": 225.8451, + "step": 41200 + }, + { + "epoch": 0.7925723984402422, + "grad_norm": 1664.8868194383929, + "learning_rate": 2.089200762828234e-06, + "loss": 227.6047, + "step": 41210 + }, + { + "epoch": 0.7927647237007227, + "grad_norm": 1423.8189205683134, + "learning_rate": 2.0854688659091203e-06, + "loss": 228.7122, + "step": 41220 + }, + { + "epoch": 0.7929570489612032, + "grad_norm": 1473.3704616277328, + "learning_rate": 2.08173991702214e-06, + "loss": 218.8115, + "step": 41230 + }, + { + "epoch": 0.7931493742216837, + "grad_norm": 1610.98361990669, + "learning_rate": 2.0780139175562675e-06, + "loss": 228.3723, + "step": 41240 + }, + { + "epoch": 0.7933416994821643, + "grad_norm": 1597.2059162794196, + "learning_rate": 2.0742908688993746e-06, + "loss": 222.0698, + "step": 41250 + }, + { + "epoch": 0.7935340247426448, + "grad_norm": 1464.8980865615158, + "learning_rate": 2.070570772438236e-06, + "loss": 220.8634, + "step": 41260 + }, + { + "epoch": 0.7937263500031253, + "grad_norm": 1585.9093915142003, + "learning_rate": 2.066853629558524e-06, + "loss": 228.2862, + "step": 41270 + }, + { + "epoch": 0.7939186752636058, + "grad_norm": 1400.350859921243, + "learning_rate": 2.0631394416448157e-06, + "loss": 217.2972, + "step": 41280 + }, + { + "epoch": 0.7941110005240863, + "grad_norm": 1381.219744142666, + "learning_rate": 2.059428210080583e-06, + "loss": 219.7499, + "step": 41290 + }, + { + "epoch": 0.7943033257845669, + "grad_norm": 1631.2974577290759, + "learning_rate": 2.0557199362482005e-06, + "loss": 228.6004, + "step": 41300 + }, + { + "epoch": 0.7944956510450474, + "grad_norm": 1523.135496361386, + "learning_rate": 2.052014621528935e-06, + "loss": 225.3294, + "step": 41310 + }, + { + "epoch": 0.7946879763055279, + "grad_norm": 1443.709965686497, + "learning_rate": 2.048312267302961e-06, + "loss": 223.9784, + "step": 41320 + }, + { + "epoch": 0.7948803015660084, + "grad_norm": 1299.031510551774, + "learning_rate": 2.044612874949341e-06, + "loss": 221.0394, + "step": 41330 + }, + { + "epoch": 0.7950726268264889, + "grad_norm": 1524.2265805064194, + "learning_rate": 2.040916445846034e-06, + "loss": 232.1858, + "step": 41340 + }, + { + "epoch": 0.7952649520869695, + "grad_norm": 1603.2730902821102, + "learning_rate": 2.037222981369905e-06, + "loss": 222.1017, + "step": 41350 + }, + { + "epoch": 0.79545727734745, + "grad_norm": 1587.3299425360408, + "learning_rate": 2.033532482896707e-06, + "loss": 223.766, + "step": 41360 + }, + { + "epoch": 0.7956496026079305, + "grad_norm": 1795.0011121604073, + "learning_rate": 2.0298449518010875e-06, + "loss": 224.8888, + "step": 41370 + }, + { + "epoch": 0.795841927868411, + "grad_norm": 1633.8645199689108, + "learning_rate": 2.0261603894565897e-06, + "loss": 224.6307, + "step": 41380 + }, + { + "epoch": 0.7960342531288915, + "grad_norm": 1460.093598687193, + "learning_rate": 2.0224787972356574e-06, + "loss": 228.3823, + "step": 41390 + }, + { + "epoch": 0.796226578389372, + "grad_norm": 1368.5075483467213, + "learning_rate": 2.0188001765096198e-06, + "loss": 229.9522, + "step": 41400 + }, + { + "epoch": 0.7964189036498527, + "grad_norm": 1474.3032963938929, + "learning_rate": 2.0151245286486998e-06, + "loss": 225.8562, + "step": 41410 + }, + { + "epoch": 0.7966112289103332, + "grad_norm": 1457.0164609325093, + "learning_rate": 2.011451855022021e-06, + "loss": 229.4697, + "step": 41420 + }, + { + "epoch": 0.7968035541708137, + "grad_norm": 1436.3632642150014, + "learning_rate": 2.0077821569975885e-06, + "loss": 225.8121, + "step": 41430 + }, + { + "epoch": 0.7969958794312942, + "grad_norm": 1513.7343517483034, + "learning_rate": 2.0041154359423087e-06, + "loss": 232.8264, + "step": 41440 + }, + { + "epoch": 0.7971882046917748, + "grad_norm": 1610.768583100523, + "learning_rate": 2.000451693221971e-06, + "loss": 232.3558, + "step": 41450 + }, + { + "epoch": 0.7973805299522553, + "grad_norm": 1437.5389649037047, + "learning_rate": 1.9967909302012635e-06, + "loss": 223.8995, + "step": 41460 + }, + { + "epoch": 0.7975728552127358, + "grad_norm": 1443.5420866599811, + "learning_rate": 1.9931331482437553e-06, + "loss": 226.6861, + "step": 41470 + }, + { + "epoch": 0.7977651804732163, + "grad_norm": 1414.388069525892, + "learning_rate": 1.989478348711913e-06, + "loss": 223.2291, + "step": 41480 + }, + { + "epoch": 0.7979575057336968, + "grad_norm": 1537.3167023340775, + "learning_rate": 1.9858265329670844e-06, + "loss": 223.2962, + "step": 41490 + }, + { + "epoch": 0.7981498309941774, + "grad_norm": 1459.426850248904, + "learning_rate": 1.9821777023695178e-06, + "loss": 224.2818, + "step": 41500 + }, + { + "epoch": 0.7983421562546579, + "grad_norm": 1378.8985638760314, + "learning_rate": 1.9785318582783375e-06, + "loss": 223.5442, + "step": 41510 + }, + { + "epoch": 0.7985344815151384, + "grad_norm": 1365.9414691439122, + "learning_rate": 1.9748890020515577e-06, + "loss": 226.3647, + "step": 41520 + }, + { + "epoch": 0.7987268067756189, + "grad_norm": 1406.3202830468524, + "learning_rate": 1.9712491350460895e-06, + "loss": 220.562, + "step": 41530 + }, + { + "epoch": 0.7989191320360994, + "grad_norm": 1353.166381529778, + "learning_rate": 1.967612258617718e-06, + "loss": 221.5289, + "step": 41540 + }, + { + "epoch": 0.79911145729658, + "grad_norm": 1626.867499781207, + "learning_rate": 1.9639783741211218e-06, + "loss": 226.8763, + "step": 41550 + }, + { + "epoch": 0.7993037825570605, + "grad_norm": 1480.278065129493, + "learning_rate": 1.960347482909859e-06, + "loss": 226.7404, + "step": 41560 + }, + { + "epoch": 0.799496107817541, + "grad_norm": 1572.5062905441623, + "learning_rate": 1.956719586336382e-06, + "loss": 230.2421, + "step": 41570 + }, + { + "epoch": 0.7996884330780215, + "grad_norm": 1474.517313489486, + "learning_rate": 1.953094685752017e-06, + "loss": 226.2306, + "step": 41580 + }, + { + "epoch": 0.799880758338502, + "grad_norm": 1400.2555283665315, + "learning_rate": 1.949472782506984e-06, + "loss": 228.999, + "step": 41590 + }, + { + "epoch": 0.8000730835989825, + "grad_norm": 1734.7104279328498, + "learning_rate": 1.945853877950382e-06, + "loss": 236.8713, + "step": 41600 + }, + { + "epoch": 0.8002654088594632, + "grad_norm": 1705.9170945908565, + "learning_rate": 1.942237973430192e-06, + "loss": 231.3811, + "step": 41610 + }, + { + "epoch": 0.8004577341199437, + "grad_norm": 1619.689816450686, + "learning_rate": 1.9386250702932784e-06, + "loss": 228.5134, + "step": 41620 + }, + { + "epoch": 0.8006500593804242, + "grad_norm": 1423.624254683205, + "learning_rate": 1.9350151698853857e-06, + "loss": 228.8275, + "step": 41630 + }, + { + "epoch": 0.8008423846409047, + "grad_norm": 1381.19426376922, + "learning_rate": 1.9314082735511475e-06, + "loss": 225.8395, + "step": 41640 + }, + { + "epoch": 0.8010347099013853, + "grad_norm": 1518.1720519766825, + "learning_rate": 1.92780438263407e-06, + "loss": 221.9989, + "step": 41650 + }, + { + "epoch": 0.8012270351618658, + "grad_norm": 1322.3398325351816, + "learning_rate": 1.9242034984765436e-06, + "loss": 226.5761, + "step": 41660 + }, + { + "epoch": 0.8014193604223463, + "grad_norm": 1410.095477432377, + "learning_rate": 1.9206056224198346e-06, + "loss": 220.5587, + "step": 41670 + }, + { + "epoch": 0.8016116856828268, + "grad_norm": 1541.3107475864845, + "learning_rate": 1.9170107558040983e-06, + "loss": 233.8152, + "step": 41680 + }, + { + "epoch": 0.8018040109433073, + "grad_norm": 1450.8107893911642, + "learning_rate": 1.9134188999683613e-06, + "loss": 219.7217, + "step": 41690 + }, + { + "epoch": 0.8019963362037879, + "grad_norm": 1512.6253332264887, + "learning_rate": 1.9098300562505266e-06, + "loss": 224.997, + "step": 41700 + }, + { + "epoch": 0.8021886614642684, + "grad_norm": 1432.0950359231251, + "learning_rate": 1.9062442259873847e-06, + "loss": 228.6781, + "step": 41710 + }, + { + "epoch": 0.8023809867247489, + "grad_norm": 1612.6722181101088, + "learning_rate": 1.9026614105145935e-06, + "loss": 233.8999, + "step": 41720 + }, + { + "epoch": 0.8025733119852294, + "grad_norm": 1468.310800618677, + "learning_rate": 1.8990816111666976e-06, + "loss": 225.3823, + "step": 41730 + }, + { + "epoch": 0.8027656372457099, + "grad_norm": 1398.6367937041161, + "learning_rate": 1.8955048292771083e-06, + "loss": 222.4204, + "step": 41740 + }, + { + "epoch": 0.8029579625061904, + "grad_norm": 1540.886255575622, + "learning_rate": 1.891931066178122e-06, + "loss": 217.259, + "step": 41750 + }, + { + "epoch": 0.803150287766671, + "grad_norm": 1517.518704529034, + "learning_rate": 1.888360323200904e-06, + "loss": 216.4562, + "step": 41760 + }, + { + "epoch": 0.8033426130271515, + "grad_norm": 2003.9853512557702, + "learning_rate": 1.8847926016754947e-06, + "loss": 223.3393, + "step": 41770 + }, + { + "epoch": 0.803534938287632, + "grad_norm": 1663.3804137696186, + "learning_rate": 1.8812279029308177e-06, + "loss": 223.0596, + "step": 41780 + }, + { + "epoch": 0.8037272635481125, + "grad_norm": 1407.2459427677816, + "learning_rate": 1.87766622829466e-06, + "loss": 226.0139, + "step": 41790 + }, + { + "epoch": 0.803919588808593, + "grad_norm": 1568.0152663864164, + "learning_rate": 1.874107579093688e-06, + "loss": 236.8645, + "step": 41800 + }, + { + "epoch": 0.8041119140690736, + "grad_norm": 1468.331395695032, + "learning_rate": 1.870551956653437e-06, + "loss": 230.0562, + "step": 41810 + }, + { + "epoch": 0.8043042393295542, + "grad_norm": 1485.1079855934788, + "learning_rate": 1.8669993622983217e-06, + "loss": 230.1099, + "step": 41820 + }, + { + "epoch": 0.8044965645900347, + "grad_norm": 1353.9816137716862, + "learning_rate": 1.863449797351624e-06, + "loss": 224.6421, + "step": 41830 + }, + { + "epoch": 0.8046888898505152, + "grad_norm": 1438.6240927843733, + "learning_rate": 1.8599032631354963e-06, + "loss": 227.7561, + "step": 41840 + }, + { + "epoch": 0.8048812151109958, + "grad_norm": 1842.5025861498002, + "learning_rate": 1.8563597609709626e-06, + "loss": 234.0146, + "step": 41850 + }, + { + "epoch": 0.8050735403714763, + "grad_norm": 1443.658980293271, + "learning_rate": 1.852819292177922e-06, + "loss": 227.0709, + "step": 41860 + }, + { + "epoch": 0.8052658656319568, + "grad_norm": 1760.242506004807, + "learning_rate": 1.8492818580751414e-06, + "loss": 236.386, + "step": 41870 + }, + { + "epoch": 0.8054581908924373, + "grad_norm": 1478.5638971645387, + "learning_rate": 1.8457474599802527e-06, + "loss": 220.4513, + "step": 41880 + }, + { + "epoch": 0.8056505161529178, + "grad_norm": 1570.9939045590684, + "learning_rate": 1.842216099209767e-06, + "loss": 230.289, + "step": 41890 + }, + { + "epoch": 0.8058428414133983, + "grad_norm": 1355.8225959178471, + "learning_rate": 1.8386877770790524e-06, + "loss": 223.0868, + "step": 41900 + }, + { + "epoch": 0.8060351666738789, + "grad_norm": 1718.523060136808, + "learning_rate": 1.8351624949023539e-06, + "loss": 232.3824, + "step": 41910 + }, + { + "epoch": 0.8062274919343594, + "grad_norm": 1451.5035113607748, + "learning_rate": 1.8316402539927757e-06, + "loss": 227.675, + "step": 41920 + }, + { + "epoch": 0.8064198171948399, + "grad_norm": 1538.859997449665, + "learning_rate": 1.8281210556623007e-06, + "loss": 220.1817, + "step": 41930 + }, + { + "epoch": 0.8066121424553204, + "grad_norm": 1464.86440166237, + "learning_rate": 1.8246049012217693e-06, + "loss": 221.5458, + "step": 41940 + }, + { + "epoch": 0.8068044677158009, + "grad_norm": 1512.595131305657, + "learning_rate": 1.8210917919808891e-06, + "loss": 230.8413, + "step": 41950 + }, + { + "epoch": 0.8069967929762815, + "grad_norm": 1482.5182904132826, + "learning_rate": 1.817581729248239e-06, + "loss": 225.2418, + "step": 41960 + }, + { + "epoch": 0.807189118236762, + "grad_norm": 1725.5288490985367, + "learning_rate": 1.8140747143312588e-06, + "loss": 224.3757, + "step": 41970 + }, + { + "epoch": 0.8073814434972425, + "grad_norm": 1571.8246060725655, + "learning_rate": 1.8105707485362511e-06, + "loss": 217.8115, + "step": 41980 + }, + { + "epoch": 0.807573768757723, + "grad_norm": 1374.5560541387367, + "learning_rate": 1.8070698331683844e-06, + "loss": 225.0413, + "step": 41990 + }, + { + "epoch": 0.8077660940182035, + "grad_norm": 1430.9684476880302, + "learning_rate": 1.8035719695316955e-06, + "loss": 233.4534, + "step": 42000 + }, + { + "epoch": 0.8079584192786841, + "grad_norm": 1565.0280665823886, + "learning_rate": 1.800077158929081e-06, + "loss": 225.7904, + "step": 42010 + }, + { + "epoch": 0.8081507445391647, + "grad_norm": 1473.0317998942007, + "learning_rate": 1.7965854026622953e-06, + "loss": 222.2656, + "step": 42020 + }, + { + "epoch": 0.8083430697996452, + "grad_norm": 1615.1995173455418, + "learning_rate": 1.7930967020319667e-06, + "loss": 228.0199, + "step": 42030 + }, + { + "epoch": 0.8085353950601257, + "grad_norm": 1425.7883677433506, + "learning_rate": 1.7896110583375747e-06, + "loss": 216.8364, + "step": 42040 + }, + { + "epoch": 0.8087277203206062, + "grad_norm": 1339.312100319773, + "learning_rate": 1.7861284728774652e-06, + "loss": 224.1174, + "step": 42050 + }, + { + "epoch": 0.8089200455810868, + "grad_norm": 1388.5394112114393, + "learning_rate": 1.7826489469488395e-06, + "loss": 221.5267, + "step": 42060 + }, + { + "epoch": 0.8091123708415673, + "grad_norm": 1595.4362923310557, + "learning_rate": 1.7791724818477708e-06, + "loss": 222.603, + "step": 42070 + }, + { + "epoch": 0.8093046961020478, + "grad_norm": 1597.9080246972799, + "learning_rate": 1.7756990788691797e-06, + "loss": 220.4012, + "step": 42080 + }, + { + "epoch": 0.8094970213625283, + "grad_norm": 1392.3267254806283, + "learning_rate": 1.772228739306854e-06, + "loss": 224.5479, + "step": 42090 + }, + { + "epoch": 0.8096893466230088, + "grad_norm": 1491.138464024459, + "learning_rate": 1.7687614644534333e-06, + "loss": 221.3646, + "step": 42100 + }, + { + "epoch": 0.8098816718834894, + "grad_norm": 1676.4568754630063, + "learning_rate": 1.7652972556004267e-06, + "loss": 230.1084, + "step": 42110 + }, + { + "epoch": 0.8100739971439699, + "grad_norm": 1533.1927379000833, + "learning_rate": 1.7618361140381922e-06, + "loss": 234.9796, + "step": 42120 + }, + { + "epoch": 0.8102663224044504, + "grad_norm": 1694.2491070808348, + "learning_rate": 1.7583780410559449e-06, + "loss": 226.4873, + "step": 42130 + }, + { + "epoch": 0.8104586476649309, + "grad_norm": 1452.8672302265786, + "learning_rate": 1.7549230379417636e-06, + "loss": 226.541, + "step": 42140 + }, + { + "epoch": 0.8106509729254114, + "grad_norm": 1499.1505710043314, + "learning_rate": 1.7514711059825773e-06, + "loss": 220.3394, + "step": 42150 + }, + { + "epoch": 0.810843298185892, + "grad_norm": 1533.378060626979, + "learning_rate": 1.7480222464641783e-06, + "loss": 236.7391, + "step": 42160 + }, + { + "epoch": 0.8110356234463725, + "grad_norm": 1482.6842334290056, + "learning_rate": 1.7445764606712024e-06, + "loss": 221.3659, + "step": 42170 + }, + { + "epoch": 0.811227948706853, + "grad_norm": 1559.034946778375, + "learning_rate": 1.7411337498871561e-06, + "loss": 222.6689, + "step": 42180 + }, + { + "epoch": 0.8114202739673335, + "grad_norm": 1637.9939160450422, + "learning_rate": 1.737694115394387e-06, + "loss": 228.9204, + "step": 42190 + }, + { + "epoch": 0.811612599227814, + "grad_norm": 1509.8790838070415, + "learning_rate": 1.7342575584741018e-06, + "loss": 223.104, + "step": 42200 + }, + { + "epoch": 0.8118049244882946, + "grad_norm": 1635.9237807301313, + "learning_rate": 1.7308240804063648e-06, + "loss": 223.4572, + "step": 42210 + }, + { + "epoch": 0.8119972497487751, + "grad_norm": 1379.2380085877755, + "learning_rate": 1.7273936824700888e-06, + "loss": 230.1087, + "step": 42220 + }, + { + "epoch": 0.8121895750092557, + "grad_norm": 1305.2883850681742, + "learning_rate": 1.7239663659430384e-06, + "loss": 225.0166, + "step": 42230 + }, + { + "epoch": 0.8123819002697362, + "grad_norm": 1556.707919190861, + "learning_rate": 1.7205421321018312e-06, + "loss": 230.2891, + "step": 42240 + }, + { + "epoch": 0.8125742255302167, + "grad_norm": 1424.9442666222474, + "learning_rate": 1.7171209822219427e-06, + "loss": 232.6741, + "step": 42250 + }, + { + "epoch": 0.8127665507906973, + "grad_norm": 1508.260282414269, + "learning_rate": 1.713702917577692e-06, + "loss": 227.0938, + "step": 42260 + }, + { + "epoch": 0.8129588760511778, + "grad_norm": 1428.3730558534426, + "learning_rate": 1.71028793944225e-06, + "loss": 223.3841, + "step": 42270 + }, + { + "epoch": 0.8131512013116583, + "grad_norm": 1429.5273679361626, + "learning_rate": 1.7068760490876425e-06, + "loss": 223.1546, + "step": 42280 + }, + { + "epoch": 0.8133435265721388, + "grad_norm": 1482.4694123804115, + "learning_rate": 1.7034672477847402e-06, + "loss": 225.6837, + "step": 42290 + }, + { + "epoch": 0.8135358518326193, + "grad_norm": 1380.4252499273334, + "learning_rate": 1.700061536803268e-06, + "loss": 221.5166, + "step": 42300 + }, + { + "epoch": 0.8137281770930999, + "grad_norm": 1901.3136030811497, + "learning_rate": 1.696658917411793e-06, + "loss": 222.6794, + "step": 42310 + }, + { + "epoch": 0.8139205023535804, + "grad_norm": 1365.8335175244717, + "learning_rate": 1.6932593908777394e-06, + "loss": 227.1507, + "step": 42320 + }, + { + "epoch": 0.8141128276140609, + "grad_norm": 1375.7556180125393, + "learning_rate": 1.689862958467372e-06, + "loss": 221.4263, + "step": 42330 + }, + { + "epoch": 0.8143051528745414, + "grad_norm": 1626.2539559401287, + "learning_rate": 1.6864696214458065e-06, + "loss": 225.4705, + "step": 42340 + }, + { + "epoch": 0.8144974781350219, + "grad_norm": 1434.6336067280931, + "learning_rate": 1.683079381077003e-06, + "loss": 229.2176, + "step": 42350 + }, + { + "epoch": 0.8146898033955025, + "grad_norm": 1724.9467242078645, + "learning_rate": 1.6796922386237724e-06, + "loss": 231.7839, + "step": 42360 + }, + { + "epoch": 0.814882128655983, + "grad_norm": 1568.501378833988, + "learning_rate": 1.67630819534777e-06, + "loss": 226.6076, + "step": 42370 + }, + { + "epoch": 0.8150744539164635, + "grad_norm": 1670.451033621797, + "learning_rate": 1.6729272525094908e-06, + "loss": 226.68, + "step": 42380 + }, + { + "epoch": 0.815266779176944, + "grad_norm": 1510.4298470406395, + "learning_rate": 1.6695494113682874e-06, + "loss": 228.1382, + "step": 42390 + }, + { + "epoch": 0.8154591044374245, + "grad_norm": 1544.1046478193641, + "learning_rate": 1.6661746731823458e-06, + "loss": 221.8056, + "step": 42400 + }, + { + "epoch": 0.815651429697905, + "grad_norm": 1409.7897076184765, + "learning_rate": 1.6628030392087001e-06, + "loss": 227.2639, + "step": 42410 + }, + { + "epoch": 0.8158437549583856, + "grad_norm": 1441.3276676767996, + "learning_rate": 1.6594345107032273e-06, + "loss": 230.0636, + "step": 42420 + }, + { + "epoch": 0.8160360802188662, + "grad_norm": 1467.6132957286763, + "learning_rate": 1.6560690889206499e-06, + "loss": 229.1264, + "step": 42430 + }, + { + "epoch": 0.8162284054793467, + "grad_norm": 1403.3290535279677, + "learning_rate": 1.6527067751145354e-06, + "loss": 218.5342, + "step": 42440 + }, + { + "epoch": 0.8164207307398272, + "grad_norm": 1554.803857859893, + "learning_rate": 1.6493475705372863e-06, + "loss": 224.7972, + "step": 42450 + }, + { + "epoch": 0.8166130560003078, + "grad_norm": 1568.6354465635306, + "learning_rate": 1.6459914764401497e-06, + "loss": 230.3151, + "step": 42460 + }, + { + "epoch": 0.8168053812607883, + "grad_norm": 1645.2967112572787, + "learning_rate": 1.64263849407322e-06, + "loss": 225.4138, + "step": 42470 + }, + { + "epoch": 0.8169977065212688, + "grad_norm": 1417.7049701690194, + "learning_rate": 1.6392886246854234e-06, + "loss": 213.743, + "step": 42480 + }, + { + "epoch": 0.8171900317817493, + "grad_norm": 1582.4693283726072, + "learning_rate": 1.6359418695245311e-06, + "loss": 221.4301, + "step": 42490 + }, + { + "epoch": 0.8173823570422298, + "grad_norm": 1482.2087313161098, + "learning_rate": 1.632598229837158e-06, + "loss": 225.2374, + "step": 42500 + }, + { + "epoch": 0.8175746823027104, + "grad_norm": 1442.3268427688743, + "learning_rate": 1.629257706868751e-06, + "loss": 223.0, + "step": 42510 + }, + { + "epoch": 0.8177670075631909, + "grad_norm": 1407.8046978948455, + "learning_rate": 1.6259203018636016e-06, + "loss": 222.0701, + "step": 42520 + }, + { + "epoch": 0.8179593328236714, + "grad_norm": 1520.653430167389, + "learning_rate": 1.6225860160648343e-06, + "loss": 231.2354, + "step": 42530 + }, + { + "epoch": 0.8181516580841519, + "grad_norm": 1449.6154322147395, + "learning_rate": 1.6192548507144213e-06, + "loss": 227.8629, + "step": 42540 + }, + { + "epoch": 0.8183439833446324, + "grad_norm": 1433.257553441374, + "learning_rate": 1.6159268070531642e-06, + "loss": 219.5006, + "step": 42550 + }, + { + "epoch": 0.818536308605113, + "grad_norm": 1654.1740475795948, + "learning_rate": 1.6126018863207005e-06, + "loss": 232.3285, + "step": 42560 + }, + { + "epoch": 0.8187286338655935, + "grad_norm": 1476.4215860476145, + "learning_rate": 1.609280089755515e-06, + "loss": 226.2384, + "step": 42570 + }, + { + "epoch": 0.818920959126074, + "grad_norm": 1614.6135528003551, + "learning_rate": 1.6059614185949157e-06, + "loss": 226.9501, + "step": 42580 + }, + { + "epoch": 0.8191132843865545, + "grad_norm": 1451.2994149600986, + "learning_rate": 1.6026458740750584e-06, + "loss": 214.5616, + "step": 42590 + }, + { + "epoch": 0.819305609647035, + "grad_norm": 1410.1708830708812, + "learning_rate": 1.5993334574309238e-06, + "loss": 228.6564, + "step": 42600 + }, + { + "epoch": 0.8194979349075155, + "grad_norm": 1427.8235152665707, + "learning_rate": 1.5960241698963374e-06, + "loss": 218.9107, + "step": 42610 + }, + { + "epoch": 0.8196902601679961, + "grad_norm": 1438.13525418863, + "learning_rate": 1.592718012703951e-06, + "loss": 223.5326, + "step": 42620 + }, + { + "epoch": 0.8198825854284766, + "grad_norm": 1436.9970690690038, + "learning_rate": 1.589414987085255e-06, + "loss": 216.0942, + "step": 42630 + }, + { + "epoch": 0.8200749106889572, + "grad_norm": 1440.1952059084329, + "learning_rate": 1.5861150942705672e-06, + "loss": 223.2584, + "step": 42640 + }, + { + "epoch": 0.8202672359494377, + "grad_norm": 1310.5762976050596, + "learning_rate": 1.5828183354890504e-06, + "loss": 216.1386, + "step": 42650 + }, + { + "epoch": 0.8204595612099183, + "grad_norm": 1314.2962857885525, + "learning_rate": 1.5795247119686885e-06, + "loss": 217.2636, + "step": 42660 + }, + { + "epoch": 0.8206518864703988, + "grad_norm": 1648.8155214634844, + "learning_rate": 1.5762342249363006e-06, + "loss": 229.1493, + "step": 42670 + }, + { + "epoch": 0.8208442117308793, + "grad_norm": 1380.3027020197358, + "learning_rate": 1.5729468756175426e-06, + "loss": 215.5619, + "step": 42680 + }, + { + "epoch": 0.8210365369913598, + "grad_norm": 1497.7760674089052, + "learning_rate": 1.5696626652368973e-06, + "loss": 220.6415, + "step": 42690 + }, + { + "epoch": 0.8212288622518403, + "grad_norm": 1531.6529123032683, + "learning_rate": 1.5663815950176742e-06, + "loss": 225.0178, + "step": 42700 + }, + { + "epoch": 0.8214211875123208, + "grad_norm": 1496.7894727302607, + "learning_rate": 1.5631036661820232e-06, + "loss": 229.5158, + "step": 42710 + }, + { + "epoch": 0.8216135127728014, + "grad_norm": 1310.4907574438366, + "learning_rate": 1.5598288799509153e-06, + "loss": 220.1782, + "step": 42720 + }, + { + "epoch": 0.8218058380332819, + "grad_norm": 1465.0493818727255, + "learning_rate": 1.5565572375441573e-06, + "loss": 222.9248, + "step": 42730 + }, + { + "epoch": 0.8219981632937624, + "grad_norm": 1699.6861573890478, + "learning_rate": 1.5532887401803787e-06, + "loss": 225.4941, + "step": 42740 + }, + { + "epoch": 0.8221904885542429, + "grad_norm": 1474.509262915218, + "learning_rate": 1.5500233890770434e-06, + "loss": 223.9475, + "step": 42750 + }, + { + "epoch": 0.8223828138147234, + "grad_norm": 1479.9237445482188, + "learning_rate": 1.5467611854504406e-06, + "loss": 218.3563, + "step": 42760 + }, + { + "epoch": 0.822575139075204, + "grad_norm": 1478.6754869003344, + "learning_rate": 1.5435021305156862e-06, + "loss": 225.0581, + "step": 42770 + }, + { + "epoch": 0.8227674643356845, + "grad_norm": 1434.4670315392, + "learning_rate": 1.5402462254867222e-06, + "loss": 230.8776, + "step": 42780 + }, + { + "epoch": 0.822959789596165, + "grad_norm": 1617.2008651536803, + "learning_rate": 1.5369934715763235e-06, + "loss": 225.4059, + "step": 42790 + }, + { + "epoch": 0.8231521148566455, + "grad_norm": 1342.971304790718, + "learning_rate": 1.533743869996086e-06, + "loss": 219.8375, + "step": 42800 + }, + { + "epoch": 0.823344440117126, + "grad_norm": 1564.6699496237663, + "learning_rate": 1.5304974219564318e-06, + "loss": 222.778, + "step": 42810 + }, + { + "epoch": 0.8235367653776066, + "grad_norm": 1449.2396650691437, + "learning_rate": 1.5272541286666075e-06, + "loss": 219.3594, + "step": 42820 + }, + { + "epoch": 0.8237290906380871, + "grad_norm": 1502.664272263605, + "learning_rate": 1.5240139913346906e-06, + "loss": 224.089, + "step": 42830 + }, + { + "epoch": 0.8239214158985677, + "grad_norm": 1689.1833276265363, + "learning_rate": 1.5207770111675735e-06, + "loss": 230.1453, + "step": 42840 + }, + { + "epoch": 0.8241137411590482, + "grad_norm": 1334.5046971311374, + "learning_rate": 1.5175431893709836e-06, + "loss": 225.4314, + "step": 42850 + }, + { + "epoch": 0.8243060664195287, + "grad_norm": 1523.1610938396905, + "learning_rate": 1.5143125271494607e-06, + "loss": 221.144, + "step": 42860 + }, + { + "epoch": 0.8244983916800093, + "grad_norm": 1497.6574033801644, + "learning_rate": 1.511085025706378e-06, + "loss": 221.9221, + "step": 42870 + }, + { + "epoch": 0.8246907169404898, + "grad_norm": 1438.2043595609148, + "learning_rate": 1.5078606862439248e-06, + "loss": 220.46, + "step": 42880 + }, + { + "epoch": 0.8248830422009703, + "grad_norm": 1564.0477266607927, + "learning_rate": 1.5046395099631106e-06, + "loss": 224.4029, + "step": 42890 + }, + { + "epoch": 0.8250753674614508, + "grad_norm": 1635.0379462651622, + "learning_rate": 1.5014214980637754e-06, + "loss": 221.3462, + "step": 42900 + }, + { + "epoch": 0.8252676927219313, + "grad_norm": 1551.5149649277223, + "learning_rate": 1.4982066517445748e-06, + "loss": 219.9687, + "step": 42910 + }, + { + "epoch": 0.8254600179824119, + "grad_norm": 1707.4302422145995, + "learning_rate": 1.4949949722029811e-06, + "loss": 222.5139, + "step": 42920 + }, + { + "epoch": 0.8256523432428924, + "grad_norm": 1445.9578331543678, + "learning_rate": 1.4917864606352983e-06, + "loss": 233.4687, + "step": 42930 + }, + { + "epoch": 0.8258446685033729, + "grad_norm": 1599.6250489434442, + "learning_rate": 1.4885811182366406e-06, + "loss": 228.2906, + "step": 42940 + }, + { + "epoch": 0.8260369937638534, + "grad_norm": 1409.1313919489892, + "learning_rate": 1.485378946200946e-06, + "loss": 223.3477, + "step": 42950 + }, + { + "epoch": 0.8262293190243339, + "grad_norm": 1337.6787003616673, + "learning_rate": 1.4821799457209684e-06, + "loss": 220.3701, + "step": 42960 + }, + { + "epoch": 0.8264216442848145, + "grad_norm": 1462.1516407910956, + "learning_rate": 1.478984117988287e-06, + "loss": 224.9028, + "step": 42970 + }, + { + "epoch": 0.826613969545295, + "grad_norm": 1461.5549226889477, + "learning_rate": 1.4757914641932924e-06, + "loss": 219.3262, + "step": 42980 + }, + { + "epoch": 0.8268062948057755, + "grad_norm": 1452.2664113251153, + "learning_rate": 1.4726019855251928e-06, + "loss": 224.7554, + "step": 42990 + }, + { + "epoch": 0.826998620066256, + "grad_norm": 1406.4120536350338, + "learning_rate": 1.4694156831720185e-06, + "loss": 221.8885, + "step": 43000 + }, + { + "epoch": 0.8271909453267365, + "grad_norm": 1441.9756662111256, + "learning_rate": 1.4662325583206172e-06, + "loss": 228.0354, + "step": 43010 + }, + { + "epoch": 0.8273832705872171, + "grad_norm": 1452.276030064802, + "learning_rate": 1.463052612156649e-06, + "loss": 226.7396, + "step": 43020 + }, + { + "epoch": 0.8275755958476976, + "grad_norm": 1589.3922746547805, + "learning_rate": 1.4598758458645878e-06, + "loss": 226.6761, + "step": 43030 + }, + { + "epoch": 0.8277679211081782, + "grad_norm": 1606.813383999375, + "learning_rate": 1.4567022606277314e-06, + "loss": 222.9822, + "step": 43040 + }, + { + "epoch": 0.8279602463686587, + "grad_norm": 1482.623935353459, + "learning_rate": 1.4535318576281854e-06, + "loss": 219.5229, + "step": 43050 + }, + { + "epoch": 0.8281525716291392, + "grad_norm": 1470.459030106924, + "learning_rate": 1.4503646380468729e-06, + "loss": 219.3171, + "step": 43060 + }, + { + "epoch": 0.8283448968896198, + "grad_norm": 1678.8660847012534, + "learning_rate": 1.4472006030635288e-06, + "loss": 225.4163, + "step": 43070 + }, + { + "epoch": 0.8285372221501003, + "grad_norm": 1715.8514913998072, + "learning_rate": 1.4440397538567086e-06, + "loss": 227.607, + "step": 43080 + }, + { + "epoch": 0.8287295474105808, + "grad_norm": 1472.8729075439112, + "learning_rate": 1.4408820916037735e-06, + "loss": 222.1429, + "step": 43090 + }, + { + "epoch": 0.8289218726710613, + "grad_norm": 1623.528660479627, + "learning_rate": 1.4377276174808984e-06, + "loss": 221.4274, + "step": 43100 + }, + { + "epoch": 0.8291141979315418, + "grad_norm": 1559.8267466951684, + "learning_rate": 1.4345763326630768e-06, + "loss": 223.9511, + "step": 43110 + }, + { + "epoch": 0.8293065231920224, + "grad_norm": 1526.2159550879214, + "learning_rate": 1.4314282383241097e-06, + "loss": 221.1235, + "step": 43120 + }, + { + "epoch": 0.8294988484525029, + "grad_norm": 1578.8018533864104, + "learning_rate": 1.4282833356366066e-06, + "loss": 222.0095, + "step": 43130 + }, + { + "epoch": 0.8296911737129834, + "grad_norm": 1300.7080183334708, + "learning_rate": 1.4251416257719962e-06, + "loss": 219.9324, + "step": 43140 + }, + { + "epoch": 0.8298834989734639, + "grad_norm": 1772.3218074526933, + "learning_rate": 1.4220031099005094e-06, + "loss": 223.9319, + "step": 43150 + }, + { + "epoch": 0.8300758242339444, + "grad_norm": 1409.969836447749, + "learning_rate": 1.4188677891911961e-06, + "loss": 215.4104, + "step": 43160 + }, + { + "epoch": 0.830268149494425, + "grad_norm": 1550.5193590378076, + "learning_rate": 1.4157356648119103e-06, + "loss": 218.971, + "step": 43170 + }, + { + "epoch": 0.8304604747549055, + "grad_norm": 1563.572617345743, + "learning_rate": 1.412606737929313e-06, + "loss": 227.1725, + "step": 43180 + }, + { + "epoch": 0.830652800015386, + "grad_norm": 1311.857591919677, + "learning_rate": 1.4094810097088817e-06, + "loss": 224.0366, + "step": 43190 + }, + { + "epoch": 0.8308451252758665, + "grad_norm": 1407.0813132596777, + "learning_rate": 1.4063584813148979e-06, + "loss": 220.8986, + "step": 43200 + }, + { + "epoch": 0.831037450536347, + "grad_norm": 1509.6331052617006, + "learning_rate": 1.4032391539104484e-06, + "loss": 225.7195, + "step": 43210 + }, + { + "epoch": 0.8312297757968276, + "grad_norm": 1436.965555732776, + "learning_rate": 1.4001230286574363e-06, + "loss": 224.4622, + "step": 43220 + }, + { + "epoch": 0.8314221010573081, + "grad_norm": 1604.1059247403773, + "learning_rate": 1.3970101067165642e-06, + "loss": 222.9508, + "step": 43230 + }, + { + "epoch": 0.8316144263177886, + "grad_norm": 1426.6876703718547, + "learning_rate": 1.3939003892473446e-06, + "loss": 219.5681, + "step": 43240 + }, + { + "epoch": 0.8318067515782692, + "grad_norm": 1951.9408808587825, + "learning_rate": 1.390793877408093e-06, + "loss": 225.5036, + "step": 43250 + }, + { + "epoch": 0.8319990768387497, + "grad_norm": 1360.2465553514594, + "learning_rate": 1.3876905723559397e-06, + "loss": 219.0112, + "step": 43260 + }, + { + "epoch": 0.8321914020992303, + "grad_norm": 1403.5484396162897, + "learning_rate": 1.3845904752468075e-06, + "loss": 225.2093, + "step": 43270 + }, + { + "epoch": 0.8323837273597108, + "grad_norm": 1591.7749258548881, + "learning_rate": 1.3814935872354385e-06, + "loss": 220.1429, + "step": 43280 + }, + { + "epoch": 0.8325760526201913, + "grad_norm": 1395.5504830242419, + "learning_rate": 1.3783999094753653e-06, + "loss": 222.5725, + "step": 43290 + }, + { + "epoch": 0.8327683778806718, + "grad_norm": 1478.6462610159524, + "learning_rate": 1.3753094431189385e-06, + "loss": 227.9779, + "step": 43300 + }, + { + "epoch": 0.8329607031411523, + "grad_norm": 1328.720582165635, + "learning_rate": 1.3722221893173027e-06, + "loss": 215.7067, + "step": 43310 + }, + { + "epoch": 0.8331530284016329, + "grad_norm": 1331.731828943892, + "learning_rate": 1.369138149220407e-06, + "loss": 222.5609, + "step": 43320 + }, + { + "epoch": 0.8333453536621134, + "grad_norm": 1525.8811052669473, + "learning_rate": 1.3660573239770091e-06, + "loss": 226.8789, + "step": 43330 + }, + { + "epoch": 0.8335376789225939, + "grad_norm": 1507.0222285506968, + "learning_rate": 1.3629797147346635e-06, + "loss": 231.5131, + "step": 43340 + }, + { + "epoch": 0.8337300041830744, + "grad_norm": 1518.3584506104048, + "learning_rate": 1.3599053226397275e-06, + "loss": 226.9579, + "step": 43350 + }, + { + "epoch": 0.8339223294435549, + "grad_norm": 1421.9651108724152, + "learning_rate": 1.3568341488373637e-06, + "loss": 225.9569, + "step": 43360 + }, + { + "epoch": 0.8341146547040355, + "grad_norm": 1473.318928649225, + "learning_rate": 1.3537661944715342e-06, + "loss": 221.6633, + "step": 43370 + }, + { + "epoch": 0.834306979964516, + "grad_norm": 1378.9753465096528, + "learning_rate": 1.350701460685e-06, + "loss": 227.3245, + "step": 43380 + }, + { + "epoch": 0.8344993052249965, + "grad_norm": 1551.987471309156, + "learning_rate": 1.3476399486193214e-06, + "loss": 219.946, + "step": 43390 + }, + { + "epoch": 0.834691630485477, + "grad_norm": 1680.3512826887784, + "learning_rate": 1.3445816594148654e-06, + "loss": 231.3844, + "step": 43400 + }, + { + "epoch": 0.8348839557459575, + "grad_norm": 1737.9875027238609, + "learning_rate": 1.3415265942107925e-06, + "loss": 227.8542, + "step": 43410 + }, + { + "epoch": 0.835076281006438, + "grad_norm": 1318.0451602571711, + "learning_rate": 1.3384747541450615e-06, + "loss": 216.5979, + "step": 43420 + }, + { + "epoch": 0.8352686062669186, + "grad_norm": 1640.0894669784952, + "learning_rate": 1.3354261403544345e-06, + "loss": 224.2853, + "step": 43430 + }, + { + "epoch": 0.8354609315273991, + "grad_norm": 1575.223275261, + "learning_rate": 1.3323807539744726e-06, + "loss": 232.1573, + "step": 43440 + }, + { + "epoch": 0.8356532567878797, + "grad_norm": 1544.904791717601, + "learning_rate": 1.32933859613953e-06, + "loss": 224.5349, + "step": 43450 + }, + { + "epoch": 0.8358455820483602, + "grad_norm": 1606.8584481255227, + "learning_rate": 1.3262996679827567e-06, + "loss": 225.4846, + "step": 43460 + }, + { + "epoch": 0.8360379073088408, + "grad_norm": 1495.2814562081387, + "learning_rate": 1.3232639706361083e-06, + "loss": 224.0928, + "step": 43470 + }, + { + "epoch": 0.8362302325693213, + "grad_norm": 1749.0174161938844, + "learning_rate": 1.3202315052303304e-06, + "loss": 214.18, + "step": 43480 + }, + { + "epoch": 0.8364225578298018, + "grad_norm": 1457.572530900232, + "learning_rate": 1.3172022728949651e-06, + "loss": 223.0677, + "step": 43490 + }, + { + "epoch": 0.8366148830902823, + "grad_norm": 1659.302804104909, + "learning_rate": 1.3141762747583498e-06, + "loss": 216.7371, + "step": 43500 + }, + { + "epoch": 0.8368072083507628, + "grad_norm": 1420.9136059960424, + "learning_rate": 1.3111535119476237e-06, + "loss": 223.184, + "step": 43510 + }, + { + "epoch": 0.8369995336112434, + "grad_norm": 1676.0494682498827, + "learning_rate": 1.3081339855887133e-06, + "loss": 229.2944, + "step": 43520 + }, + { + "epoch": 0.8371918588717239, + "grad_norm": 1477.8419388152613, + "learning_rate": 1.3051176968063407e-06, + "loss": 226.9572, + "step": 43530 + }, + { + "epoch": 0.8373841841322044, + "grad_norm": 1455.6740985715503, + "learning_rate": 1.302104646724026e-06, + "loss": 222.4934, + "step": 43540 + }, + { + "epoch": 0.8375765093926849, + "grad_norm": 1473.3636505248817, + "learning_rate": 1.299094836464081e-06, + "loss": 225.0963, + "step": 43550 + }, + { + "epoch": 0.8377688346531654, + "grad_norm": 1497.7140480845171, + "learning_rate": 1.2960882671476062e-06, + "loss": 220.6281, + "step": 43560 + }, + { + "epoch": 0.837961159913646, + "grad_norm": 1291.8235591520413, + "learning_rate": 1.2930849398945033e-06, + "loss": 219.9122, + "step": 43570 + }, + { + "epoch": 0.8381534851741265, + "grad_norm": 1472.0839395415944, + "learning_rate": 1.2900848558234625e-06, + "loss": 224.1345, + "step": 43580 + }, + { + "epoch": 0.838345810434607, + "grad_norm": 1393.9235174101336, + "learning_rate": 1.2870880160519628e-06, + "loss": 223.6116, + "step": 43590 + }, + { + "epoch": 0.8385381356950875, + "grad_norm": 1556.480565862197, + "learning_rate": 1.2840944216962802e-06, + "loss": 226.3156, + "step": 43600 + }, + { + "epoch": 0.838730460955568, + "grad_norm": 1393.7299081988726, + "learning_rate": 1.2811040738714742e-06, + "loss": 220.957, + "step": 43610 + }, + { + "epoch": 0.8389227862160485, + "grad_norm": 1391.969938940345, + "learning_rate": 1.2781169736914067e-06, + "loss": 219.2455, + "step": 43620 + }, + { + "epoch": 0.8391151114765291, + "grad_norm": 1467.4454476909652, + "learning_rate": 1.275133122268719e-06, + "loss": 216.2487, + "step": 43630 + }, + { + "epoch": 0.8393074367370096, + "grad_norm": 1434.4400913722538, + "learning_rate": 1.2721525207148456e-06, + "loss": 221.2937, + "step": 43640 + }, + { + "epoch": 0.8394997619974901, + "grad_norm": 1527.7518616830482, + "learning_rate": 1.2691751701400145e-06, + "loss": 223.8823, + "step": 43650 + }, + { + "epoch": 0.8396920872579707, + "grad_norm": 1519.8938594863816, + "learning_rate": 1.2662010716532392e-06, + "loss": 223.0174, + "step": 43660 + }, + { + "epoch": 0.8398844125184513, + "grad_norm": 1515.1833066857318, + "learning_rate": 1.2632302263623198e-06, + "loss": 217.628, + "step": 43670 + }, + { + "epoch": 0.8400767377789318, + "grad_norm": 1410.9299564052328, + "learning_rate": 1.2602626353738479e-06, + "loss": 223.9453, + "step": 43680 + }, + { + "epoch": 0.8402690630394123, + "grad_norm": 1620.277058779955, + "learning_rate": 1.2572982997932037e-06, + "loss": 227.3114, + "step": 43690 + }, + { + "epoch": 0.8404613882998928, + "grad_norm": 1533.6290807670493, + "learning_rate": 1.2543372207245508e-06, + "loss": 222.9592, + "step": 43700 + }, + { + "epoch": 0.8406537135603733, + "grad_norm": 1402.3982433221956, + "learning_rate": 1.2513793992708467e-06, + "loss": 226.6808, + "step": 43710 + }, + { + "epoch": 0.8408460388208538, + "grad_norm": 1416.0522248472248, + "learning_rate": 1.2484248365338248e-06, + "loss": 222.8583, + "step": 43720 + }, + { + "epoch": 0.8410383640813344, + "grad_norm": 1411.279093768394, + "learning_rate": 1.2454735336140167e-06, + "loss": 224.7821, + "step": 43730 + }, + { + "epoch": 0.8412306893418149, + "grad_norm": 1430.420101239886, + "learning_rate": 1.2425254916107321e-06, + "loss": 222.7541, + "step": 43740 + }, + { + "epoch": 0.8414230146022954, + "grad_norm": 1434.1488896863368, + "learning_rate": 1.2395807116220648e-06, + "loss": 225.4853, + "step": 43750 + }, + { + "epoch": 0.8416153398627759, + "grad_norm": 1496.9988491196166, + "learning_rate": 1.236639194744902e-06, + "loss": 227.2553, + "step": 43760 + }, + { + "epoch": 0.8418076651232564, + "grad_norm": 1587.7390295015002, + "learning_rate": 1.233700942074907e-06, + "loss": 222.028, + "step": 43770 + }, + { + "epoch": 0.841999990383737, + "grad_norm": 1231.4497036693028, + "learning_rate": 1.230765954706531e-06, + "loss": 219.2602, + "step": 43780 + }, + { + "epoch": 0.8421923156442175, + "grad_norm": 1630.3885380414208, + "learning_rate": 1.227834233733005e-06, + "loss": 226.5649, + "step": 43790 + }, + { + "epoch": 0.842384640904698, + "grad_norm": 1494.4716992968256, + "learning_rate": 1.2249057802463527e-06, + "loss": 222.6803, + "step": 43800 + }, + { + "epoch": 0.8425769661651785, + "grad_norm": 1302.6174037147368, + "learning_rate": 1.221980595337372e-06, + "loss": 222.1433, + "step": 43810 + }, + { + "epoch": 0.842769291425659, + "grad_norm": 1445.7922549214225, + "learning_rate": 1.219058680095644e-06, + "loss": 224.5352, + "step": 43820 + }, + { + "epoch": 0.8429616166861396, + "grad_norm": 1415.5083646614253, + "learning_rate": 1.2161400356095376e-06, + "loss": 216.6406, + "step": 43830 + }, + { + "epoch": 0.8431539419466201, + "grad_norm": 1361.6642000422942, + "learning_rate": 1.2132246629661948e-06, + "loss": 221.9346, + "step": 43840 + }, + { + "epoch": 0.8433462672071006, + "grad_norm": 1357.4578283708697, + "learning_rate": 1.21031256325155e-06, + "loss": 222.678, + "step": 43850 + }, + { + "epoch": 0.8435385924675812, + "grad_norm": 1920.1957526131248, + "learning_rate": 1.2074037375503056e-06, + "loss": 214.6385, + "step": 43860 + }, + { + "epoch": 0.8437309177280617, + "grad_norm": 1392.2492299323367, + "learning_rate": 1.2044981869459571e-06, + "loss": 227.5373, + "step": 43870 + }, + { + "epoch": 0.8439232429885423, + "grad_norm": 1423.496928685203, + "learning_rate": 1.201595912520771e-06, + "loss": 225.3438, + "step": 43880 + }, + { + "epoch": 0.8441155682490228, + "grad_norm": 1565.7378212014708, + "learning_rate": 1.198696915355796e-06, + "loss": 232.4191, + "step": 43890 + }, + { + "epoch": 0.8443078935095033, + "grad_norm": 1586.833360537692, + "learning_rate": 1.1958011965308624e-06, + "loss": 228.0277, + "step": 43900 + }, + { + "epoch": 0.8445002187699838, + "grad_norm": 1599.4084633087273, + "learning_rate": 1.192908757124578e-06, + "loss": 224.7376, + "step": 43910 + }, + { + "epoch": 0.8446925440304643, + "grad_norm": 1355.9518693533403, + "learning_rate": 1.190019598214327e-06, + "loss": 224.9384, + "step": 43920 + }, + { + "epoch": 0.8448848692909449, + "grad_norm": 1757.2523577222307, + "learning_rate": 1.1871337208762723e-06, + "loss": 218.5743, + "step": 43930 + }, + { + "epoch": 0.8450771945514254, + "grad_norm": 1610.8442993721146, + "learning_rate": 1.1842511261853596e-06, + "loss": 228.0625, + "step": 43940 + }, + { + "epoch": 0.8452695198119059, + "grad_norm": 1406.8465327967401, + "learning_rate": 1.1813718152153054e-06, + "loss": 219.3295, + "step": 43950 + }, + { + "epoch": 0.8454618450723864, + "grad_norm": 1600.0839402379015, + "learning_rate": 1.1784957890386051e-06, + "loss": 219.9695, + "step": 43960 + }, + { + "epoch": 0.8456541703328669, + "grad_norm": 1600.5809616542167, + "learning_rate": 1.1756230487265296e-06, + "loss": 220.5824, + "step": 43970 + }, + { + "epoch": 0.8458464955933475, + "grad_norm": 1487.417388175908, + "learning_rate": 1.1727535953491308e-06, + "loss": 221.6486, + "step": 43980 + }, + { + "epoch": 0.846038820853828, + "grad_norm": 1334.7323064862499, + "learning_rate": 1.1698874299752293e-06, + "loss": 216.1754, + "step": 43990 + }, + { + "epoch": 0.8462311461143085, + "grad_norm": 1419.6007140269585, + "learning_rate": 1.1670245536724267e-06, + "loss": 223.4285, + "step": 44000 + }, + { + "epoch": 0.846423471374789, + "grad_norm": 1431.1831841195954, + "learning_rate": 1.1641649675070975e-06, + "loss": 224.9286, + "step": 44010 + }, + { + "epoch": 0.8466157966352695, + "grad_norm": 1701.9355507971266, + "learning_rate": 1.161308672544389e-06, + "loss": 237.1768, + "step": 44020 + }, + { + "epoch": 0.84680812189575, + "grad_norm": 1536.6224091129468, + "learning_rate": 1.1584556698482252e-06, + "loss": 220.6659, + "step": 44030 + }, + { + "epoch": 0.8470004471562306, + "grad_norm": 1439.5508660709074, + "learning_rate": 1.1556059604812985e-06, + "loss": 220.4054, + "step": 44040 + }, + { + "epoch": 0.8471927724167111, + "grad_norm": 1449.787752541528, + "learning_rate": 1.1527595455050844e-06, + "loss": 223.6956, + "step": 44050 + }, + { + "epoch": 0.8473850976771916, + "grad_norm": 1941.3541577436552, + "learning_rate": 1.1499164259798223e-06, + "loss": 223.6243, + "step": 44060 + }, + { + "epoch": 0.8475774229376722, + "grad_norm": 1329.2756865018018, + "learning_rate": 1.1470766029645253e-06, + "loss": 226.6673, + "step": 44070 + }, + { + "epoch": 0.8477697481981528, + "grad_norm": 1629.470305184223, + "learning_rate": 1.1442400775169849e-06, + "loss": 225.2041, + "step": 44080 + }, + { + "epoch": 0.8479620734586333, + "grad_norm": 1342.6852915218171, + "learning_rate": 1.141406850693757e-06, + "loss": 215.5427, + "step": 44090 + }, + { + "epoch": 0.8481543987191138, + "grad_norm": 1437.5793214251187, + "learning_rate": 1.1385769235501742e-06, + "loss": 226.4937, + "step": 44100 + }, + { + "epoch": 0.8483467239795943, + "grad_norm": 1616.9790363480909, + "learning_rate": 1.1357502971403335e-06, + "loss": 225.5648, + "step": 44110 + }, + { + "epoch": 0.8485390492400748, + "grad_norm": 1506.5620085123712, + "learning_rate": 1.132926972517111e-06, + "loss": 220.107, + "step": 44120 + }, + { + "epoch": 0.8487313745005554, + "grad_norm": 1889.0140679439746, + "learning_rate": 1.130106950732145e-06, + "loss": 217.7576, + "step": 44130 + }, + { + "epoch": 0.8489236997610359, + "grad_norm": 1456.0511442976265, + "learning_rate": 1.1272902328358514e-06, + "loss": 222.2518, + "step": 44140 + }, + { + "epoch": 0.8491160250215164, + "grad_norm": 1573.3631460039203, + "learning_rate": 1.1244768198774047e-06, + "loss": 229.7807, + "step": 44150 + }, + { + "epoch": 0.8493083502819969, + "grad_norm": 1680.161434159948, + "learning_rate": 1.121666712904762e-06, + "loss": 240.4775, + "step": 44160 + }, + { + "epoch": 0.8495006755424774, + "grad_norm": 1376.2295667532603, + "learning_rate": 1.1188599129646382e-06, + "loss": 222.535, + "step": 44170 + }, + { + "epoch": 0.849693000802958, + "grad_norm": 1611.2210517756535, + "learning_rate": 1.116056421102517e-06, + "loss": 230.8724, + "step": 44180 + }, + { + "epoch": 0.8498853260634385, + "grad_norm": 1445.7080513888532, + "learning_rate": 1.113256238362659e-06, + "loss": 227.2497, + "step": 44190 + }, + { + "epoch": 0.850077651323919, + "grad_norm": 1727.3940019080158, + "learning_rate": 1.1104593657880812e-06, + "loss": 221.5471, + "step": 44200 + }, + { + "epoch": 0.8502699765843995, + "grad_norm": 1576.8126251934868, + "learning_rate": 1.1076658044205746e-06, + "loss": 229.829, + "step": 44210 + }, + { + "epoch": 0.85046230184488, + "grad_norm": 1510.919988458017, + "learning_rate": 1.1048755553006928e-06, + "loss": 227.9921, + "step": 44220 + }, + { + "epoch": 0.8506546271053606, + "grad_norm": 1407.745251374931, + "learning_rate": 1.1020886194677605e-06, + "loss": 217.1494, + "step": 44230 + }, + { + "epoch": 0.8508469523658411, + "grad_norm": 1453.471618245384, + "learning_rate": 1.0993049979598635e-06, + "loss": 224.0534, + "step": 44240 + }, + { + "epoch": 0.8510392776263216, + "grad_norm": 1558.454782872489, + "learning_rate": 1.0965246918138529e-06, + "loss": 230.5848, + "step": 44250 + }, + { + "epoch": 0.8512316028868021, + "grad_norm": 1364.2571575929665, + "learning_rate": 1.093747702065351e-06, + "loss": 221.4876, + "step": 44260 + }, + { + "epoch": 0.8514239281472827, + "grad_norm": 1425.7823028069986, + "learning_rate": 1.090974029748736e-06, + "loss": 224.7708, + "step": 44270 + }, + { + "epoch": 0.8516162534077633, + "grad_norm": 1518.0736735190394, + "learning_rate": 1.0882036758971592e-06, + "loss": 222.334, + "step": 44280 + }, + { + "epoch": 0.8518085786682438, + "grad_norm": 1500.5306460131499, + "learning_rate": 1.0854366415425289e-06, + "loss": 231.6998, + "step": 44290 + }, + { + "epoch": 0.8520009039287243, + "grad_norm": 1259.386747688931, + "learning_rate": 1.0826729277155224e-06, + "loss": 222.8064, + "step": 44300 + }, + { + "epoch": 0.8521932291892048, + "grad_norm": 1519.9163576944618, + "learning_rate": 1.0799125354455752e-06, + "loss": 231.1701, + "step": 44310 + }, + { + "epoch": 0.8523855544496853, + "grad_norm": 1568.6990583958495, + "learning_rate": 1.0771554657608896e-06, + "loss": 222.5424, + "step": 44320 + }, + { + "epoch": 0.8525778797101659, + "grad_norm": 1632.8774978537, + "learning_rate": 1.0744017196884248e-06, + "loss": 221.433, + "step": 44330 + }, + { + "epoch": 0.8527702049706464, + "grad_norm": 1490.702483080145, + "learning_rate": 1.0716512982539106e-06, + "loss": 218.7103, + "step": 44340 + }, + { + "epoch": 0.8529625302311269, + "grad_norm": 1454.0263266258567, + "learning_rate": 1.0689042024818307e-06, + "loss": 221.9324, + "step": 44350 + }, + { + "epoch": 0.8531548554916074, + "grad_norm": 1826.897269366102, + "learning_rate": 1.0661604333954312e-06, + "loss": 228.8074, + "step": 44360 + }, + { + "epoch": 0.8533471807520879, + "grad_norm": 1324.664578344857, + "learning_rate": 1.0634199920167255e-06, + "loss": 218.7342, + "step": 44370 + }, + { + "epoch": 0.8535395060125684, + "grad_norm": 1476.943747636655, + "learning_rate": 1.0606828793664804e-06, + "loss": 221.6734, + "step": 44380 + }, + { + "epoch": 0.853731831273049, + "grad_norm": 1540.1715394889052, + "learning_rate": 1.0579490964642247e-06, + "loss": 224.3827, + "step": 44390 + }, + { + "epoch": 0.8539241565335295, + "grad_norm": 1664.3088120174123, + "learning_rate": 1.0552186443282464e-06, + "loss": 223.0133, + "step": 44400 + }, + { + "epoch": 0.85411648179401, + "grad_norm": 1376.1979941534355, + "learning_rate": 1.0524915239755939e-06, + "loss": 217.2271, + "step": 44410 + }, + { + "epoch": 0.8543088070544905, + "grad_norm": 1730.3667765054035, + "learning_rate": 1.0497677364220792e-06, + "loss": 222.5058, + "step": 44420 + }, + { + "epoch": 0.854501132314971, + "grad_norm": 1318.9617982524283, + "learning_rate": 1.047047282682262e-06, + "loss": 226.7995, + "step": 44430 + }, + { + "epoch": 0.8546934575754516, + "grad_norm": 1494.160955604607, + "learning_rate": 1.0443301637694713e-06, + "loss": 229.824, + "step": 44440 + }, + { + "epoch": 0.8548857828359321, + "grad_norm": 1616.6699796428238, + "learning_rate": 1.0416163806957857e-06, + "loss": 213.0148, + "step": 44450 + }, + { + "epoch": 0.8550781080964126, + "grad_norm": 1439.2446420159963, + "learning_rate": 1.0389059344720475e-06, + "loss": 222.4147, + "step": 44460 + }, + { + "epoch": 0.8552704333568931, + "grad_norm": 1338.2216121922509, + "learning_rate": 1.0361988261078482e-06, + "loss": 218.9765, + "step": 44470 + }, + { + "epoch": 0.8554627586173738, + "grad_norm": 1446.8531489569643, + "learning_rate": 1.0334950566115466e-06, + "loss": 216.0462, + "step": 44480 + }, + { + "epoch": 0.8556550838778543, + "grad_norm": 1478.387854339477, + "learning_rate": 1.0307946269902492e-06, + "loss": 223.0573, + "step": 44490 + }, + { + "epoch": 0.8558474091383348, + "grad_norm": 1408.6418333847616, + "learning_rate": 1.0280975382498225e-06, + "loss": 223.774, + "step": 44500 + }, + { + "epoch": 0.8560397343988153, + "grad_norm": 1431.47655305968, + "learning_rate": 1.0254037913948845e-06, + "loss": 211.6654, + "step": 44510 + }, + { + "epoch": 0.8562320596592958, + "grad_norm": 1550.89382220905, + "learning_rate": 1.0227133874288152e-06, + "loss": 227.8492, + "step": 44520 + }, + { + "epoch": 0.8564243849197763, + "grad_norm": 1383.1472593550031, + "learning_rate": 1.0200263273537458e-06, + "loss": 222.0676, + "step": 44530 + }, + { + "epoch": 0.8566167101802569, + "grad_norm": 1663.3412184091596, + "learning_rate": 1.0173426121705577e-06, + "loss": 228.0787, + "step": 44540 + }, + { + "epoch": 0.8568090354407374, + "grad_norm": 1449.0111952355635, + "learning_rate": 1.0146622428788943e-06, + "loss": 223.2425, + "step": 44550 + }, + { + "epoch": 0.8570013607012179, + "grad_norm": 1623.2614043960539, + "learning_rate": 1.0119852204771463e-06, + "loss": 230.3152, + "step": 44560 + }, + { + "epoch": 0.8571936859616984, + "grad_norm": 1375.9670550186127, + "learning_rate": 1.0093115459624637e-06, + "loss": 220.7529, + "step": 44570 + }, + { + "epoch": 0.857386011222179, + "grad_norm": 1395.1593052107223, + "learning_rate": 1.0066412203307419e-06, + "loss": 227.525, + "step": 44580 + }, + { + "epoch": 0.8575783364826595, + "grad_norm": 1527.8896744503206, + "learning_rate": 1.0039742445766376e-06, + "loss": 220.6923, + "step": 44590 + }, + { + "epoch": 0.85777066174314, + "grad_norm": 1505.368331046335, + "learning_rate": 1.0013106196935528e-06, + "loss": 226.6611, + "step": 44600 + }, + { + "epoch": 0.8579629870036205, + "grad_norm": 1602.0880846873388, + "learning_rate": 9.986503466736419e-07, + "loss": 218.8952, + "step": 44610 + }, + { + "epoch": 0.858155312264101, + "grad_norm": 1471.3230673162595, + "learning_rate": 9.959934265078176e-07, + "loss": 230.1545, + "step": 44620 + }, + { + "epoch": 0.8583476375245815, + "grad_norm": 1905.7754469448398, + "learning_rate": 9.933398601857347e-07, + "loss": 219.7658, + "step": 44630 + }, + { + "epoch": 0.8585399627850621, + "grad_norm": 1416.584642647331, + "learning_rate": 9.90689648695804e-07, + "loss": 219.2939, + "step": 44640 + }, + { + "epoch": 0.8587322880455426, + "grad_norm": 1490.6522672373076, + "learning_rate": 9.880427930251834e-07, + "loss": 219.2778, + "step": 44650 + }, + { + "epoch": 0.8589246133060231, + "grad_norm": 1387.4361209300773, + "learning_rate": 9.853992941597878e-07, + "loss": 219.1722, + "step": 44660 + }, + { + "epoch": 0.8591169385665036, + "grad_norm": 1330.4902070161509, + "learning_rate": 9.827591530842729e-07, + "loss": 217.3535, + "step": 44670 + }, + { + "epoch": 0.8593092638269842, + "grad_norm": 1289.5976318761966, + "learning_rate": 9.801223707820484e-07, + "loss": 218.8888, + "step": 44680 + }, + { + "epoch": 0.8595015890874648, + "grad_norm": 1396.0786162588342, + "learning_rate": 9.774889482352735e-07, + "loss": 216.9593, + "step": 44690 + }, + { + "epoch": 0.8596939143479453, + "grad_norm": 1314.511252984968, + "learning_rate": 9.74858886424852e-07, + "loss": 218.7266, + "step": 44700 + }, + { + "epoch": 0.8598862396084258, + "grad_norm": 1467.4830446146557, + "learning_rate": 9.722321863304418e-07, + "loss": 224.7169, + "step": 44710 + }, + { + "epoch": 0.8600785648689063, + "grad_norm": 1651.1710817944315, + "learning_rate": 9.696088489304412e-07, + "loss": 225.7997, + "step": 44720 + }, + { + "epoch": 0.8602708901293868, + "grad_norm": 1472.1879795849393, + "learning_rate": 9.669888752020061e-07, + "loss": 221.0106, + "step": 44730 + }, + { + "epoch": 0.8604632153898674, + "grad_norm": 1379.900325401597, + "learning_rate": 9.643722661210285e-07, + "loss": 213.6883, + "step": 44740 + }, + { + "epoch": 0.8606555406503479, + "grad_norm": 1403.5933765281186, + "learning_rate": 9.617590226621543e-07, + "loss": 218.9994, + "step": 44750 + }, + { + "epoch": 0.8608478659108284, + "grad_norm": 1375.9082875043493, + "learning_rate": 9.5914914579877e-07, + "loss": 216.9004, + "step": 44760 + }, + { + "epoch": 0.8610401911713089, + "grad_norm": 1412.0687070540819, + "learning_rate": 9.565426365030172e-07, + "loss": 222.487, + "step": 44770 + }, + { + "epoch": 0.8612325164317894, + "grad_norm": 1341.036516872241, + "learning_rate": 9.539394957457737e-07, + "loss": 229.0115, + "step": 44780 + }, + { + "epoch": 0.86142484169227, + "grad_norm": 1292.2400960749824, + "learning_rate": 9.51339724496666e-07, + "loss": 220.5234, + "step": 44790 + }, + { + "epoch": 0.8616171669527505, + "grad_norm": 1258.388564414747, + "learning_rate": 9.487433237240695e-07, + "loss": 216.5741, + "step": 44800 + }, + { + "epoch": 0.861809492213231, + "grad_norm": 1546.7115017649805, + "learning_rate": 9.461502943950973e-07, + "loss": 227.9115, + "step": 44810 + }, + { + "epoch": 0.8620018174737115, + "grad_norm": 1233.3411836550538, + "learning_rate": 9.435606374756123e-07, + "loss": 213.2369, + "step": 44820 + }, + { + "epoch": 0.862194142734192, + "grad_norm": 1451.3451103240286, + "learning_rate": 9.409743539302152e-07, + "loss": 214.0367, + "step": 44830 + }, + { + "epoch": 0.8623864679946726, + "grad_norm": 1606.773925143552, + "learning_rate": 9.383914447222576e-07, + "loss": 222.4851, + "step": 44840 + }, + { + "epoch": 0.8625787932551531, + "grad_norm": 1467.028494450233, + "learning_rate": 9.358119108138309e-07, + "loss": 223.8904, + "step": 44850 + }, + { + "epoch": 0.8627711185156336, + "grad_norm": 1395.128933470903, + "learning_rate": 9.332357531657644e-07, + "loss": 214.0299, + "step": 44860 + }, + { + "epoch": 0.8629634437761141, + "grad_norm": 1427.7804928539213, + "learning_rate": 9.306629727376404e-07, + "loss": 220.1048, + "step": 44870 + }, + { + "epoch": 0.8631557690365947, + "grad_norm": 1489.3891766195459, + "learning_rate": 9.280935704877736e-07, + "loss": 216.5844, + "step": 44880 + }, + { + "epoch": 0.8633480942970753, + "grad_norm": 1596.6850039793194, + "learning_rate": 9.255275473732239e-07, + "loss": 223.8956, + "step": 44890 + }, + { + "epoch": 0.8635404195575558, + "grad_norm": 1552.2301596822097, + "learning_rate": 9.229649043497924e-07, + "loss": 223.7186, + "step": 44900 + }, + { + "epoch": 0.8637327448180363, + "grad_norm": 1658.1151447156915, + "learning_rate": 9.20405642372022e-07, + "loss": 226.0265, + "step": 44910 + }, + { + "epoch": 0.8639250700785168, + "grad_norm": 1654.2692392789286, + "learning_rate": 9.178497623931959e-07, + "loss": 222.4252, + "step": 44920 + }, + { + "epoch": 0.8641173953389973, + "grad_norm": 1458.4345958966583, + "learning_rate": 9.152972653653369e-07, + "loss": 218.5969, + "step": 44930 + }, + { + "epoch": 0.8643097205994779, + "grad_norm": 1486.6553104634793, + "learning_rate": 9.127481522392068e-07, + "loss": 222.7092, + "step": 44940 + }, + { + "epoch": 0.8645020458599584, + "grad_norm": 1472.2827187767418, + "learning_rate": 9.102024239643092e-07, + "loss": 220.9687, + "step": 44950 + }, + { + "epoch": 0.8646943711204389, + "grad_norm": 1632.8645940264787, + "learning_rate": 9.076600814888869e-07, + "loss": 223.1505, + "step": 44960 + }, + { + "epoch": 0.8648866963809194, + "grad_norm": 1430.1459777674636, + "learning_rate": 9.051211257599169e-07, + "loss": 223.5466, + "step": 44970 + }, + { + "epoch": 0.8650790216413999, + "grad_norm": 1289.12944328112, + "learning_rate": 9.025855577231224e-07, + "loss": 223.2009, + "step": 44980 + }, + { + "epoch": 0.8652713469018805, + "grad_norm": 1509.108472176515, + "learning_rate": 9.000533783229581e-07, + "loss": 224.9003, + "step": 44990 + }, + { + "epoch": 0.865463672162361, + "grad_norm": 1355.3375270870872, + "learning_rate": 8.975245885026207e-07, + "loss": 226.8134, + "step": 45000 + }, + { + "epoch": 0.8656559974228415, + "grad_norm": 1628.4568008727547, + "learning_rate": 8.949991892040399e-07, + "loss": 224.4073, + "step": 45010 + }, + { + "epoch": 0.865848322683322, + "grad_norm": 1581.0882427538825, + "learning_rate": 8.92477181367889e-07, + "loss": 218.0373, + "step": 45020 + }, + { + "epoch": 0.8660406479438025, + "grad_norm": 1430.9257248548472, + "learning_rate": 8.899585659335719e-07, + "loss": 219.8096, + "step": 45030 + }, + { + "epoch": 0.866232973204283, + "grad_norm": 1634.061489982721, + "learning_rate": 8.874433438392305e-07, + "loss": 237.2581, + "step": 45040 + }, + { + "epoch": 0.8664252984647636, + "grad_norm": 1455.2076029075677, + "learning_rate": 8.849315160217465e-07, + "loss": 222.0419, + "step": 45050 + }, + { + "epoch": 0.8666176237252441, + "grad_norm": 1568.5322390886718, + "learning_rate": 8.824230834167325e-07, + "loss": 221.4173, + "step": 45060 + }, + { + "epoch": 0.8668099489857246, + "grad_norm": 1378.8242173660622, + "learning_rate": 8.799180469585378e-07, + "loss": 233.1714, + "step": 45070 + }, + { + "epoch": 0.8670022742462051, + "grad_norm": 1381.1798836559813, + "learning_rate": 8.77416407580246e-07, + "loss": 220.9373, + "step": 45080 + }, + { + "epoch": 0.8671945995066858, + "grad_norm": 1524.988751031165, + "learning_rate": 8.749181662136785e-07, + "loss": 227.4413, + "step": 45090 + }, + { + "epoch": 0.8673869247671663, + "grad_norm": 1581.5874654034042, + "learning_rate": 8.724233237893897e-07, + "loss": 219.9931, + "step": 45100 + }, + { + "epoch": 0.8675792500276468, + "grad_norm": 1483.5962658636972, + "learning_rate": 8.699318812366641e-07, + "loss": 223.0417, + "step": 45110 + }, + { + "epoch": 0.8677715752881273, + "grad_norm": 1390.2358468890366, + "learning_rate": 8.67443839483526e-07, + "loss": 226.2141, + "step": 45120 + }, + { + "epoch": 0.8679639005486078, + "grad_norm": 1369.8699902251954, + "learning_rate": 8.649591994567275e-07, + "loss": 219.5672, + "step": 45130 + }, + { + "epoch": 0.8681562258090884, + "grad_norm": 1333.5229238336788, + "learning_rate": 8.62477962081758e-07, + "loss": 221.5544, + "step": 45140 + }, + { + "epoch": 0.8683485510695689, + "grad_norm": 1616.3378021343551, + "learning_rate": 8.600001282828341e-07, + "loss": 229.0536, + "step": 45150 + }, + { + "epoch": 0.8685408763300494, + "grad_norm": 1475.6091182030611, + "learning_rate": 8.57525698982914e-07, + "loss": 224.3053, + "step": 45160 + }, + { + "epoch": 0.8687332015905299, + "grad_norm": 1461.103468956903, + "learning_rate": 8.550546751036759e-07, + "loss": 227.872, + "step": 45170 + }, + { + "epoch": 0.8689255268510104, + "grad_norm": 1622.1520427962919, + "learning_rate": 8.525870575655393e-07, + "loss": 221.3598, + "step": 45180 + }, + { + "epoch": 0.869117852111491, + "grad_norm": 1466.3661645519253, + "learning_rate": 8.501228472876466e-07, + "loss": 225.4994, + "step": 45190 + }, + { + "epoch": 0.8693101773719715, + "grad_norm": 1515.9959590644955, + "learning_rate": 8.476620451878803e-07, + "loss": 219.3026, + "step": 45200 + }, + { + "epoch": 0.869502502632452, + "grad_norm": 1418.7419482863531, + "learning_rate": 8.45204652182846e-07, + "loss": 224.0093, + "step": 45210 + }, + { + "epoch": 0.8696948278929325, + "grad_norm": 1488.427391426985, + "learning_rate": 8.427506691878806e-07, + "loss": 223.803, + "step": 45220 + }, + { + "epoch": 0.869887153153413, + "grad_norm": 2231.450311309086, + "learning_rate": 8.403000971170561e-07, + "loss": 218.2955, + "step": 45230 + }, + { + "epoch": 0.8700794784138935, + "grad_norm": 1375.2335577883996, + "learning_rate": 8.378529368831667e-07, + "loss": 221.5526, + "step": 45240 + }, + { + "epoch": 0.8702718036743741, + "grad_norm": 1424.9905254612718, + "learning_rate": 8.354091893977401e-07, + "loss": 218.7038, + "step": 45250 + }, + { + "epoch": 0.8704641289348546, + "grad_norm": 1247.697137215622, + "learning_rate": 8.329688555710336e-07, + "loss": 221.3929, + "step": 45260 + }, + { + "epoch": 0.8706564541953351, + "grad_norm": 1351.9565887296799, + "learning_rate": 8.305319363120279e-07, + "loss": 226.2381, + "step": 45270 + }, + { + "epoch": 0.8708487794558156, + "grad_norm": 1512.986668930324, + "learning_rate": 8.280984325284392e-07, + "loss": 220.8623, + "step": 45280 + }, + { + "epoch": 0.8710411047162963, + "grad_norm": 1421.0483552375156, + "learning_rate": 8.256683451267044e-07, + "loss": 218.3972, + "step": 45290 + }, + { + "epoch": 0.8712334299767768, + "grad_norm": 1622.1881608777096, + "learning_rate": 8.232416750119921e-07, + "loss": 220.7554, + "step": 45300 + }, + { + "epoch": 0.8714257552372573, + "grad_norm": 2046.142356125899, + "learning_rate": 8.208184230881966e-07, + "loss": 220.5351, + "step": 45310 + }, + { + "epoch": 0.8716180804977378, + "grad_norm": 1521.4251426531416, + "learning_rate": 8.183985902579405e-07, + "loss": 218.0601, + "step": 45320 + }, + { + "epoch": 0.8718104057582183, + "grad_norm": 1304.96593477745, + "learning_rate": 8.159821774225685e-07, + "loss": 213.2861, + "step": 45330 + }, + { + "epoch": 0.8720027310186989, + "grad_norm": 1541.0009248092513, + "learning_rate": 8.13569185482157e-07, + "loss": 217.9085, + "step": 45340 + }, + { + "epoch": 0.8721950562791794, + "grad_norm": 1342.556267511391, + "learning_rate": 8.111596153355061e-07, + "loss": 214.8615, + "step": 45350 + }, + { + "epoch": 0.8723873815396599, + "grad_norm": 1433.9957221971968, + "learning_rate": 8.08753467880139e-07, + "loss": 223.0772, + "step": 45360 + }, + { + "epoch": 0.8725797068001404, + "grad_norm": 1404.9413843291154, + "learning_rate": 8.063507440123052e-07, + "loss": 216.1271, + "step": 45370 + }, + { + "epoch": 0.8727720320606209, + "grad_norm": 1585.5505368463832, + "learning_rate": 8.039514446269836e-07, + "loss": 225.7973, + "step": 45380 + }, + { + "epoch": 0.8729643573211014, + "grad_norm": 1503.9200999930997, + "learning_rate": 8.015555706178702e-07, + "loss": 220.2815, + "step": 45390 + }, + { + "epoch": 0.873156682581582, + "grad_norm": 1564.5730298590745, + "learning_rate": 7.991631228773889e-07, + "loss": 224.5119, + "step": 45400 + }, + { + "epoch": 0.8733490078420625, + "grad_norm": 1470.1508064096213, + "learning_rate": 7.967741022966857e-07, + "loss": 216.5776, + "step": 45410 + }, + { + "epoch": 0.873541333102543, + "grad_norm": 1455.5821755407633, + "learning_rate": 7.943885097656356e-07, + "loss": 214.1013, + "step": 45420 + }, + { + "epoch": 0.8737336583630235, + "grad_norm": 1414.6460342623484, + "learning_rate": 7.920063461728311e-07, + "loss": 217.3768, + "step": 45430 + }, + { + "epoch": 0.873925983623504, + "grad_norm": 1421.9858631033972, + "learning_rate": 7.896276124055846e-07, + "loss": 220.0106, + "step": 45440 + }, + { + "epoch": 0.8741183088839846, + "grad_norm": 1500.0846151527558, + "learning_rate": 7.872523093499396e-07, + "loss": 223.4208, + "step": 45450 + }, + { + "epoch": 0.8743106341444651, + "grad_norm": 1495.365602861149, + "learning_rate": 7.848804378906561e-07, + "loss": 216.5039, + "step": 45460 + }, + { + "epoch": 0.8745029594049456, + "grad_norm": 1285.9684832354624, + "learning_rate": 7.825119989112173e-07, + "loss": 213.8228, + "step": 45470 + }, + { + "epoch": 0.8746952846654261, + "grad_norm": 1457.220260123584, + "learning_rate": 7.801469932938255e-07, + "loss": 222.097, + "step": 45480 + }, + { + "epoch": 0.8748876099259066, + "grad_norm": 1345.7025421902993, + "learning_rate": 7.777854219194092e-07, + "loss": 215.61, + "step": 45490 + }, + { + "epoch": 0.8750799351863873, + "grad_norm": 1358.6142492246643, + "learning_rate": 7.754272856676126e-07, + "loss": 223.7884, + "step": 45500 + }, + { + "epoch": 0.8752722604468678, + "grad_norm": 1661.8384848373844, + "learning_rate": 7.73072585416802e-07, + "loss": 225.425, + "step": 45510 + }, + { + "epoch": 0.8754645857073483, + "grad_norm": 1437.152989007928, + "learning_rate": 7.707213220440679e-07, + "loss": 214.1261, + "step": 45520 + }, + { + "epoch": 0.8756569109678288, + "grad_norm": 1391.568431687959, + "learning_rate": 7.683734964252143e-07, + "loss": 219.6732, + "step": 45530 + }, + { + "epoch": 0.8758492362283093, + "grad_norm": 1767.2067400187598, + "learning_rate": 7.66029109434766e-07, + "loss": 223.9062, + "step": 45540 + }, + { + "epoch": 0.8760415614887899, + "grad_norm": 1676.618011885755, + "learning_rate": 7.636881619459724e-07, + "loss": 220.9183, + "step": 45550 + }, + { + "epoch": 0.8762338867492704, + "grad_norm": 1699.694816764324, + "learning_rate": 7.613506548307936e-07, + "loss": 222.0517, + "step": 45560 + }, + { + "epoch": 0.8764262120097509, + "grad_norm": 1458.4701739727388, + "learning_rate": 7.590165889599166e-07, + "loss": 219.5811, + "step": 45570 + }, + { + "epoch": 0.8766185372702314, + "grad_norm": 1284.517256290324, + "learning_rate": 7.566859652027381e-07, + "loss": 217.8593, + "step": 45580 + }, + { + "epoch": 0.8768108625307119, + "grad_norm": 1589.057420851381, + "learning_rate": 7.543587844273814e-07, + "loss": 221.2403, + "step": 45590 + }, + { + "epoch": 0.8770031877911925, + "grad_norm": 1435.0343680908527, + "learning_rate": 7.52035047500681e-07, + "loss": 215.9271, + "step": 45600 + }, + { + "epoch": 0.877195513051673, + "grad_norm": 1497.3560958758344, + "learning_rate": 7.497147552881901e-07, + "loss": 222.116, + "step": 45610 + }, + { + "epoch": 0.8773878383121535, + "grad_norm": 1523.3861710197339, + "learning_rate": 7.473979086541772e-07, + "loss": 221.9458, + "step": 45620 + }, + { + "epoch": 0.877580163572634, + "grad_norm": 1440.1717469910482, + "learning_rate": 7.450845084616332e-07, + "loss": 217.7438, + "step": 45630 + }, + { + "epoch": 0.8777724888331145, + "grad_norm": 1462.3226228629226, + "learning_rate": 7.427745555722598e-07, + "loss": 227.7105, + "step": 45640 + }, + { + "epoch": 0.8779648140935951, + "grad_norm": 1560.5866363561129, + "learning_rate": 7.404680508464767e-07, + "loss": 223.216, + "step": 45650 + }, + { + "epoch": 0.8781571393540756, + "grad_norm": 1723.0172870025417, + "learning_rate": 7.381649951434167e-07, + "loss": 222.4714, + "step": 45660 + }, + { + "epoch": 0.8783494646145561, + "grad_norm": 1546.0606515429151, + "learning_rate": 7.358653893209333e-07, + "loss": 228.8557, + "step": 45670 + }, + { + "epoch": 0.8785417898750366, + "grad_norm": 1319.804459303906, + "learning_rate": 7.335692342355882e-07, + "loss": 221.1393, + "step": 45680 + }, + { + "epoch": 0.8787341151355171, + "grad_norm": 1265.532708727102, + "learning_rate": 7.312765307426662e-07, + "loss": 215.6548, + "step": 45690 + }, + { + "epoch": 0.8789264403959978, + "grad_norm": 1423.9723752623859, + "learning_rate": 7.28987279696155e-07, + "loss": 222.775, + "step": 45700 + }, + { + "epoch": 0.8791187656564783, + "grad_norm": 1382.3509709150283, + "learning_rate": 7.267014819487695e-07, + "loss": 226.3133, + "step": 45710 + }, + { + "epoch": 0.8793110909169588, + "grad_norm": 1436.0670010700708, + "learning_rate": 7.244191383519272e-07, + "loss": 219.3337, + "step": 45720 + }, + { + "epoch": 0.8795034161774393, + "grad_norm": 1577.488486576761, + "learning_rate": 7.221402497557629e-07, + "loss": 217.439, + "step": 45730 + }, + { + "epoch": 0.8796957414379198, + "grad_norm": 1461.6194812951574, + "learning_rate": 7.198648170091294e-07, + "loss": 215.2838, + "step": 45740 + }, + { + "epoch": 0.8798880666984004, + "grad_norm": 1517.99586248658, + "learning_rate": 7.175928409595844e-07, + "loss": 220.1899, + "step": 45750 + }, + { + "epoch": 0.8800803919588809, + "grad_norm": 1657.9740523783892, + "learning_rate": 7.153243224534001e-07, + "loss": 224.8396, + "step": 45760 + }, + { + "epoch": 0.8802727172193614, + "grad_norm": 1689.1605040515612, + "learning_rate": 7.130592623355659e-07, + "loss": 217.9619, + "step": 45770 + }, + { + "epoch": 0.8804650424798419, + "grad_norm": 1365.0743638550964, + "learning_rate": 7.10797661449778e-07, + "loss": 218.4567, + "step": 45780 + }, + { + "epoch": 0.8806573677403224, + "grad_norm": 1506.705052941599, + "learning_rate": 7.085395206384449e-07, + "loss": 220.4682, + "step": 45790 + }, + { + "epoch": 0.880849693000803, + "grad_norm": 1354.267316484208, + "learning_rate": 7.062848407426859e-07, + "loss": 217.3667, + "step": 45800 + }, + { + "epoch": 0.8810420182612835, + "grad_norm": 1489.8298171483382, + "learning_rate": 7.040336226023336e-07, + "loss": 215.0995, + "step": 45810 + }, + { + "epoch": 0.881234343521764, + "grad_norm": 1376.774603339723, + "learning_rate": 7.017858670559274e-07, + "loss": 222.7015, + "step": 45820 + }, + { + "epoch": 0.8814266687822445, + "grad_norm": 1580.9217395512842, + "learning_rate": 6.99541574940722e-07, + "loss": 226.3356, + "step": 45830 + }, + { + "epoch": 0.881618994042725, + "grad_norm": 1492.3541294544646, + "learning_rate": 6.973007470926774e-07, + "loss": 220.9657, + "step": 45840 + }, + { + "epoch": 0.8818113193032056, + "grad_norm": 1692.0635701177573, + "learning_rate": 6.95063384346466e-07, + "loss": 221.4249, + "step": 45850 + }, + { + "epoch": 0.8820036445636861, + "grad_norm": 1332.0832671726175, + "learning_rate": 6.92829487535468e-07, + "loss": 223.9194, + "step": 45860 + }, + { + "epoch": 0.8821959698241666, + "grad_norm": 1414.4945731135972, + "learning_rate": 6.905990574917709e-07, + "loss": 222.3355, + "step": 45870 + }, + { + "epoch": 0.8823882950846471, + "grad_norm": 1388.3547762166402, + "learning_rate": 6.88372095046177e-07, + "loss": 215.6123, + "step": 45880 + }, + { + "epoch": 0.8825806203451276, + "grad_norm": 1461.3707583105254, + "learning_rate": 6.861486010281915e-07, + "loss": 226.8827, + "step": 45890 + }, + { + "epoch": 0.8827729456056082, + "grad_norm": 1346.9723806785742, + "learning_rate": 6.839285762660275e-07, + "loss": 213.6974, + "step": 45900 + }, + { + "epoch": 0.8829652708660888, + "grad_norm": 1356.4343327787112, + "learning_rate": 6.81712021586608e-07, + "loss": 220.1619, + "step": 45910 + }, + { + "epoch": 0.8831575961265693, + "grad_norm": 1361.4909883178495, + "learning_rate": 6.794989378155659e-07, + "loss": 223.4412, + "step": 45920 + }, + { + "epoch": 0.8833499213870498, + "grad_norm": 1374.9365533472032, + "learning_rate": 6.772893257772361e-07, + "loss": 225.8385, + "step": 45930 + }, + { + "epoch": 0.8835422466475303, + "grad_norm": 1605.6222221516082, + "learning_rate": 6.750831862946605e-07, + "loss": 230.5761, + "step": 45940 + }, + { + "epoch": 0.8837345719080109, + "grad_norm": 1462.61106839348, + "learning_rate": 6.728805201895949e-07, + "loss": 225.3928, + "step": 45950 + }, + { + "epoch": 0.8839268971684914, + "grad_norm": 1541.5948137478513, + "learning_rate": 6.70681328282492e-07, + "loss": 208.7761, + "step": 45960 + }, + { + "epoch": 0.8841192224289719, + "grad_norm": 1423.7975209623678, + "learning_rate": 6.684856113925143e-07, + "loss": 221.4539, + "step": 45970 + }, + { + "epoch": 0.8843115476894524, + "grad_norm": 1520.9614802863646, + "learning_rate": 6.662933703375307e-07, + "loss": 226.6568, + "step": 45980 + }, + { + "epoch": 0.8845038729499329, + "grad_norm": 1476.3241792098945, + "learning_rate": 6.641046059341171e-07, + "loss": 213.7385, + "step": 45990 + }, + { + "epoch": 0.8846961982104135, + "grad_norm": 1618.9427451080335, + "learning_rate": 6.619193189975515e-07, + "loss": 223.3078, + "step": 46000 + }, + { + "epoch": 0.884888523470894, + "grad_norm": 1454.2732669525421, + "learning_rate": 6.597375103418135e-07, + "loss": 218.7141, + "step": 46010 + }, + { + "epoch": 0.8850808487313745, + "grad_norm": 1320.791435154261, + "learning_rate": 6.575591807795944e-07, + "loss": 212.0283, + "step": 46020 + }, + { + "epoch": 0.885273173991855, + "grad_norm": 1412.6693213232759, + "learning_rate": 6.553843311222863e-07, + "loss": 217.7653, + "step": 46030 + }, + { + "epoch": 0.8854654992523355, + "grad_norm": 1293.7639851681747, + "learning_rate": 6.532129621799832e-07, + "loss": 221.3312, + "step": 46040 + }, + { + "epoch": 0.885657824512816, + "grad_norm": 1660.7692875702126, + "learning_rate": 6.510450747614816e-07, + "loss": 226.6156, + "step": 46050 + }, + { + "epoch": 0.8858501497732966, + "grad_norm": 1443.8799605617487, + "learning_rate": 6.488806696742889e-07, + "loss": 229.9846, + "step": 46060 + }, + { + "epoch": 0.8860424750337771, + "grad_norm": 1490.2750334231596, + "learning_rate": 6.46719747724609e-07, + "loss": 233.9441, + "step": 46070 + }, + { + "epoch": 0.8862348002942576, + "grad_norm": 1428.396812622404, + "learning_rate": 6.44562309717347e-07, + "loss": 227.9069, + "step": 46080 + }, + { + "epoch": 0.8864271255547381, + "grad_norm": 1537.7132188985988, + "learning_rate": 6.424083564561134e-07, + "loss": 224.3031, + "step": 46090 + }, + { + "epoch": 0.8866194508152186, + "grad_norm": 1408.4615940504423, + "learning_rate": 6.402578887432232e-07, + "loss": 216.4268, + "step": 46100 + }, + { + "epoch": 0.8868117760756993, + "grad_norm": 1613.007077744771, + "learning_rate": 6.381109073796865e-07, + "loss": 223.7321, + "step": 46110 + }, + { + "epoch": 0.8870041013361798, + "grad_norm": 1448.9604743948537, + "learning_rate": 6.359674131652204e-07, + "loss": 215.2867, + "step": 46120 + }, + { + "epoch": 0.8871964265966603, + "grad_norm": 1595.2968969784808, + "learning_rate": 6.338274068982408e-07, + "loss": 221.4875, + "step": 46130 + }, + { + "epoch": 0.8873887518571408, + "grad_norm": 1490.5522710098103, + "learning_rate": 6.316908893758656e-07, + "loss": 219.4413, + "step": 46140 + }, + { + "epoch": 0.8875810771176214, + "grad_norm": 1480.085861014192, + "learning_rate": 6.295578613939113e-07, + "loss": 220.9146, + "step": 46150 + }, + { + "epoch": 0.8877734023781019, + "grad_norm": 1508.3910262287934, + "learning_rate": 6.274283237468948e-07, + "loss": 214.3749, + "step": 46160 + }, + { + "epoch": 0.8879657276385824, + "grad_norm": 1475.6728959557029, + "learning_rate": 6.25302277228036e-07, + "loss": 223.789, + "step": 46170 + }, + { + "epoch": 0.8881580528990629, + "grad_norm": 1611.933041951094, + "learning_rate": 6.231797226292502e-07, + "loss": 225.78, + "step": 46180 + }, + { + "epoch": 0.8883503781595434, + "grad_norm": 1483.3084428805128, + "learning_rate": 6.210606607411529e-07, + "loss": 214.7251, + "step": 46190 + }, + { + "epoch": 0.888542703420024, + "grad_norm": 1318.8166480175398, + "learning_rate": 6.189450923530627e-07, + "loss": 216.3322, + "step": 46200 + }, + { + "epoch": 0.8887350286805045, + "grad_norm": 1384.306628668819, + "learning_rate": 6.168330182529924e-07, + "loss": 225.431, + "step": 46210 + }, + { + "epoch": 0.888927353940985, + "grad_norm": 1472.0396016767743, + "learning_rate": 6.147244392276541e-07, + "loss": 220.7442, + "step": 46220 + }, + { + "epoch": 0.8891196792014655, + "grad_norm": 1280.2636634885869, + "learning_rate": 6.126193560624583e-07, + "loss": 223.7494, + "step": 46230 + }, + { + "epoch": 0.889312004461946, + "grad_norm": 1479.376389148199, + "learning_rate": 6.105177695415165e-07, + "loss": 218.8775, + "step": 46240 + }, + { + "epoch": 0.8895043297224265, + "grad_norm": 1337.081755758868, + "learning_rate": 6.084196804476317e-07, + "loss": 217.6594, + "step": 46250 + }, + { + "epoch": 0.8896966549829071, + "grad_norm": 1461.7468868415488, + "learning_rate": 6.063250895623096e-07, + "loss": 216.627, + "step": 46260 + }, + { + "epoch": 0.8898889802433876, + "grad_norm": 2250.7741556144065, + "learning_rate": 6.042339976657486e-07, + "loss": 227.4446, + "step": 46270 + }, + { + "epoch": 0.8900813055038681, + "grad_norm": 1431.2070494282514, + "learning_rate": 6.021464055368498e-07, + "loss": 220.2569, + "step": 46280 + }, + { + "epoch": 0.8902736307643486, + "grad_norm": 1326.0558774466288, + "learning_rate": 6.000623139532036e-07, + "loss": 228.0942, + "step": 46290 + }, + { + "epoch": 0.8904659560248291, + "grad_norm": 1294.28514245179, + "learning_rate": 5.979817236910979e-07, + "loss": 220.3547, + "step": 46300 + }, + { + "epoch": 0.8906582812853097, + "grad_norm": 1404.4233102421995, + "learning_rate": 5.959046355255238e-07, + "loss": 232.473, + "step": 46310 + }, + { + "epoch": 0.8908506065457903, + "grad_norm": 1458.577998790589, + "learning_rate": 5.93831050230158e-07, + "loss": 219.4227, + "step": 46320 + }, + { + "epoch": 0.8910429318062708, + "grad_norm": 1484.8663443865212, + "learning_rate": 5.917609685773784e-07, + "loss": 218.1252, + "step": 46330 + }, + { + "epoch": 0.8912352570667513, + "grad_norm": 1482.839545464784, + "learning_rate": 5.896943913382547e-07, + "loss": 213.3422, + "step": 46340 + }, + { + "epoch": 0.8914275823272318, + "grad_norm": 1272.5806136822664, + "learning_rate": 5.876313192825544e-07, + "loss": 214.6004, + "step": 46350 + }, + { + "epoch": 0.8916199075877124, + "grad_norm": 1575.626673681023, + "learning_rate": 5.855717531787375e-07, + "loss": 227.8473, + "step": 46360 + }, + { + "epoch": 0.8918122328481929, + "grad_norm": 1520.8216265388264, + "learning_rate": 5.835156937939568e-07, + "loss": 226.4484, + "step": 46370 + }, + { + "epoch": 0.8920045581086734, + "grad_norm": 1388.38765817661, + "learning_rate": 5.814631418940641e-07, + "loss": 218.6067, + "step": 46380 + }, + { + "epoch": 0.8921968833691539, + "grad_norm": 1391.1677029761859, + "learning_rate": 5.794140982435981e-07, + "loss": 218.7422, + "step": 46390 + }, + { + "epoch": 0.8923892086296344, + "grad_norm": 1477.1739172444923, + "learning_rate": 5.773685636057924e-07, + "loss": 220.2675, + "step": 46400 + }, + { + "epoch": 0.892581533890115, + "grad_norm": 1363.037747944765, + "learning_rate": 5.753265387425777e-07, + "loss": 229.6712, + "step": 46410 + }, + { + "epoch": 0.8927738591505955, + "grad_norm": 1866.725434299131, + "learning_rate": 5.732880244145744e-07, + "loss": 216.3705, + "step": 46420 + }, + { + "epoch": 0.892966184411076, + "grad_norm": 1309.853753693657, + "learning_rate": 5.712530213810951e-07, + "loss": 221.7039, + "step": 46430 + }, + { + "epoch": 0.8931585096715565, + "grad_norm": 1392.555337628686, + "learning_rate": 5.692215304001447e-07, + "loss": 217.0223, + "step": 46440 + }, + { + "epoch": 0.893350834932037, + "grad_norm": 1430.7355999475292, + "learning_rate": 5.671935522284177e-07, + "loss": 223.6912, + "step": 46450 + }, + { + "epoch": 0.8935431601925176, + "grad_norm": 1538.112836502128, + "learning_rate": 5.651690876213067e-07, + "loss": 226.1131, + "step": 46460 + }, + { + "epoch": 0.8937354854529981, + "grad_norm": 1463.9496914215588, + "learning_rate": 5.631481373328895e-07, + "loss": 227.5667, + "step": 46470 + }, + { + "epoch": 0.8939278107134786, + "grad_norm": 1344.7445157577852, + "learning_rate": 5.61130702115934e-07, + "loss": 224.5837, + "step": 46480 + }, + { + "epoch": 0.8941201359739591, + "grad_norm": 1542.225333096471, + "learning_rate": 5.591167827219057e-07, + "loss": 220.551, + "step": 46490 + }, + { + "epoch": 0.8943124612344396, + "grad_norm": 1656.3510143022625, + "learning_rate": 5.571063799009546e-07, + "loss": 222.3722, + "step": 46500 + }, + { + "epoch": 0.8945047864949202, + "grad_norm": 1368.3589018208604, + "learning_rate": 5.550994944019216e-07, + "loss": 221.5293, + "step": 46510 + }, + { + "epoch": 0.8946971117554008, + "grad_norm": 1399.5642335697032, + "learning_rate": 5.53096126972339e-07, + "loss": 222.8575, + "step": 46520 + }, + { + "epoch": 0.8948894370158813, + "grad_norm": 1375.8183121848929, + "learning_rate": 5.510962783584295e-07, + "loss": 222.4795, + "step": 46530 + }, + { + "epoch": 0.8950817622763618, + "grad_norm": 1610.9080169549472, + "learning_rate": 5.490999493051008e-07, + "loss": 218.747, + "step": 46540 + }, + { + "epoch": 0.8952740875368423, + "grad_norm": 1424.7380763758229, + "learning_rate": 5.471071405559547e-07, + "loss": 220.7228, + "step": 46550 + }, + { + "epoch": 0.8954664127973229, + "grad_norm": 1436.3949981178098, + "learning_rate": 5.451178528532786e-07, + "loss": 222.3391, + "step": 46560 + }, + { + "epoch": 0.8956587380578034, + "grad_norm": 1360.3733143259442, + "learning_rate": 5.431320869380519e-07, + "loss": 210.3986, + "step": 46570 + }, + { + "epoch": 0.8958510633182839, + "grad_norm": 1331.1294292070177, + "learning_rate": 5.411498435499363e-07, + "loss": 226.2973, + "step": 46580 + }, + { + "epoch": 0.8960433885787644, + "grad_norm": 1430.736991708031, + "learning_rate": 5.391711234272856e-07, + "loss": 217.365, + "step": 46590 + }, + { + "epoch": 0.8962357138392449, + "grad_norm": 1394.9522651995821, + "learning_rate": 5.371959273071414e-07, + "loss": 221.7367, + "step": 46600 + }, + { + "epoch": 0.8964280390997255, + "grad_norm": 1330.574883087887, + "learning_rate": 5.352242559252308e-07, + "loss": 212.5495, + "step": 46610 + }, + { + "epoch": 0.896620364360206, + "grad_norm": 1339.4661413834856, + "learning_rate": 5.332561100159683e-07, + "loss": 222.5836, + "step": 46620 + }, + { + "epoch": 0.8968126896206865, + "grad_norm": 1403.4451768756446, + "learning_rate": 5.312914903124566e-07, + "loss": 218.0726, + "step": 46630 + }, + { + "epoch": 0.897005014881167, + "grad_norm": 1542.6051427233544, + "learning_rate": 5.293303975464836e-07, + "loss": 219.1479, + "step": 46640 + }, + { + "epoch": 0.8971973401416475, + "grad_norm": 1537.3246365725583, + "learning_rate": 5.273728324485261e-07, + "loss": 221.9458, + "step": 46650 + }, + { + "epoch": 0.8973896654021281, + "grad_norm": 1478.0167750122955, + "learning_rate": 5.254187957477397e-07, + "loss": 232.0157, + "step": 46660 + }, + { + "epoch": 0.8975819906626086, + "grad_norm": 1411.1370098994937, + "learning_rate": 5.234682881719766e-07, + "loss": 222.2815, + "step": 46670 + }, + { + "epoch": 0.8977743159230891, + "grad_norm": 1376.0540284869267, + "learning_rate": 5.215213104477645e-07, + "loss": 218.5224, + "step": 46680 + }, + { + "epoch": 0.8979666411835696, + "grad_norm": 1360.111196179622, + "learning_rate": 5.195778633003223e-07, + "loss": 214.8784, + "step": 46690 + }, + { + "epoch": 0.8981589664440501, + "grad_norm": 1299.634393360051, + "learning_rate": 5.176379474535509e-07, + "loss": 217.2428, + "step": 46700 + }, + { + "epoch": 0.8983512917045307, + "grad_norm": 1606.7830309270275, + "learning_rate": 5.1570156363004e-07, + "loss": 224.4943, + "step": 46710 + }, + { + "epoch": 0.8985436169650112, + "grad_norm": 1408.556361161924, + "learning_rate": 5.13768712551057e-07, + "loss": 227.2928, + "step": 46720 + }, + { + "epoch": 0.8987359422254918, + "grad_norm": 1324.8942740295813, + "learning_rate": 5.118393949365574e-07, + "loss": 220.0283, + "step": 46730 + }, + { + "epoch": 0.8989282674859723, + "grad_norm": 1901.0604238102271, + "learning_rate": 5.099136115051829e-07, + "loss": 220.363, + "step": 46740 + }, + { + "epoch": 0.8991205927464528, + "grad_norm": 1478.726525220824, + "learning_rate": 5.079913629742539e-07, + "loss": 222.2089, + "step": 46750 + }, + { + "epoch": 0.8993129180069334, + "grad_norm": 1505.128030958836, + "learning_rate": 5.060726500597768e-07, + "loss": 227.9248, + "step": 46760 + }, + { + "epoch": 0.8995052432674139, + "grad_norm": 1450.2383454067806, + "learning_rate": 5.041574734764376e-07, + "loss": 220.9869, + "step": 46770 + }, + { + "epoch": 0.8996975685278944, + "grad_norm": 1453.839559140101, + "learning_rate": 5.022458339376124e-07, + "loss": 218.7223, + "step": 46780 + }, + { + "epoch": 0.8998898937883749, + "grad_norm": 1468.469766703546, + "learning_rate": 5.003377321553538e-07, + "loss": 225.2005, + "step": 46790 + }, + { + "epoch": 0.9000822190488554, + "grad_norm": 1355.747466162097, + "learning_rate": 4.984331688403976e-07, + "loss": 224.5639, + "step": 46800 + }, + { + "epoch": 0.900274544309336, + "grad_norm": 1444.184541351923, + "learning_rate": 4.96532144702162e-07, + "loss": 218.3931, + "step": 46810 + }, + { + "epoch": 0.9004668695698165, + "grad_norm": 1431.5097195521855, + "learning_rate": 4.946346604487462e-07, + "loss": 231.0745, + "step": 46820 + }, + { + "epoch": 0.900659194830297, + "grad_norm": 1372.0452974625207, + "learning_rate": 4.927407167869346e-07, + "loss": 218.4352, + "step": 46830 + }, + { + "epoch": 0.9008515200907775, + "grad_norm": 1429.7090153273914, + "learning_rate": 4.908503144221877e-07, + "loss": 220.1105, + "step": 46840 + }, + { + "epoch": 0.901043845351258, + "grad_norm": 1422.8227749181583, + "learning_rate": 4.889634540586518e-07, + "loss": 220.7422, + "step": 46850 + }, + { + "epoch": 0.9012361706117386, + "grad_norm": 1519.5353671623918, + "learning_rate": 4.870801363991484e-07, + "loss": 227.8265, + "step": 46860 + }, + { + "epoch": 0.9014284958722191, + "grad_norm": 1359.3086555893549, + "learning_rate": 4.852003621451829e-07, + "loss": 216.379, + "step": 46870 + }, + { + "epoch": 0.9016208211326996, + "grad_norm": 1595.9811679470838, + "learning_rate": 4.833241319969395e-07, + "loss": 215.0732, + "step": 46880 + }, + { + "epoch": 0.9018131463931801, + "grad_norm": 1329.4766522062168, + "learning_rate": 4.814514466532849e-07, + "loss": 219.9756, + "step": 46890 + }, + { + "epoch": 0.9020054716536606, + "grad_norm": 1519.7936145474603, + "learning_rate": 4.795823068117622e-07, + "loss": 223.6884, + "step": 46900 + }, + { + "epoch": 0.9021977969141411, + "grad_norm": 1441.189960715862, + "learning_rate": 4.777167131685945e-07, + "loss": 214.417, + "step": 46910 + }, + { + "epoch": 0.9023901221746217, + "grad_norm": 1379.65981457216, + "learning_rate": 4.7585466641868696e-07, + "loss": 226.1294, + "step": 46920 + }, + { + "epoch": 0.9025824474351023, + "grad_norm": 1387.7175842725867, + "learning_rate": 4.7399616725561925e-07, + "loss": 215.7953, + "step": 46930 + }, + { + "epoch": 0.9027747726955828, + "grad_norm": 1490.7144996476823, + "learning_rate": 4.721412163716521e-07, + "loss": 219.4926, + "step": 46940 + }, + { + "epoch": 0.9029670979560633, + "grad_norm": 1606.9868485269926, + "learning_rate": 4.702898144577228e-07, + "loss": 232.0976, + "step": 46950 + }, + { + "epoch": 0.9031594232165439, + "grad_norm": 1390.983723362061, + "learning_rate": 4.6844196220345086e-07, + "loss": 216.0086, + "step": 46960 + }, + { + "epoch": 0.9033517484770244, + "grad_norm": 1617.6546655662457, + "learning_rate": 4.665976602971278e-07, + "loss": 223.763, + "step": 46970 + }, + { + "epoch": 0.9035440737375049, + "grad_norm": 1559.5855001690002, + "learning_rate": 4.647569094257276e-07, + "loss": 227.5162, + "step": 46980 + }, + { + "epoch": 0.9037363989979854, + "grad_norm": 1358.3030328722803, + "learning_rate": 4.629197102748984e-07, + "loss": 214.7696, + "step": 46990 + }, + { + "epoch": 0.9039287242584659, + "grad_norm": 1525.3432992106088, + "learning_rate": 4.610860635289671e-07, + "loss": 221.7334, + "step": 47000 + }, + { + "epoch": 0.9041210495189465, + "grad_norm": 1345.8686097189452, + "learning_rate": 4.592559698709387e-07, + "loss": 219.8919, + "step": 47010 + }, + { + "epoch": 0.904313374779427, + "grad_norm": 1331.7024925857322, + "learning_rate": 4.5742942998248774e-07, + "loss": 220.1635, + "step": 47020 + }, + { + "epoch": 0.9045057000399075, + "grad_norm": 1467.937745812986, + "learning_rate": 4.5560644454397563e-07, + "loss": 220.8538, + "step": 47030 + }, + { + "epoch": 0.904698025300388, + "grad_norm": 1359.9598669413365, + "learning_rate": 4.537870142344314e-07, + "loss": 222.1589, + "step": 47040 + }, + { + "epoch": 0.9048903505608685, + "grad_norm": 1480.9876007737616, + "learning_rate": 4.5197113973156403e-07, + "loss": 218.9359, + "step": 47050 + }, + { + "epoch": 0.905082675821349, + "grad_norm": 1470.2394457393145, + "learning_rate": 4.5015882171175476e-07, + "loss": 225.6769, + "step": 47060 + }, + { + "epoch": 0.9052750010818296, + "grad_norm": 1836.6324461425384, + "learning_rate": 4.483500608500657e-07, + "loss": 218.3168, + "step": 47070 + }, + { + "epoch": 0.9054673263423101, + "grad_norm": 1614.6640102453246, + "learning_rate": 4.4654485782022697e-07, + "loss": 219.166, + "step": 47080 + }, + { + "epoch": 0.9056596516027906, + "grad_norm": 1489.5051681586247, + "learning_rate": 4.447432132946472e-07, + "loss": 224.8024, + "step": 47090 + }, + { + "epoch": 0.9058519768632711, + "grad_norm": 1680.262134295633, + "learning_rate": 4.429451279444119e-07, + "loss": 221.4581, + "step": 47100 + }, + { + "epoch": 0.9060443021237516, + "grad_norm": 1444.001840773713, + "learning_rate": 4.411506024392753e-07, + "loss": 225.6714, + "step": 47110 + }, + { + "epoch": 0.9062366273842322, + "grad_norm": 1668.5750334172917, + "learning_rate": 4.393596374476705e-07, + "loss": 214.7576, + "step": 47120 + }, + { + "epoch": 0.9064289526447128, + "grad_norm": 1332.7983133827638, + "learning_rate": 4.3757223363670055e-07, + "loss": 211.9972, + "step": 47130 + }, + { + "epoch": 0.9066212779051933, + "grad_norm": 1314.4716216572058, + "learning_rate": 4.3578839167214505e-07, + "loss": 224.1075, + "step": 47140 + }, + { + "epoch": 0.9068136031656738, + "grad_norm": 1344.0111235957, + "learning_rate": 4.3400811221845693e-07, + "loss": 223.1015, + "step": 47150 + }, + { + "epoch": 0.9070059284261544, + "grad_norm": 1392.1378119485578, + "learning_rate": 4.322313959387592e-07, + "loss": 220.7298, + "step": 47160 + }, + { + "epoch": 0.9071982536866349, + "grad_norm": 1339.662045877732, + "learning_rate": 4.304582434948479e-07, + "loss": 219.0525, + "step": 47170 + }, + { + "epoch": 0.9073905789471154, + "grad_norm": 1237.1396430250047, + "learning_rate": 4.2868865554719583e-07, + "loss": 220.4794, + "step": 47180 + }, + { + "epoch": 0.9075829042075959, + "grad_norm": 1243.689604265467, + "learning_rate": 4.269226327549447e-07, + "loss": 222.6925, + "step": 47190 + }, + { + "epoch": 0.9077752294680764, + "grad_norm": 1525.2456161598323, + "learning_rate": 4.251601757759061e-07, + "loss": 212.6521, + "step": 47200 + }, + { + "epoch": 0.907967554728557, + "grad_norm": 1569.5996124155515, + "learning_rate": 4.234012852665703e-07, + "loss": 227.1184, + "step": 47210 + }, + { + "epoch": 0.9081598799890375, + "grad_norm": 1536.4192739967484, + "learning_rate": 4.2164596188209226e-07, + "loss": 224.376, + "step": 47220 + }, + { + "epoch": 0.908352205249518, + "grad_norm": 1622.905828787272, + "learning_rate": 4.198942062763023e-07, + "loss": 223.7278, + "step": 47230 + }, + { + "epoch": 0.9085445305099985, + "grad_norm": 1513.5467307466727, + "learning_rate": 4.181460191016984e-07, + "loss": 217.6169, + "step": 47240 + }, + { + "epoch": 0.908736855770479, + "grad_norm": 1366.5343375702782, + "learning_rate": 4.1640140100945304e-07, + "loss": 224.4244, + "step": 47250 + }, + { + "epoch": 0.9089291810309595, + "grad_norm": 1605.2650628774873, + "learning_rate": 4.146603526494086e-07, + "loss": 219.5355, + "step": 47260 + }, + { + "epoch": 0.9091215062914401, + "grad_norm": 1553.9891847004822, + "learning_rate": 4.129228746700742e-07, + "loss": 225.5607, + "step": 47270 + }, + { + "epoch": 0.9093138315519206, + "grad_norm": 1734.1490012654092, + "learning_rate": 4.111889677186354e-07, + "loss": 220.383, + "step": 47280 + }, + { + "epoch": 0.9095061568124011, + "grad_norm": 1602.6603601514983, + "learning_rate": 4.094586324409411e-07, + "loss": 216.7603, + "step": 47290 + }, + { + "epoch": 0.9096984820728816, + "grad_norm": 1305.374800293323, + "learning_rate": 4.0773186948151246e-07, + "loss": 223.2034, + "step": 47300 + }, + { + "epoch": 0.9098908073333621, + "grad_norm": 1622.6320761783568, + "learning_rate": 4.060086794835405e-07, + "loss": 217.493, + "step": 47310 + }, + { + "epoch": 0.9100831325938427, + "grad_norm": 1429.1113968411746, + "learning_rate": 4.042890630888863e-07, + "loss": 217.6178, + "step": 47320 + }, + { + "epoch": 0.9102754578543232, + "grad_norm": 1303.8000290881128, + "learning_rate": 4.025730209380774e-07, + "loss": 212.2856, + "step": 47330 + }, + { + "epoch": 0.9104677831148038, + "grad_norm": 1472.4432589699088, + "learning_rate": 4.0086055367031027e-07, + "loss": 226.2052, + "step": 47340 + }, + { + "epoch": 0.9106601083752843, + "grad_norm": 1312.9446923193095, + "learning_rate": 3.9915166192345365e-07, + "loss": 219.4008, + "step": 47350 + }, + { + "epoch": 0.9108524336357648, + "grad_norm": 1476.785470661036, + "learning_rate": 3.9744634633403944e-07, + "loss": 216.3873, + "step": 47360 + }, + { + "epoch": 0.9110447588962454, + "grad_norm": 1370.3625332409579, + "learning_rate": 3.957446075372706e-07, + "loss": 213.5251, + "step": 47370 + }, + { + "epoch": 0.9112370841567259, + "grad_norm": 1398.7029359767273, + "learning_rate": 3.940464461670135e-07, + "loss": 225.4371, + "step": 47380 + }, + { + "epoch": 0.9114294094172064, + "grad_norm": 1478.5102345817131, + "learning_rate": 3.923518628558087e-07, + "loss": 224.6383, + "step": 47390 + }, + { + "epoch": 0.9116217346776869, + "grad_norm": 1344.995625573096, + "learning_rate": 3.9066085823485923e-07, + "loss": 220.425, + "step": 47400 + }, + { + "epoch": 0.9118140599381674, + "grad_norm": 1420.2761277536722, + "learning_rate": 3.8897343293403777e-07, + "loss": 226.7782, + "step": 47410 + }, + { + "epoch": 0.912006385198648, + "grad_norm": 1428.019165995346, + "learning_rate": 3.872895875818794e-07, + "loss": 219.5789, + "step": 47420 + }, + { + "epoch": 0.9121987104591285, + "grad_norm": 1524.2914915650294, + "learning_rate": 3.856093228055924e-07, + "loss": 216.0496, + "step": 47430 + }, + { + "epoch": 0.912391035719609, + "grad_norm": 1436.1415045627953, + "learning_rate": 3.83932639231045e-07, + "loss": 222.3941, + "step": 47440 + }, + { + "epoch": 0.9125833609800895, + "grad_norm": 1503.9586716891579, + "learning_rate": 3.822595374827742e-07, + "loss": 218.0715, + "step": 47450 + }, + { + "epoch": 0.91277568624057, + "grad_norm": 1349.6842768640456, + "learning_rate": 3.805900181839839e-07, + "loss": 221.7199, + "step": 47460 + }, + { + "epoch": 0.9129680115010506, + "grad_norm": 1451.2761476664573, + "learning_rate": 3.789240819565432e-07, + "loss": 216.8558, + "step": 47470 + }, + { + "epoch": 0.9131603367615311, + "grad_norm": 1317.1640497005353, + "learning_rate": 3.772617294209835e-07, + "loss": 219.8583, + "step": 47480 + }, + { + "epoch": 0.9133526620220116, + "grad_norm": 1395.5069540858262, + "learning_rate": 3.7560296119650396e-07, + "loss": 220.1477, + "step": 47490 + }, + { + "epoch": 0.9135449872824921, + "grad_norm": 1593.1648381968544, + "learning_rate": 3.739477779009704e-07, + "loss": 231.3557, + "step": 47500 + }, + { + "epoch": 0.9137373125429726, + "grad_norm": 1328.7719753864867, + "learning_rate": 3.7229618015091065e-07, + "loss": 222.6315, + "step": 47510 + }, + { + "epoch": 0.9139296378034532, + "grad_norm": 1298.0060401238827, + "learning_rate": 3.7064816856151484e-07, + "loss": 217.4497, + "step": 47520 + }, + { + "epoch": 0.9141219630639337, + "grad_norm": 1784.7366103480408, + "learning_rate": 3.6900374374664425e-07, + "loss": 225.1923, + "step": 47530 + }, + { + "epoch": 0.9143142883244143, + "grad_norm": 1406.336524879021, + "learning_rate": 3.6736290631881667e-07, + "loss": 223.364, + "step": 47540 + }, + { + "epoch": 0.9145066135848948, + "grad_norm": 1447.0675423819398, + "learning_rate": 3.657256568892187e-07, + "loss": 224.4759, + "step": 47550 + }, + { + "epoch": 0.9146989388453753, + "grad_norm": 1558.7062176640768, + "learning_rate": 3.6409199606769806e-07, + "loss": 213.1187, + "step": 47560 + }, + { + "epoch": 0.9148912641058559, + "grad_norm": 1439.341310782253, + "learning_rate": 3.6246192446276694e-07, + "loss": 216.4039, + "step": 47570 + }, + { + "epoch": 0.9150835893663364, + "grad_norm": 1471.5174417088792, + "learning_rate": 3.6083544268160077e-07, + "loss": 222.9255, + "step": 47580 + }, + { + "epoch": 0.9152759146268169, + "grad_norm": 1409.9488850591572, + "learning_rate": 3.5921255133003483e-07, + "loss": 218.6654, + "step": 47590 + }, + { + "epoch": 0.9154682398872974, + "grad_norm": 1423.3093621885928, + "learning_rate": 3.5759325101257013e-07, + "loss": 223.0352, + "step": 47600 + }, + { + "epoch": 0.9156605651477779, + "grad_norm": 1505.4007198177992, + "learning_rate": 3.559775423323708e-07, + "loss": 219.4685, + "step": 47610 + }, + { + "epoch": 0.9158528904082585, + "grad_norm": 1388.416041488496, + "learning_rate": 3.54365425891261e-07, + "loss": 214.4795, + "step": 47620 + }, + { + "epoch": 0.916045215668739, + "grad_norm": 1372.0266667616108, + "learning_rate": 3.527569022897259e-07, + "loss": 224.1236, + "step": 47630 + }, + { + "epoch": 0.9162375409292195, + "grad_norm": 1346.3885818848808, + "learning_rate": 3.511519721269163e-07, + "loss": 222.9773, + "step": 47640 + }, + { + "epoch": 0.9164298661897, + "grad_norm": 1451.9466833275196, + "learning_rate": 3.4955063600064177e-07, + "loss": 215.0467, + "step": 47650 + }, + { + "epoch": 0.9166221914501805, + "grad_norm": 1439.0813641269924, + "learning_rate": 3.479528945073707e-07, + "loss": 218.8649, + "step": 47660 + }, + { + "epoch": 0.916814516710661, + "grad_norm": 1374.7710071859926, + "learning_rate": 3.4635874824223924e-07, + "loss": 221.2719, + "step": 47670 + }, + { + "epoch": 0.9170068419711416, + "grad_norm": 4921.765003875099, + "learning_rate": 3.4476819779903694e-07, + "loss": 221.1381, + "step": 47680 + }, + { + "epoch": 0.9171991672316221, + "grad_norm": 1490.0607461887414, + "learning_rate": 3.43181243770222e-07, + "loss": 217.945, + "step": 47690 + }, + { + "epoch": 0.9173914924921026, + "grad_norm": 1404.0460318201362, + "learning_rate": 3.4159788674690386e-07, + "loss": 215.6464, + "step": 47700 + }, + { + "epoch": 0.9175838177525831, + "grad_norm": 1391.8318599368931, + "learning_rate": 3.4001812731886077e-07, + "loss": 212.6189, + "step": 47710 + }, + { + "epoch": 0.9177761430130636, + "grad_norm": 1269.2213887480323, + "learning_rate": 3.384419660745253e-07, + "loss": 214.7372, + "step": 47720 + }, + { + "epoch": 0.9179684682735442, + "grad_norm": 1576.799759515564, + "learning_rate": 3.368694036009923e-07, + "loss": 223.3609, + "step": 47730 + }, + { + "epoch": 0.9181607935340247, + "grad_norm": 1461.9010347646336, + "learning_rate": 3.353004404840121e-07, + "loss": 211.45, + "step": 47740 + }, + { + "epoch": 0.9183531187945053, + "grad_norm": 1559.459309411269, + "learning_rate": 3.3373507730800167e-07, + "loss": 230.0832, + "step": 47750 + }, + { + "epoch": 0.9185454440549858, + "grad_norm": 1600.8985215363884, + "learning_rate": 3.321733146560324e-07, + "loss": 219.0825, + "step": 47760 + }, + { + "epoch": 0.9187377693154664, + "grad_norm": 1585.9395832715118, + "learning_rate": 3.306151531098323e-07, + "loss": 223.2969, + "step": 47770 + }, + { + "epoch": 0.9189300945759469, + "grad_norm": 1438.8339673614244, + "learning_rate": 3.2906059324979255e-07, + "loss": 219.2484, + "step": 47780 + }, + { + "epoch": 0.9191224198364274, + "grad_norm": 1693.7520237797733, + "learning_rate": 3.275096356549612e-07, + "loss": 224.3883, + "step": 47790 + }, + { + "epoch": 0.9193147450969079, + "grad_norm": 1374.1466437365805, + "learning_rate": 3.2596228090304496e-07, + "loss": 211.2069, + "step": 47800 + }, + { + "epoch": 0.9195070703573884, + "grad_norm": 1444.7493813370381, + "learning_rate": 3.2441852957040607e-07, + "loss": 220.1731, + "step": 47810 + }, + { + "epoch": 0.919699395617869, + "grad_norm": 1301.6678714795885, + "learning_rate": 3.228783822320669e-07, + "loss": 217.9352, + "step": 47820 + }, + { + "epoch": 0.9198917208783495, + "grad_norm": 1299.3317831281151, + "learning_rate": 3.213418394617085e-07, + "loss": 219.5616, + "step": 47830 + }, + { + "epoch": 0.92008404613883, + "grad_norm": 1465.8789030895596, + "learning_rate": 3.1980890183166633e-07, + "loss": 218.4252, + "step": 47840 + }, + { + "epoch": 0.9202763713993105, + "grad_norm": 1416.2344950675404, + "learning_rate": 3.1827956991293374e-07, + "loss": 221.5462, + "step": 47850 + }, + { + "epoch": 0.920468696659791, + "grad_norm": 1458.0826619236723, + "learning_rate": 3.167538442751639e-07, + "loss": 221.894, + "step": 47860 + }, + { + "epoch": 0.9206610219202715, + "grad_norm": 1552.4803382268922, + "learning_rate": 3.1523172548666215e-07, + "loss": 220.0342, + "step": 47870 + }, + { + "epoch": 0.9208533471807521, + "grad_norm": 1411.753135894563, + "learning_rate": 3.1371321411439284e-07, + "loss": 225.0019, + "step": 47880 + }, + { + "epoch": 0.9210456724412326, + "grad_norm": 1430.238728284219, + "learning_rate": 3.1219831072397787e-07, + "loss": 221.1834, + "step": 47890 + }, + { + "epoch": 0.9212379977017131, + "grad_norm": 1304.7855657849027, + "learning_rate": 3.1068701587969375e-07, + "loss": 226.8632, + "step": 47900 + }, + { + "epoch": 0.9214303229621936, + "grad_norm": 1654.4673211647885, + "learning_rate": 3.091793301444701e-07, + "loss": 221.2661, + "step": 47910 + }, + { + "epoch": 0.9216226482226741, + "grad_norm": 1394.4288875986913, + "learning_rate": 3.076752540798977e-07, + "loss": 220.475, + "step": 47920 + }, + { + "epoch": 0.9218149734831547, + "grad_norm": 1431.1259954025106, + "learning_rate": 3.061747882462185e-07, + "loss": 222.9695, + "step": 47930 + }, + { + "epoch": 0.9220072987436352, + "grad_norm": 1375.24743677573, + "learning_rate": 3.0467793320233306e-07, + "loss": 219.8958, + "step": 47940 + }, + { + "epoch": 0.9221996240041158, + "grad_norm": 1342.3495925202565, + "learning_rate": 3.031846895057922e-07, + "loss": 222.7533, + "step": 47950 + }, + { + "epoch": 0.9223919492645963, + "grad_norm": 1403.0074302009762, + "learning_rate": 3.0169505771280747e-07, + "loss": 219.4715, + "step": 47960 + }, + { + "epoch": 0.9225842745250769, + "grad_norm": 1406.458761825228, + "learning_rate": 3.002090383782408e-07, + "loss": 215.6729, + "step": 47970 + }, + { + "epoch": 0.9227765997855574, + "grad_norm": 1592.4241206752758, + "learning_rate": 2.9872663205561035e-07, + "loss": 217.0189, + "step": 47980 + }, + { + "epoch": 0.9229689250460379, + "grad_norm": 1593.4532351647367, + "learning_rate": 2.972478392970857e-07, + "loss": 224.2339, + "step": 47990 + }, + { + "epoch": 0.9231612503065184, + "grad_norm": 1519.3356496559938, + "learning_rate": 2.9577266065349716e-07, + "loss": 217.2127, + "step": 48000 + }, + { + "epoch": 0.9233535755669989, + "grad_norm": 1345.652395432279, + "learning_rate": 2.9430109667432096e-07, + "loss": 219.529, + "step": 48010 + }, + { + "epoch": 0.9235459008274794, + "grad_norm": 1671.7656770264464, + "learning_rate": 2.9283314790769177e-07, + "loss": 226.2386, + "step": 48020 + }, + { + "epoch": 0.92373822608796, + "grad_norm": 1520.2096466099476, + "learning_rate": 2.913688149003946e-07, + "loss": 216.641, + "step": 48030 + }, + { + "epoch": 0.9239305513484405, + "grad_norm": 1501.7275251998974, + "learning_rate": 2.899080981978719e-07, + "loss": 225.2489, + "step": 48040 + }, + { + "epoch": 0.924122876608921, + "grad_norm": 1321.0058719172387, + "learning_rate": 2.8845099834421517e-07, + "loss": 213.8865, + "step": 48050 + }, + { + "epoch": 0.9243152018694015, + "grad_norm": 1397.7757444555393, + "learning_rate": 2.869975158821681e-07, + "loss": 218.6217, + "step": 48060 + }, + { + "epoch": 0.924507527129882, + "grad_norm": 1507.939502356167, + "learning_rate": 2.8554765135313303e-07, + "loss": 226.7254, + "step": 48070 + }, + { + "epoch": 0.9246998523903626, + "grad_norm": 1308.9016539988518, + "learning_rate": 2.8410140529715803e-07, + "loss": 214.2825, + "step": 48080 + }, + { + "epoch": 0.9248921776508431, + "grad_norm": 1540.1751435950482, + "learning_rate": 2.826587782529444e-07, + "loss": 212.4035, + "step": 48090 + }, + { + "epoch": 0.9250845029113236, + "grad_norm": 1544.0346071366935, + "learning_rate": 2.812197707578501e-07, + "loss": 223.1791, + "step": 48100 + }, + { + "epoch": 0.9252768281718041, + "grad_norm": 1426.831497021911, + "learning_rate": 2.797843833478797e-07, + "loss": 219.2706, + "step": 48110 + }, + { + "epoch": 0.9254691534322846, + "grad_norm": 1695.5684423480686, + "learning_rate": 2.7835261655769217e-07, + "loss": 214.1696, + "step": 48120 + }, + { + "epoch": 0.9256614786927652, + "grad_norm": 1536.0103384651316, + "learning_rate": 2.769244709205976e-07, + "loss": 218.4421, + "step": 48130 + }, + { + "epoch": 0.9258538039532457, + "grad_norm": 1345.711706299068, + "learning_rate": 2.7549994696855376e-07, + "loss": 214.9853, + "step": 48140 + }, + { + "epoch": 0.9260461292137262, + "grad_norm": 1340.9451295397134, + "learning_rate": 2.740790452321751e-07, + "loss": 212.6925, + "step": 48150 + }, + { + "epoch": 0.9262384544742068, + "grad_norm": 1328.2159913932562, + "learning_rate": 2.726617662407238e-07, + "loss": 214.3774, + "step": 48160 + }, + { + "epoch": 0.9264307797346873, + "grad_norm": 1383.3575376863405, + "learning_rate": 2.7124811052211097e-07, + "loss": 223.3681, + "step": 48170 + }, + { + "epoch": 0.9266231049951679, + "grad_norm": 1462.62844723085, + "learning_rate": 2.698380786029031e-07, + "loss": 215.6205, + "step": 48180 + }, + { + "epoch": 0.9268154302556484, + "grad_norm": 1527.7695244006295, + "learning_rate": 2.6843167100831125e-07, + "loss": 219.9917, + "step": 48190 + }, + { + "epoch": 0.9270077555161289, + "grad_norm": 1418.359165205742, + "learning_rate": 2.6702888826219965e-07, + "loss": 222.0716, + "step": 48200 + }, + { + "epoch": 0.9272000807766094, + "grad_norm": 1405.4897041005295, + "learning_rate": 2.6562973088708146e-07, + "loss": 218.1003, + "step": 48210 + }, + { + "epoch": 0.92739240603709, + "grad_norm": 1372.5701243633775, + "learning_rate": 2.6423419940412086e-07, + "loss": 215.0814, + "step": 48220 + }, + { + "epoch": 0.9275847312975705, + "grad_norm": 1368.4961564221253, + "learning_rate": 2.628422943331288e-07, + "loss": 223.2442, + "step": 48230 + }, + { + "epoch": 0.927777056558051, + "grad_norm": 1347.0173733412985, + "learning_rate": 2.614540161925683e-07, + "loss": 224.957, + "step": 48240 + }, + { + "epoch": 0.9279693818185315, + "grad_norm": 1384.9718141286414, + "learning_rate": 2.6006936549954784e-07, + "loss": 219.28, + "step": 48250 + }, + { + "epoch": 0.928161707079012, + "grad_norm": 1220.9125594825982, + "learning_rate": 2.5868834276983057e-07, + "loss": 223.2387, + "step": 48260 + }, + { + "epoch": 0.9283540323394925, + "grad_norm": 1342.7009983882829, + "learning_rate": 2.573109485178216e-07, + "loss": 224.9703, + "step": 48270 + }, + { + "epoch": 0.9285463575999731, + "grad_norm": 1354.4313956353342, + "learning_rate": 2.5593718325657713e-07, + "loss": 224.9315, + "step": 48280 + }, + { + "epoch": 0.9287386828604536, + "grad_norm": 1663.4861929053834, + "learning_rate": 2.545670474978057e-07, + "loss": 213.3882, + "step": 48290 + }, + { + "epoch": 0.9289310081209341, + "grad_norm": 1443.7121404565762, + "learning_rate": 2.532005417518568e-07, + "loss": 215.4642, + "step": 48300 + }, + { + "epoch": 0.9291233333814146, + "grad_norm": 1366.3725466033384, + "learning_rate": 2.5183766652773336e-07, + "loss": 216.153, + "step": 48310 + }, + { + "epoch": 0.9293156586418951, + "grad_norm": 1431.7955612902772, + "learning_rate": 2.504784223330814e-07, + "loss": 216.707, + "step": 48320 + }, + { + "epoch": 0.9295079839023757, + "grad_norm": 1549.894658463349, + "learning_rate": 2.4912280967419934e-07, + "loss": 221.1073, + "step": 48330 + }, + { + "epoch": 0.9297003091628562, + "grad_norm": 1456.2314518427997, + "learning_rate": 2.477708290560299e-07, + "loss": 229.2287, + "step": 48340 + }, + { + "epoch": 0.9298926344233367, + "grad_norm": 1357.6297752750288, + "learning_rate": 2.464224809821614e-07, + "loss": 230.4937, + "step": 48350 + }, + { + "epoch": 0.9300849596838173, + "grad_norm": 1427.1274384670082, + "learning_rate": 2.450777659548353e-07, + "loss": 222.4397, + "step": 48360 + }, + { + "epoch": 0.9302772849442978, + "grad_norm": 1493.3636922782707, + "learning_rate": 2.4373668447493225e-07, + "loss": 224.0513, + "step": 48370 + }, + { + "epoch": 0.9304696102047784, + "grad_norm": 1228.9819821562182, + "learning_rate": 2.4239923704198476e-07, + "loss": 216.6722, + "step": 48380 + }, + { + "epoch": 0.9306619354652589, + "grad_norm": 1361.5103097302067, + "learning_rate": 2.410654241541688e-07, + "loss": 215.3536, + "step": 48390 + }, + { + "epoch": 0.9308542607257394, + "grad_norm": 1432.3808865103306, + "learning_rate": 2.3973524630830804e-07, + "loss": 220.539, + "step": 48400 + }, + { + "epoch": 0.9310465859862199, + "grad_norm": 1488.1812837252164, + "learning_rate": 2.3840870399987283e-07, + "loss": 223.2664, + "step": 48410 + }, + { + "epoch": 0.9312389112467004, + "grad_norm": 1368.829747338738, + "learning_rate": 2.370857977229768e-07, + "loss": 223.0433, + "step": 48420 + }, + { + "epoch": 0.931431236507181, + "grad_norm": 1328.082648318461, + "learning_rate": 2.3576652797038247e-07, + "loss": 221.4879, + "step": 48430 + }, + { + "epoch": 0.9316235617676615, + "grad_norm": 1418.9465086124178, + "learning_rate": 2.344508952334934e-07, + "loss": 216.7115, + "step": 48440 + }, + { + "epoch": 0.931815887028142, + "grad_norm": 1302.754902764057, + "learning_rate": 2.3313890000236316e-07, + "loss": 214.921, + "step": 48450 + }, + { + "epoch": 0.9320082122886225, + "grad_norm": 1394.5322698155076, + "learning_rate": 2.3183054276568752e-07, + "loss": 218.6302, + "step": 48460 + }, + { + "epoch": 0.932200537549103, + "grad_norm": 1448.7660192805088, + "learning_rate": 2.305258240108077e-07, + "loss": 215.6557, + "step": 48470 + }, + { + "epoch": 0.9323928628095836, + "grad_norm": 1778.5654358557942, + "learning_rate": 2.2922474422371166e-07, + "loss": 222.8299, + "step": 48480 + }, + { + "epoch": 0.9325851880700641, + "grad_norm": 1276.2701348380415, + "learning_rate": 2.279273038890273e-07, + "loss": 222.6284, + "step": 48490 + }, + { + "epoch": 0.9327775133305446, + "grad_norm": 1447.3337888479798, + "learning_rate": 2.2663350349003134e-07, + "loss": 218.724, + "step": 48500 + }, + { + "epoch": 0.9329698385910251, + "grad_norm": 1366.2347467045965, + "learning_rate": 2.2534334350864274e-07, + "loss": 212.7785, + "step": 48510 + }, + { + "epoch": 0.9331621638515056, + "grad_norm": 1581.795152606201, + "learning_rate": 2.2405682442542487e-07, + "loss": 221.3937, + "step": 48520 + }, + { + "epoch": 0.9333544891119862, + "grad_norm": 1326.9763535990462, + "learning_rate": 2.2277394671958442e-07, + "loss": 220.8175, + "step": 48530 + }, + { + "epoch": 0.9335468143724667, + "grad_norm": 1462.1562738443135, + "learning_rate": 2.2149471086897355e-07, + "loss": 219.5209, + "step": 48540 + }, + { + "epoch": 0.9337391396329472, + "grad_norm": 1840.7755545042123, + "learning_rate": 2.202191173500845e-07, + "loss": 223.9876, + "step": 48550 + }, + { + "epoch": 0.9339314648934277, + "grad_norm": 1513.7450983054312, + "learning_rate": 2.1894716663805716e-07, + "loss": 222.8194, + "step": 48560 + }, + { + "epoch": 0.9341237901539083, + "grad_norm": 1512.0765462302688, + "learning_rate": 2.176788592066692e-07, + "loss": 231.463, + "step": 48570 + }, + { + "epoch": 0.9343161154143889, + "grad_norm": 1507.880890171871, + "learning_rate": 2.164141955283472e-07, + "loss": 213.6017, + "step": 48580 + }, + { + "epoch": 0.9345084406748694, + "grad_norm": 1382.4233741772143, + "learning_rate": 2.1515317607415654e-07, + "loss": 219.7649, + "step": 48590 + }, + { + "epoch": 0.9347007659353499, + "grad_norm": 1321.2779752271076, + "learning_rate": 2.1389580131380373e-07, + "loss": 234.4291, + "step": 48600 + }, + { + "epoch": 0.9348930911958304, + "grad_norm": 1604.1237040994413, + "learning_rate": 2.126420717156441e-07, + "loss": 229.7492, + "step": 48610 + }, + { + "epoch": 0.9350854164563109, + "grad_norm": 1507.0068110620718, + "learning_rate": 2.113919877466686e-07, + "loss": 215.8246, + "step": 48620 + }, + { + "epoch": 0.9352777417167915, + "grad_norm": 1894.812129250663, + "learning_rate": 2.1014554987251356e-07, + "loss": 226.4411, + "step": 48630 + }, + { + "epoch": 0.935470066977272, + "grad_norm": 1417.8296026083271, + "learning_rate": 2.0890275855745546e-07, + "loss": 218.8423, + "step": 48640 + }, + { + "epoch": 0.9356623922377525, + "grad_norm": 1351.992081181903, + "learning_rate": 2.0766361426441505e-07, + "loss": 215.6343, + "step": 48650 + }, + { + "epoch": 0.935854717498233, + "grad_norm": 1302.7594731055633, + "learning_rate": 2.0642811745495206e-07, + "loss": 226.6181, + "step": 48660 + }, + { + "epoch": 0.9360470427587135, + "grad_norm": 1498.4650782147276, + "learning_rate": 2.0519626858926944e-07, + "loss": 219.5578, + "step": 48670 + }, + { + "epoch": 0.936239368019194, + "grad_norm": 1413.5460913091172, + "learning_rate": 2.0396806812621018e-07, + "loss": 220.2387, + "step": 48680 + }, + { + "epoch": 0.9364316932796746, + "grad_norm": 1450.5595334943514, + "learning_rate": 2.0274351652325942e-07, + "loss": 227.4826, + "step": 48690 + }, + { + "epoch": 0.9366240185401551, + "grad_norm": 1403.7716562110738, + "learning_rate": 2.0152261423654118e-07, + "loss": 223.8041, + "step": 48700 + }, + { + "epoch": 0.9368163438006356, + "grad_norm": 1331.374860598774, + "learning_rate": 2.003053617208217e-07, + "loss": 217.9378, + "step": 48710 + }, + { + "epoch": 0.9370086690611161, + "grad_norm": 1479.9874435358035, + "learning_rate": 1.9909175942950832e-07, + "loss": 227.4229, + "step": 48720 + }, + { + "epoch": 0.9372009943215966, + "grad_norm": 1659.3378128005795, + "learning_rate": 1.9788180781464716e-07, + "loss": 222.5422, + "step": 48730 + }, + { + "epoch": 0.9373933195820772, + "grad_norm": 1343.347600477696, + "learning_rate": 1.9667550732692554e-07, + "loss": 222.2111, + "step": 48740 + }, + { + "epoch": 0.9375856448425577, + "grad_norm": 1318.6667266753677, + "learning_rate": 1.9547285841566843e-07, + "loss": 213.8635, + "step": 48750 + }, + { + "epoch": 0.9377779701030382, + "grad_norm": 1493.87287592487, + "learning_rate": 1.9427386152884643e-07, + "loss": 219.1087, + "step": 48760 + }, + { + "epoch": 0.9379702953635188, + "grad_norm": 1562.078925959731, + "learning_rate": 1.9307851711306336e-07, + "loss": 221.5375, + "step": 48770 + }, + { + "epoch": 0.9381626206239994, + "grad_norm": 1388.0220825267072, + "learning_rate": 1.918868256135653e-07, + "loss": 223.972, + "step": 48780 + }, + { + "epoch": 0.9383549458844799, + "grad_norm": 1330.1853997904634, + "learning_rate": 1.9069878747423943e-07, + "loss": 219.3253, + "step": 48790 + }, + { + "epoch": 0.9385472711449604, + "grad_norm": 1756.8756966376293, + "learning_rate": 1.8951440313760838e-07, + "loss": 220.7723, + "step": 48800 + }, + { + "epoch": 0.9387395964054409, + "grad_norm": 1356.9913556748725, + "learning_rate": 1.8833367304483708e-07, + "loss": 223.0061, + "step": 48810 + }, + { + "epoch": 0.9389319216659214, + "grad_norm": 1374.9040728679893, + "learning_rate": 1.8715659763572703e-07, + "loss": 218.9188, + "step": 48820 + }, + { + "epoch": 0.939124246926402, + "grad_norm": 1432.1827949199196, + "learning_rate": 1.8598317734872086e-07, + "loss": 217.1677, + "step": 48830 + }, + { + "epoch": 0.9393165721868825, + "grad_norm": 1493.0493848455733, + "learning_rate": 1.8481341262089668e-07, + "loss": 221.1158, + "step": 48840 + }, + { + "epoch": 0.939508897447363, + "grad_norm": 1675.6485118930636, + "learning_rate": 1.836473038879727e-07, + "loss": 215.9657, + "step": 48850 + }, + { + "epoch": 0.9397012227078435, + "grad_norm": 1651.7186047597081, + "learning_rate": 1.8248485158430696e-07, + "loss": 219.7294, + "step": 48860 + }, + { + "epoch": 0.939893547968324, + "grad_norm": 1461.0578275623395, + "learning_rate": 1.813260561428909e-07, + "loss": 218.0068, + "step": 48870 + }, + { + "epoch": 0.9400858732288045, + "grad_norm": 1476.2837872092398, + "learning_rate": 1.8017091799535924e-07, + "loss": 215.0431, + "step": 48880 + }, + { + "epoch": 0.9402781984892851, + "grad_norm": 1309.5088194323287, + "learning_rate": 1.7901943757198003e-07, + "loss": 214.9583, + "step": 48890 + }, + { + "epoch": 0.9404705237497656, + "grad_norm": 1481.5788926876676, + "learning_rate": 1.7787161530166242e-07, + "loss": 218.8679, + "step": 48900 + }, + { + "epoch": 0.9406628490102461, + "grad_norm": 1544.0380454810745, + "learning_rate": 1.7672745161194992e-07, + "loss": 222.1133, + "step": 48910 + }, + { + "epoch": 0.9408551742707266, + "grad_norm": 1321.5615668746314, + "learning_rate": 1.75586946929025e-07, + "loss": 218.2311, + "step": 48920 + }, + { + "epoch": 0.9410474995312071, + "grad_norm": 1384.2284116658982, + "learning_rate": 1.7445010167770672e-07, + "loss": 222.7567, + "step": 48930 + }, + { + "epoch": 0.9412398247916877, + "grad_norm": 1464.750193221155, + "learning_rate": 1.7331691628145076e-07, + "loss": 222.5026, + "step": 48940 + }, + { + "epoch": 0.9414321500521682, + "grad_norm": 1421.0093881818298, + "learning_rate": 1.7218739116235061e-07, + "loss": 220.2543, + "step": 48950 + }, + { + "epoch": 0.9416244753126487, + "grad_norm": 1384.542091524248, + "learning_rate": 1.710615267411353e-07, + "loss": 214.7182, + "step": 48960 + }, + { + "epoch": 0.9418168005731293, + "grad_norm": 1514.8554472681276, + "learning_rate": 1.6993932343717158e-07, + "loss": 220.3634, + "step": 48970 + }, + { + "epoch": 0.9420091258336099, + "grad_norm": 1354.5647171188343, + "learning_rate": 1.6882078166846173e-07, + "loss": 215.113, + "step": 48980 + }, + { + "epoch": 0.9422014510940904, + "grad_norm": 1455.2777205649381, + "learning_rate": 1.677059018516425e-07, + "loss": 215.3395, + "step": 48990 + }, + { + "epoch": 0.9423937763545709, + "grad_norm": 1448.5088923613425, + "learning_rate": 1.6659468440198835e-07, + "loss": 222.7378, + "step": 49000 + }, + { + "epoch": 0.9425861016150514, + "grad_norm": 1250.4333686386178, + "learning_rate": 1.6548712973341152e-07, + "loss": 230.9148, + "step": 49010 + }, + { + "epoch": 0.9427784268755319, + "grad_norm": 1381.3475013362543, + "learning_rate": 1.6438323825845647e-07, + "loss": 215.9438, + "step": 49020 + }, + { + "epoch": 0.9429707521360124, + "grad_norm": 1400.720680680042, + "learning_rate": 1.6328301038830429e-07, + "loss": 221.7146, + "step": 49030 + }, + { + "epoch": 0.943163077396493, + "grad_norm": 1479.3819464434112, + "learning_rate": 1.621864465327716e-07, + "loss": 212.319, + "step": 49040 + }, + { + "epoch": 0.9433554026569735, + "grad_norm": 1666.3186738187007, + "learning_rate": 1.6109354710031167e-07, + "loss": 223.9077, + "step": 49050 + }, + { + "epoch": 0.943547727917454, + "grad_norm": 1287.1865213646581, + "learning_rate": 1.6000431249800995e-07, + "loss": 209.4681, + "step": 49060 + }, + { + "epoch": 0.9437400531779345, + "grad_norm": 1260.8907458725478, + "learning_rate": 1.5891874313158862e-07, + "loss": 209.7787, + "step": 49070 + }, + { + "epoch": 0.943932378438415, + "grad_norm": 1465.5264985262763, + "learning_rate": 1.5783683940540528e-07, + "loss": 216.3579, + "step": 49080 + }, + { + "epoch": 0.9441247036988956, + "grad_norm": 1270.9833342140068, + "learning_rate": 1.5675860172244982e-07, + "loss": 214.5415, + "step": 49090 + }, + { + "epoch": 0.9443170289593761, + "grad_norm": 1376.786440317354, + "learning_rate": 1.5568403048434877e-07, + "loss": 214.4211, + "step": 49100 + }, + { + "epoch": 0.9445093542198566, + "grad_norm": 1270.0883767865007, + "learning_rate": 1.5461312609136192e-07, + "loss": 216.8254, + "step": 49110 + }, + { + "epoch": 0.9447016794803371, + "grad_norm": 1450.5672453293744, + "learning_rate": 1.5354588894238465e-07, + "loss": 214.8898, + "step": 49120 + }, + { + "epoch": 0.9448940047408176, + "grad_norm": 1375.048397461741, + "learning_rate": 1.524823194349434e-07, + "loss": 222.6178, + "step": 49130 + }, + { + "epoch": 0.9450863300012982, + "grad_norm": 1488.461636684655, + "learning_rate": 1.514224179652013e-07, + "loss": 223.175, + "step": 49140 + }, + { + "epoch": 0.9452786552617787, + "grad_norm": 1480.9839638843337, + "learning_rate": 1.5036618492795473e-07, + "loss": 221.1118, + "step": 49150 + }, + { + "epoch": 0.9454709805222592, + "grad_norm": 1382.4697674624388, + "learning_rate": 1.4931362071663125e-07, + "loss": 218.7365, + "step": 49160 + }, + { + "epoch": 0.9456633057827397, + "grad_norm": 1390.1083629149891, + "learning_rate": 1.4826472572329498e-07, + "loss": 217.7386, + "step": 49170 + }, + { + "epoch": 0.9458556310432203, + "grad_norm": 1424.8330751478002, + "learning_rate": 1.4721950033864118e-07, + "loss": 212.3467, + "step": 49180 + }, + { + "epoch": 0.9460479563037009, + "grad_norm": 1383.6156139819386, + "learning_rate": 1.4617794495199956e-07, + "loss": 221.9907, + "step": 49190 + }, + { + "epoch": 0.9462402815641814, + "grad_norm": 1454.9331898579517, + "learning_rate": 1.4514005995133197e-07, + "loss": 221.3145, + "step": 49200 + }, + { + "epoch": 0.9464326068246619, + "grad_norm": 1329.53101841995, + "learning_rate": 1.441058457232336e-07, + "loss": 215.3924, + "step": 49210 + }, + { + "epoch": 0.9466249320851424, + "grad_norm": 1438.784677106811, + "learning_rate": 1.43075302652933e-07, + "loss": 225.9471, + "step": 49220 + }, + { + "epoch": 0.9468172573456229, + "grad_norm": 1392.8093911436142, + "learning_rate": 1.4204843112428867e-07, + "loss": 220.0533, + "step": 49230 + }, + { + "epoch": 0.9470095826061035, + "grad_norm": 1331.6673038411307, + "learning_rate": 1.4102523151979574e-07, + "loss": 218.5267, + "step": 49240 + }, + { + "epoch": 0.947201907866584, + "grad_norm": 1642.9773984773065, + "learning_rate": 1.40005704220576e-07, + "loss": 222.6884, + "step": 49250 + }, + { + "epoch": 0.9473942331270645, + "grad_norm": 1387.9204088687911, + "learning_rate": 1.3898984960638907e-07, + "loss": 221.0935, + "step": 49260 + }, + { + "epoch": 0.947586558387545, + "grad_norm": 1563.2552699163398, + "learning_rate": 1.3797766805562328e-07, + "loss": 223.8792, + "step": 49270 + }, + { + "epoch": 0.9477788836480255, + "grad_norm": 1725.6842547242015, + "learning_rate": 1.3696915994530048e-07, + "loss": 220.3075, + "step": 49280 + }, + { + "epoch": 0.9479712089085061, + "grad_norm": 1351.9204479092296, + "learning_rate": 1.359643256510701e-07, + "loss": 214.8934, + "step": 49290 + }, + { + "epoch": 0.9481635341689866, + "grad_norm": 1372.4787109431147, + "learning_rate": 1.3496316554722056e-07, + "loss": 217.8872, + "step": 49300 + }, + { + "epoch": 0.9483558594294671, + "grad_norm": 1495.7625838423826, + "learning_rate": 1.3396568000666467e-07, + "loss": 220.4245, + "step": 49310 + }, + { + "epoch": 0.9485481846899476, + "grad_norm": 1428.9404245857595, + "learning_rate": 1.3297186940094853e-07, + "loss": 223.5978, + "step": 49320 + }, + { + "epoch": 0.9487405099504281, + "grad_norm": 1797.3066779888977, + "learning_rate": 1.3198173410025383e-07, + "loss": 216.9476, + "step": 49330 + }, + { + "epoch": 0.9489328352109087, + "grad_norm": 1563.3033791239825, + "learning_rate": 1.3099527447338668e-07, + "loss": 221.7208, + "step": 49340 + }, + { + "epoch": 0.9491251604713892, + "grad_norm": 1418.473510444605, + "learning_rate": 1.3001249088778756e-07, + "loss": 223.9783, + "step": 49350 + }, + { + "epoch": 0.9493174857318697, + "grad_norm": 1764.1922197600616, + "learning_rate": 1.29033383709527e-07, + "loss": 221.6277, + "step": 49360 + }, + { + "epoch": 0.9495098109923502, + "grad_norm": 1519.3160579705714, + "learning_rate": 1.2805795330330774e-07, + "loss": 222.519, + "step": 49370 + }, + { + "epoch": 0.9497021362528308, + "grad_norm": 1410.2806834320604, + "learning_rate": 1.2708620003245908e-07, + "loss": 227.1136, + "step": 49380 + }, + { + "epoch": 0.9498944615133114, + "grad_norm": 1384.442499752756, + "learning_rate": 1.2611812425894487e-07, + "loss": 211.1012, + "step": 49390 + }, + { + "epoch": 0.9500867867737919, + "grad_norm": 1449.078012356189, + "learning_rate": 1.2515372634335666e-07, + "loss": 215.1433, + "step": 49400 + }, + { + "epoch": 0.9502791120342724, + "grad_norm": 1525.3608856907586, + "learning_rate": 1.2419300664491707e-07, + "loss": 231.105, + "step": 49410 + }, + { + "epoch": 0.9504714372947529, + "grad_norm": 1274.9084907231727, + "learning_rate": 1.2323596552147876e-07, + "loss": 220.4786, + "step": 49420 + }, + { + "epoch": 0.9506637625552334, + "grad_norm": 1491.2090463219804, + "learning_rate": 1.2228260332952213e-07, + "loss": 219.7076, + "step": 49430 + }, + { + "epoch": 0.950856087815714, + "grad_norm": 1642.0579754967641, + "learning_rate": 1.213329204241609e-07, + "loss": 215.8749, + "step": 49440 + }, + { + "epoch": 0.9510484130761945, + "grad_norm": 1622.2449385890602, + "learning_rate": 1.203869171591343e-07, + "loss": 214.1462, + "step": 49450 + }, + { + "epoch": 0.951240738336675, + "grad_norm": 1639.916115340428, + "learning_rate": 1.1944459388681496e-07, + "loss": 227.6559, + "step": 49460 + }, + { + "epoch": 0.9514330635971555, + "grad_norm": 1531.8280036441413, + "learning_rate": 1.1850595095820095e-07, + "loss": 221.534, + "step": 49470 + }, + { + "epoch": 0.951625388857636, + "grad_norm": 1592.6006498171778, + "learning_rate": 1.175709887229215e-07, + "loss": 214.6332, + "step": 49480 + }, + { + "epoch": 0.9518177141181166, + "grad_norm": 1544.0915759183692, + "learning_rate": 1.1663970752923581e-07, + "loss": 217.407, + "step": 49490 + }, + { + "epoch": 0.9520100393785971, + "grad_norm": 1257.471276929645, + "learning_rate": 1.1571210772402975e-07, + "loss": 218.6383, + "step": 49500 + }, + { + "epoch": 0.9522023646390776, + "grad_norm": 1493.143269834361, + "learning_rate": 1.1478818965281912e-07, + "loss": 216.8622, + "step": 49510 + }, + { + "epoch": 0.9523946898995581, + "grad_norm": 1587.2592223772997, + "learning_rate": 1.1386795365974757e-07, + "loss": 225.2867, + "step": 49520 + }, + { + "epoch": 0.9525870151600386, + "grad_norm": 1436.2136052394492, + "learning_rate": 1.1295140008758864e-07, + "loss": 218.4989, + "step": 49530 + }, + { + "epoch": 0.9527793404205191, + "grad_norm": 1778.5153661587804, + "learning_rate": 1.1203852927774372e-07, + "loss": 222.3573, + "step": 49540 + }, + { + "epoch": 0.9529716656809997, + "grad_norm": 1596.3578970155165, + "learning_rate": 1.111293415702408e-07, + "loss": 217.9064, + "step": 49550 + }, + { + "epoch": 0.9531639909414802, + "grad_norm": 1488.349410158663, + "learning_rate": 1.1022383730373897e-07, + "loss": 222.3463, + "step": 49560 + }, + { + "epoch": 0.9533563162019607, + "grad_norm": 1365.3116597899987, + "learning_rate": 1.093220168155218e-07, + "loss": 218.0718, + "step": 49570 + }, + { + "epoch": 0.9535486414624412, + "grad_norm": 1472.6474533627631, + "learning_rate": 1.0842388044150387e-07, + "loss": 219.2332, + "step": 49580 + }, + { + "epoch": 0.9537409667229219, + "grad_norm": 1419.849759730337, + "learning_rate": 1.0752942851622649e-07, + "loss": 217.3345, + "step": 49590 + }, + { + "epoch": 0.9539332919834024, + "grad_norm": 1414.9693040770362, + "learning_rate": 1.066386613728565e-07, + "loss": 217.0496, + "step": 49600 + }, + { + "epoch": 0.9541256172438829, + "grad_norm": 1628.0844701745593, + "learning_rate": 1.0575157934319069e-07, + "loss": 223.2177, + "step": 49610 + }, + { + "epoch": 0.9543179425043634, + "grad_norm": 1542.2438900909433, + "learning_rate": 1.0486818275765364e-07, + "loss": 219.1934, + "step": 49620 + }, + { + "epoch": 0.9545102677648439, + "grad_norm": 1467.6851503594532, + "learning_rate": 1.0398847194529437e-07, + "loss": 221.6878, + "step": 49630 + }, + { + "epoch": 0.9547025930253245, + "grad_norm": 1500.0813827002046, + "learning_rate": 1.0311244723379188e-07, + "loss": 219.5877, + "step": 49640 + }, + { + "epoch": 0.954894918285805, + "grad_norm": 1397.1826024362097, + "learning_rate": 1.0224010894944958e-07, + "loss": 225.1783, + "step": 49650 + }, + { + "epoch": 0.9550872435462855, + "grad_norm": 1363.445633430055, + "learning_rate": 1.0137145741719867e-07, + "loss": 222.808, + "step": 49660 + }, + { + "epoch": 0.955279568806766, + "grad_norm": 1656.8560138722585, + "learning_rate": 1.0050649296060033e-07, + "loss": 234.056, + "step": 49670 + }, + { + "epoch": 0.9554718940672465, + "grad_norm": 1518.0234312138584, + "learning_rate": 9.964521590183684e-08, + "loss": 223.1471, + "step": 49680 + }, + { + "epoch": 0.955664219327727, + "grad_norm": 1549.3289411961346, + "learning_rate": 9.878762656172159e-08, + "loss": 219.831, + "step": 49690 + }, + { + "epoch": 0.9558565445882076, + "grad_norm": 1392.3647382184045, + "learning_rate": 9.793372525969125e-08, + "loss": 227.9785, + "step": 49700 + }, + { + "epoch": 0.9560488698486881, + "grad_norm": 1521.286342017299, + "learning_rate": 9.70835123138103e-08, + "loss": 221.397, + "step": 49710 + }, + { + "epoch": 0.9562411951091686, + "grad_norm": 1479.5330835399277, + "learning_rate": 9.623698804076875e-08, + "loss": 222.178, + "step": 49720 + }, + { + "epoch": 0.9564335203696491, + "grad_norm": 1476.4323700275932, + "learning_rate": 9.539415275588326e-08, + "loss": 229.1022, + "step": 49730 + }, + { + "epoch": 0.9566258456301296, + "grad_norm": 1321.8535674937316, + "learning_rate": 9.455500677309603e-08, + "loss": 236.7966, + "step": 49740 + }, + { + "epoch": 0.9568181708906102, + "grad_norm": 1387.817208223957, + "learning_rate": 9.371955040497371e-08, + "loss": 218.5549, + "step": 49750 + }, + { + "epoch": 0.9570104961510907, + "grad_norm": 1431.9921146805782, + "learning_rate": 9.288778396271292e-08, + "loss": 227.8062, + "step": 49760 + }, + { + "epoch": 0.9572028214115712, + "grad_norm": 1521.7189171140803, + "learning_rate": 9.205970775613027e-08, + "loss": 220.0308, + "step": 49770 + }, + { + "epoch": 0.9573951466720517, + "grad_norm": 1726.3466369243124, + "learning_rate": 9.123532209367237e-08, + "loss": 227.9465, + "step": 49780 + }, + { + "epoch": 0.9575874719325324, + "grad_norm": 1348.7248495836477, + "learning_rate": 9.04146272824069e-08, + "loss": 210.339, + "step": 49790 + }, + { + "epoch": 0.9577797971930129, + "grad_norm": 1356.720134180842, + "learning_rate": 8.959762362803159e-08, + "loss": 217.5549, + "step": 49800 + }, + { + "epoch": 0.9579721224534934, + "grad_norm": 1299.987739470492, + "learning_rate": 8.87843114348652e-08, + "loss": 216.9105, + "step": 49810 + }, + { + "epoch": 0.9581644477139739, + "grad_norm": 1488.546770358729, + "learning_rate": 8.797469100585432e-08, + "loss": 224.1143, + "step": 49820 + }, + { + "epoch": 0.9583567729744544, + "grad_norm": 1762.9370076163752, + "learning_rate": 8.716876264256768e-08, + "loss": 232.3192, + "step": 49830 + }, + { + "epoch": 0.958549098234935, + "grad_norm": 1372.8568412599043, + "learning_rate": 8.636652664520184e-08, + "loss": 220.5682, + "step": 49840 + }, + { + "epoch": 0.9587414234954155, + "grad_norm": 1458.5762699687893, + "learning_rate": 8.556798331257555e-08, + "loss": 222.2676, + "step": 49850 + }, + { + "epoch": 0.958933748755896, + "grad_norm": 1575.3787980322772, + "learning_rate": 8.477313294213307e-08, + "loss": 219.2771, + "step": 49860 + }, + { + "epoch": 0.9591260740163765, + "grad_norm": 1357.0175353841378, + "learning_rate": 8.398197582994316e-08, + "loss": 212.6595, + "step": 49870 + }, + { + "epoch": 0.959318399276857, + "grad_norm": 1429.2296523014513, + "learning_rate": 8.319451227069897e-08, + "loss": 220.4824, + "step": 49880 + }, + { + "epoch": 0.9595107245373375, + "grad_norm": 1393.5877537069612, + "learning_rate": 8.241074255771808e-08, + "loss": 215.5422, + "step": 49890 + }, + { + "epoch": 0.9597030497978181, + "grad_norm": 1344.6991732241456, + "learning_rate": 8.163066698294031e-08, + "loss": 220.2936, + "step": 49900 + }, + { + "epoch": 0.9598953750582986, + "grad_norm": 1361.6020886917945, + "learning_rate": 8.085428583693211e-08, + "loss": 217.6353, + "step": 49910 + }, + { + "epoch": 0.9600877003187791, + "grad_norm": 1354.6067794420978, + "learning_rate": 8.008159940888216e-08, + "loss": 218.937, + "step": 49920 + }, + { + "epoch": 0.9602800255792596, + "grad_norm": 1398.4695016785956, + "learning_rate": 7.931260798660356e-08, + "loss": 223.0757, + "step": 49930 + }, + { + "epoch": 0.9604723508397401, + "grad_norm": 1348.597500175215, + "learning_rate": 7.854731185653386e-08, + "loss": 214.6391, + "step": 49940 + }, + { + "epoch": 0.9606646761002207, + "grad_norm": 1353.433160214295, + "learning_rate": 7.778571130373059e-08, + "loss": 218.2911, + "step": 49950 + }, + { + "epoch": 0.9608570013607012, + "grad_norm": 1383.1705841048174, + "learning_rate": 7.702780661188014e-08, + "loss": 222.7571, + "step": 49960 + }, + { + "epoch": 0.9610493266211817, + "grad_norm": 1386.0999581259562, + "learning_rate": 7.627359806328782e-08, + "loss": 221.2229, + "step": 49970 + }, + { + "epoch": 0.9612416518816622, + "grad_norm": 1462.5113174928572, + "learning_rate": 7.552308593888558e-08, + "loss": 220.3561, + "step": 49980 + }, + { + "epoch": 0.9614339771421427, + "grad_norm": 1483.0226051717405, + "learning_rate": 7.477627051822534e-08, + "loss": 224.737, + "step": 49990 + }, + { + "epoch": 0.9616263024026234, + "grad_norm": 1592.7711262874652, + "learning_rate": 7.403315207948236e-08, + "loss": 230.2029, + "step": 50000 + }, + { + "epoch": 0.9618186276631039, + "grad_norm": 1581.955196026486, + "learning_rate": 7.329373089945968e-08, + "loss": 213.5711, + "step": 50010 + }, + { + "epoch": 0.9620109529235844, + "grad_norm": 1383.1895906350637, + "learning_rate": 7.255800725357586e-08, + "loss": 221.3688, + "step": 50020 + }, + { + "epoch": 0.9622032781840649, + "grad_norm": 1479.0640633664084, + "learning_rate": 7.182598141587838e-08, + "loss": 221.1271, + "step": 50030 + }, + { + "epoch": 0.9623956034445454, + "grad_norm": 1507.3299417270493, + "learning_rate": 7.109765365903243e-08, + "loss": 224.5815, + "step": 50040 + }, + { + "epoch": 0.962587928705026, + "grad_norm": 1359.2587288594768, + "learning_rate": 7.03730242543299e-08, + "loss": 221.5924, + "step": 50050 + }, + { + "epoch": 0.9627802539655065, + "grad_norm": 1293.2467829465634, + "learning_rate": 6.965209347168156e-08, + "loss": 215.362, + "step": 50060 + }, + { + "epoch": 0.962972579225987, + "grad_norm": 1339.3854791985657, + "learning_rate": 6.89348615796237e-08, + "loss": 215.3806, + "step": 50070 + }, + { + "epoch": 0.9631649044864675, + "grad_norm": 1346.8258537843412, + "learning_rate": 6.822132884531373e-08, + "loss": 216.8808, + "step": 50080 + }, + { + "epoch": 0.963357229746948, + "grad_norm": 1608.656832645142, + "learning_rate": 6.751149553452907e-08, + "loss": 224.8484, + "step": 50090 + }, + { + "epoch": 0.9635495550074286, + "grad_norm": 1347.115068805702, + "learning_rate": 6.680536191167263e-08, + "loss": 215.7307, + "step": 50100 + }, + { + "epoch": 0.9637418802679091, + "grad_norm": 1475.8750918225821, + "learning_rate": 6.610292823976628e-08, + "loss": 215.2222, + "step": 50110 + }, + { + "epoch": 0.9639342055283896, + "grad_norm": 1379.2208797851886, + "learning_rate": 6.540419478045623e-08, + "loss": 223.4813, + "step": 50120 + }, + { + "epoch": 0.9641265307888701, + "grad_norm": 1517.7156384600805, + "learning_rate": 6.470916179400765e-08, + "loss": 214.4612, + "step": 50130 + }, + { + "epoch": 0.9643188560493506, + "grad_norm": 1423.8184394785478, + "learning_rate": 6.401782953931013e-08, + "loss": 216.5052, + "step": 50140 + }, + { + "epoch": 0.9645111813098312, + "grad_norm": 1426.3021935973463, + "learning_rate": 6.33301982738721e-08, + "loss": 218.0288, + "step": 50150 + }, + { + "epoch": 0.9647035065703117, + "grad_norm": 1308.4833197316664, + "learning_rate": 6.264626825382647e-08, + "loss": 225.292, + "step": 50160 + }, + { + "epoch": 0.9648958318307922, + "grad_norm": 1428.450671731036, + "learning_rate": 6.196603973392501e-08, + "loss": 224.4039, + "step": 50170 + }, + { + "epoch": 0.9650881570912727, + "grad_norm": 1692.231727584345, + "learning_rate": 6.12895129675406e-08, + "loss": 214.8248, + "step": 50180 + }, + { + "epoch": 0.9652804823517532, + "grad_norm": 1456.3327960599509, + "learning_rate": 6.061668820667055e-08, + "loss": 222.6079, + "step": 50190 + }, + { + "epoch": 0.9654728076122339, + "grad_norm": 1503.3810416757115, + "learning_rate": 5.994756570192994e-08, + "loss": 225.3887, + "step": 50200 + }, + { + "epoch": 0.9656651328727144, + "grad_norm": 1447.2828900576167, + "learning_rate": 5.928214570255497e-08, + "loss": 211.5125, + "step": 50210 + }, + { + "epoch": 0.9658574581331949, + "grad_norm": 1482.458328963002, + "learning_rate": 5.862042845640403e-08, + "loss": 217.6564, + "step": 50220 + }, + { + "epoch": 0.9660497833936754, + "grad_norm": 1423.4256362289439, + "learning_rate": 5.796241420995663e-08, + "loss": 211.4614, + "step": 50230 + }, + { + "epoch": 0.9662421086541559, + "grad_norm": 1295.2582420324156, + "learning_rate": 5.730810320831226e-08, + "loss": 219.6026, + "step": 50240 + }, + { + "epoch": 0.9664344339146365, + "grad_norm": 1411.0831890260745, + "learning_rate": 5.66574956951893e-08, + "loss": 221.2823, + "step": 50250 + }, + { + "epoch": 0.966626759175117, + "grad_norm": 1963.7417916944628, + "learning_rate": 5.6010591912930565e-08, + "loss": 225.4074, + "step": 50260 + }, + { + "epoch": 0.9668190844355975, + "grad_norm": 1359.6122733778202, + "learning_rate": 5.5367392102495534e-08, + "loss": 220.1798, + "step": 50270 + }, + { + "epoch": 0.967011409696078, + "grad_norm": 1532.095801029173, + "learning_rate": 5.472789650346588e-08, + "loss": 222.7269, + "step": 50280 + }, + { + "epoch": 0.9672037349565585, + "grad_norm": 1508.0119451134963, + "learning_rate": 5.4092105354043304e-08, + "loss": 221.8856, + "step": 50290 + }, + { + "epoch": 0.967396060217039, + "grad_norm": 1378.122736390622, + "learning_rate": 5.346001889104946e-08, + "loss": 212.4824, + "step": 50300 + }, + { + "epoch": 0.9675883854775196, + "grad_norm": 1544.1114348919405, + "learning_rate": 5.2831637349926026e-08, + "loss": 217.1256, + "step": 50310 + }, + { + "epoch": 0.9677807107380001, + "grad_norm": 1522.8427160799283, + "learning_rate": 5.220696096473465e-08, + "loss": 215.729, + "step": 50320 + }, + { + "epoch": 0.9679730359984806, + "grad_norm": 1922.6298739447218, + "learning_rate": 5.1585989968157e-08, + "loss": 231.2357, + "step": 50330 + }, + { + "epoch": 0.9681653612589611, + "grad_norm": 1284.990428705182, + "learning_rate": 5.096872459149471e-08, + "loss": 218.1397, + "step": 50340 + }, + { + "epoch": 0.9683576865194417, + "grad_norm": 1531.5772526680107, + "learning_rate": 5.035516506466942e-08, + "loss": 229.0436, + "step": 50350 + }, + { + "epoch": 0.9685500117799222, + "grad_norm": 1787.2079843825363, + "learning_rate": 4.9745311616220535e-08, + "loss": 230.0955, + "step": 50360 + }, + { + "epoch": 0.9687423370404027, + "grad_norm": 1261.7052608247714, + "learning_rate": 4.913916447330858e-08, + "loss": 219.1985, + "step": 50370 + }, + { + "epoch": 0.9689346623008832, + "grad_norm": 1376.5097051181942, + "learning_rate": 4.853672386171515e-08, + "loss": 225.4804, + "step": 50380 + }, + { + "epoch": 0.9691269875613637, + "grad_norm": 1381.84413928358, + "learning_rate": 4.793799000583743e-08, + "loss": 218.5802, + "step": 50390 + }, + { + "epoch": 0.9693193128218442, + "grad_norm": 1638.0263616664317, + "learning_rate": 4.73429631286948e-08, + "loss": 218.6352, + "step": 50400 + }, + { + "epoch": 0.9695116380823249, + "grad_norm": 1578.8418036853782, + "learning_rate": 4.675164345192551e-08, + "loss": 212.6262, + "step": 50410 + }, + { + "epoch": 0.9697039633428054, + "grad_norm": 1371.1759514757368, + "learning_rate": 4.6164031195785606e-08, + "loss": 220.3333, + "step": 50420 + }, + { + "epoch": 0.9698962886032859, + "grad_norm": 1293.1056215628555, + "learning_rate": 4.558012657915112e-08, + "loss": 219.4111, + "step": 50430 + }, + { + "epoch": 0.9700886138637664, + "grad_norm": 1477.342646008764, + "learning_rate": 4.4999929819515844e-08, + "loss": 225.7875, + "step": 50440 + }, + { + "epoch": 0.970280939124247, + "grad_norm": 1624.6552431312757, + "learning_rate": 4.442344113299579e-08, + "loss": 220.0965, + "step": 50450 + }, + { + "epoch": 0.9704732643847275, + "grad_norm": 1349.1434146453648, + "learning_rate": 4.385066073432143e-08, + "loss": 213.9362, + "step": 50460 + }, + { + "epoch": 0.970665589645208, + "grad_norm": 1553.172914479967, + "learning_rate": 4.3281588836844303e-08, + "loss": 217.8453, + "step": 50470 + }, + { + "epoch": 0.9708579149056885, + "grad_norm": 1374.7270202951709, + "learning_rate": 4.271622565253486e-08, + "loss": 213.7431, + "step": 50480 + }, + { + "epoch": 0.971050240166169, + "grad_norm": 1481.7411706615144, + "learning_rate": 4.2154571391982425e-08, + "loss": 217.4573, + "step": 50490 + }, + { + "epoch": 0.9712425654266496, + "grad_norm": 1487.240944704676, + "learning_rate": 4.159662626439187e-08, + "loss": 222.2234, + "step": 50500 + }, + { + "epoch": 0.9714348906871301, + "grad_norm": 1733.2112032588002, + "learning_rate": 4.104239047758918e-08, + "loss": 223.9488, + "step": 50510 + }, + { + "epoch": 0.9716272159476106, + "grad_norm": 1390.704318018731, + "learning_rate": 4.049186423801921e-08, + "loss": 212.4791, + "step": 50520 + }, + { + "epoch": 0.9718195412080911, + "grad_norm": 1526.7482218638684, + "learning_rate": 3.994504775074237e-08, + "loss": 213.7319, + "step": 50530 + }, + { + "epoch": 0.9720118664685716, + "grad_norm": 1454.2356090929586, + "learning_rate": 3.9401941219440186e-08, + "loss": 223.8824, + "step": 50540 + }, + { + "epoch": 0.9722041917290521, + "grad_norm": 1348.955669625872, + "learning_rate": 3.8862544846409725e-08, + "loss": 217.2765, + "step": 50550 + }, + { + "epoch": 0.9723965169895327, + "grad_norm": 1516.5889832832156, + "learning_rate": 3.832685883256915e-08, + "loss": 221.4098, + "step": 50560 + }, + { + "epoch": 0.9725888422500132, + "grad_norm": 1351.37761180578, + "learning_rate": 3.7794883377449966e-08, + "loss": 215.5324, + "step": 50570 + }, + { + "epoch": 0.9727811675104937, + "grad_norm": 1290.0552578950126, + "learning_rate": 3.726661867920478e-08, + "loss": 218.6886, + "step": 50580 + }, + { + "epoch": 0.9729734927709742, + "grad_norm": 1478.3844432855553, + "learning_rate": 3.674206493460508e-08, + "loss": 217.3733, + "step": 50590 + }, + { + "epoch": 0.9731658180314547, + "grad_norm": 1329.3020218449783, + "learning_rate": 3.622122233903791e-08, + "loss": 216.0461, + "step": 50600 + }, + { + "epoch": 0.9733581432919354, + "grad_norm": 1598.638778495034, + "learning_rate": 3.5704091086508076e-08, + "loss": 223.5914, + "step": 50610 + }, + { + "epoch": 0.9735504685524159, + "grad_norm": 1501.707689531157, + "learning_rate": 3.519067136963705e-08, + "loss": 217.7369, + "step": 50620 + }, + { + "epoch": 0.9737427938128964, + "grad_norm": 1596.2884135282686, + "learning_rate": 3.468096337966853e-08, + "loss": 219.5405, + "step": 50630 + }, + { + "epoch": 0.9739351190733769, + "grad_norm": 1755.822185458902, + "learning_rate": 3.417496730645731e-08, + "loss": 219.8947, + "step": 50640 + }, + { + "epoch": 0.9741274443338575, + "grad_norm": 1368.126446089764, + "learning_rate": 3.3672683338480396e-08, + "loss": 221.7425, + "step": 50650 + }, + { + "epoch": 0.974319769594338, + "grad_norm": 1495.6109987868633, + "learning_rate": 3.317411166282813e-08, + "loss": 217.1732, + "step": 50660 + }, + { + "epoch": 0.9745120948548185, + "grad_norm": 1347.4630626852975, + "learning_rate": 3.2679252465213085e-08, + "loss": 219.2742, + "step": 50670 + }, + { + "epoch": 0.974704420115299, + "grad_norm": 1476.7083655899255, + "learning_rate": 3.218810592996113e-08, + "loss": 222.0374, + "step": 50680 + }, + { + "epoch": 0.9748967453757795, + "grad_norm": 1842.1941916982119, + "learning_rate": 3.170067224001483e-08, + "loss": 230.6348, + "step": 50690 + }, + { + "epoch": 0.97508907063626, + "grad_norm": 1307.7977738124846, + "learning_rate": 3.1216951576936714e-08, + "loss": 210.7078, + "step": 50700 + }, + { + "epoch": 0.9752813958967406, + "grad_norm": 1384.566276610812, + "learning_rate": 3.073694412090489e-08, + "loss": 216.715, + "step": 50710 + }, + { + "epoch": 0.9754737211572211, + "grad_norm": 1530.8551794044608, + "learning_rate": 3.026065005071188e-08, + "loss": 221.1893, + "step": 50720 + }, + { + "epoch": 0.9756660464177016, + "grad_norm": 1486.5955679800163, + "learning_rate": 2.9788069543772445e-08, + "loss": 224.9625, + "step": 50730 + }, + { + "epoch": 0.9758583716781821, + "grad_norm": 1327.569734407385, + "learning_rate": 2.9319202776113553e-08, + "loss": 216.0199, + "step": 50740 + }, + { + "epoch": 0.9760506969386626, + "grad_norm": 1385.0947376121192, + "learning_rate": 2.8854049922379946e-08, + "loss": 215.7292, + "step": 50750 + }, + { + "epoch": 0.9762430221991432, + "grad_norm": 1519.3898920981442, + "learning_rate": 2.839261115583303e-08, + "loss": 222.5011, + "step": 50760 + }, + { + "epoch": 0.9764353474596237, + "grad_norm": 1675.3882091554156, + "learning_rate": 2.793488664835309e-08, + "loss": 219.7468, + "step": 50770 + }, + { + "epoch": 0.9766276727201042, + "grad_norm": 1402.753448994212, + "learning_rate": 2.7480876570433746e-08, + "loss": 215.0043, + "step": 50780 + }, + { + "epoch": 0.9768199979805847, + "grad_norm": 1454.2800896410947, + "learning_rate": 2.7030581091186393e-08, + "loss": 224.7813, + "step": 50790 + }, + { + "epoch": 0.9770123232410652, + "grad_norm": 1436.5471290087116, + "learning_rate": 2.658400037833686e-08, + "loss": 216.8098, + "step": 50800 + }, + { + "epoch": 0.9772046485015459, + "grad_norm": 1354.6174654199986, + "learning_rate": 2.6141134598233197e-08, + "loss": 217.105, + "step": 50810 + }, + { + "epoch": 0.9773969737620264, + "grad_norm": 1810.9408837924473, + "learning_rate": 2.5701983915831232e-08, + "loss": 223.1486, + "step": 50820 + }, + { + "epoch": 0.9775892990225069, + "grad_norm": 1265.015592760325, + "learning_rate": 2.5266548494710108e-08, + "loss": 219.1568, + "step": 50830 + }, + { + "epoch": 0.9777816242829874, + "grad_norm": 1305.251664712247, + "learning_rate": 2.4834828497062315e-08, + "loss": 214.6504, + "step": 50840 + }, + { + "epoch": 0.977973949543468, + "grad_norm": 1358.9980310003082, + "learning_rate": 2.4406824083694768e-08, + "loss": 222.9667, + "step": 50850 + }, + { + "epoch": 0.9781662748039485, + "grad_norm": 1379.5327018798964, + "learning_rate": 2.398253541403217e-08, + "loss": 226.9672, + "step": 50860 + }, + { + "epoch": 0.978358600064429, + "grad_norm": 1369.5129330917473, + "learning_rate": 2.3561962646116988e-08, + "loss": 218.3629, + "step": 50870 + }, + { + "epoch": 0.9785509253249095, + "grad_norm": 1426.930977174834, + "learning_rate": 2.3145105936603906e-08, + "loss": 211.0509, + "step": 50880 + }, + { + "epoch": 0.97874325058539, + "grad_norm": 1644.633872634359, + "learning_rate": 2.273196544076539e-08, + "loss": 225.7178, + "step": 50890 + }, + { + "epoch": 0.9789355758458705, + "grad_norm": 1286.5058961571435, + "learning_rate": 2.2322541312490565e-08, + "loss": 218.7, + "step": 50900 + }, + { + "epoch": 0.9791279011063511, + "grad_norm": 1416.0769614789701, + "learning_rate": 2.1916833704281882e-08, + "loss": 230.007, + "step": 50910 + }, + { + "epoch": 0.9793202263668316, + "grad_norm": 1710.889599421853, + "learning_rate": 2.1514842767258458e-08, + "loss": 212.4472, + "step": 50920 + }, + { + "epoch": 0.9795125516273121, + "grad_norm": 1416.8537592236978, + "learning_rate": 2.1116568651156076e-08, + "loss": 223.9885, + "step": 50930 + }, + { + "epoch": 0.9797048768877926, + "grad_norm": 1442.4390641662933, + "learning_rate": 2.0722011504326066e-08, + "loss": 218.4397, + "step": 50940 + }, + { + "epoch": 0.9798972021482731, + "grad_norm": 1524.4886403365613, + "learning_rate": 2.033117147373309e-08, + "loss": 214.8394, + "step": 50950 + }, + { + "epoch": 0.9800895274087537, + "grad_norm": 1337.31676711825, + "learning_rate": 1.9944048704959583e-08, + "loss": 219.3454, + "step": 50960 + }, + { + "epoch": 0.9802818526692342, + "grad_norm": 1249.9829223873553, + "learning_rate": 1.956064334220131e-08, + "loss": 214.8916, + "step": 50970 + }, + { + "epoch": 0.9804741779297147, + "grad_norm": 1358.3243917619675, + "learning_rate": 1.9180955528270706e-08, + "loss": 223.45, + "step": 50980 + }, + { + "epoch": 0.9806665031901952, + "grad_norm": 1626.3532054361744, + "learning_rate": 1.8804985404595743e-08, + "loss": 223.7124, + "step": 50990 + }, + { + "epoch": 0.9808588284506757, + "grad_norm": 1278.4692814716411, + "learning_rate": 1.843273311121885e-08, + "loss": 214.2552, + "step": 51000 + }, + { + "epoch": 0.9810511537111563, + "grad_norm": 1370.3837110251345, + "learning_rate": 1.806419878679799e-08, + "loss": 223.5854, + "step": 51010 + }, + { + "epoch": 0.9812434789716369, + "grad_norm": 1303.690071878926, + "learning_rate": 1.7699382568605595e-08, + "loss": 214.8925, + "step": 51020 + }, + { + "epoch": 0.9814358042321174, + "grad_norm": 1389.2505052761317, + "learning_rate": 1.733828459253073e-08, + "loss": 221.8437, + "step": 51030 + }, + { + "epoch": 0.9816281294925979, + "grad_norm": 1393.448661121099, + "learning_rate": 1.6980904993075808e-08, + "loss": 218.3199, + "step": 51040 + }, + { + "epoch": 0.9818204547530784, + "grad_norm": 1417.387868844666, + "learning_rate": 1.6627243903357682e-08, + "loss": 227.3598, + "step": 51050 + }, + { + "epoch": 0.982012780013559, + "grad_norm": 1246.5118420206204, + "learning_rate": 1.6277301455110972e-08, + "loss": 217.5961, + "step": 51060 + }, + { + "epoch": 0.9822051052740395, + "grad_norm": 1411.0164990115627, + "learning_rate": 1.5931077778682524e-08, + "loss": 221.1215, + "step": 51070 + }, + { + "epoch": 0.98239743053452, + "grad_norm": 1592.835508119975, + "learning_rate": 1.5588573003035847e-08, + "loss": 221.5829, + "step": 51080 + }, + { + "epoch": 0.9825897557950005, + "grad_norm": 1573.403061390579, + "learning_rate": 1.5249787255747774e-08, + "loss": 222.5703, + "step": 51090 + }, + { + "epoch": 0.982782081055481, + "grad_norm": 1297.3036971801798, + "learning_rate": 1.4914720663009585e-08, + "loss": 216.8494, + "step": 51100 + }, + { + "epoch": 0.9829744063159616, + "grad_norm": 1548.7706593508806, + "learning_rate": 1.4583373349629226e-08, + "loss": 217.2335, + "step": 51110 + }, + { + "epoch": 0.9831667315764421, + "grad_norm": 1374.075364719324, + "learning_rate": 1.4255745439027968e-08, + "loss": 214.8763, + "step": 51120 + }, + { + "epoch": 0.9833590568369226, + "grad_norm": 1451.1855216050324, + "learning_rate": 1.3931837053241526e-08, + "loss": 222.3662, + "step": 51130 + }, + { + "epoch": 0.9835513820974031, + "grad_norm": 1428.063812888039, + "learning_rate": 1.3611648312920057e-08, + "loss": 223.8568, + "step": 51140 + }, + { + "epoch": 0.9837437073578836, + "grad_norm": 1485.2896133925717, + "learning_rate": 1.3295179337329267e-08, + "loss": 220.7917, + "step": 51150 + }, + { + "epoch": 0.9839360326183642, + "grad_norm": 1323.231150432954, + "learning_rate": 1.2982430244347088e-08, + "loss": 226.1825, + "step": 51160 + }, + { + "epoch": 0.9841283578788447, + "grad_norm": 1576.0469204973817, + "learning_rate": 1.2673401150470333e-08, + "loss": 224.9283, + "step": 51170 + }, + { + "epoch": 0.9843206831393252, + "grad_norm": 1489.4402814999119, + "learning_rate": 1.2368092170804702e-08, + "loss": 219.7486, + "step": 51180 + }, + { + "epoch": 0.9845130083998057, + "grad_norm": 1629.4088723652553, + "learning_rate": 1.2066503419073672e-08, + "loss": 219.009, + "step": 51190 + }, + { + "epoch": 0.9847053336602862, + "grad_norm": 1346.2065111979, + "learning_rate": 1.1768635007614049e-08, + "loss": 219.1161, + "step": 51200 + }, + { + "epoch": 0.9848976589207667, + "grad_norm": 1294.1348929093222, + "learning_rate": 1.1474487047375971e-08, + "loss": 218.8163, + "step": 51210 + }, + { + "epoch": 0.9850899841812474, + "grad_norm": 1999.0228280219796, + "learning_rate": 1.1184059647926238e-08, + "loss": 224.0086, + "step": 51220 + }, + { + "epoch": 0.9852823094417279, + "grad_norm": 1565.2317858960037, + "learning_rate": 1.0897352917443871e-08, + "loss": 224.2722, + "step": 51230 + }, + { + "epoch": 0.9854746347022084, + "grad_norm": 1431.285351349309, + "learning_rate": 1.0614366962721223e-08, + "loss": 222.1215, + "step": 51240 + }, + { + "epoch": 0.9856669599626889, + "grad_norm": 1307.262296502763, + "learning_rate": 1.033510188916731e-08, + "loss": 209.9831, + "step": 51250 + }, + { + "epoch": 0.9858592852231695, + "grad_norm": 1478.7529596040304, + "learning_rate": 1.0059557800802256e-08, + "loss": 218.4157, + "step": 51260 + }, + { + "epoch": 0.98605161048365, + "grad_norm": 1342.1128420376638, + "learning_rate": 9.78773480026396e-09, + "loss": 213.2213, + "step": 51270 + }, + { + "epoch": 0.9862439357441305, + "grad_norm": 1550.7535283321577, + "learning_rate": 9.519632988800321e-09, + "loss": 223.3799, + "step": 51280 + }, + { + "epoch": 0.986436261004611, + "grad_norm": 1330.8557496003698, + "learning_rate": 9.25525246627479e-09, + "loss": 222.1909, + "step": 51290 + }, + { + "epoch": 0.9866285862650915, + "grad_norm": 1487.4320419313735, + "learning_rate": 8.994593331165257e-09, + "loss": 224.6448, + "step": 51300 + }, + { + "epoch": 0.986820911525572, + "grad_norm": 1356.2045533333326, + "learning_rate": 8.737655680562951e-09, + "loss": 213.0719, + "step": 51310 + }, + { + "epoch": 0.9870132367860526, + "grad_norm": 1506.8613464237494, + "learning_rate": 8.484439610172424e-09, + "loss": 219.0308, + "step": 51320 + }, + { + "epoch": 0.9872055620465331, + "grad_norm": 1312.2588527689834, + "learning_rate": 8.234945214312673e-09, + "loss": 221.395, + "step": 51330 + }, + { + "epoch": 0.9873978873070136, + "grad_norm": 1349.6400604286332, + "learning_rate": 7.989172585917138e-09, + "loss": 221.0535, + "step": 51340 + }, + { + "epoch": 0.9875902125674941, + "grad_norm": 1431.5555892402901, + "learning_rate": 7.747121816530368e-09, + "loss": 218.7294, + "step": 51350 + }, + { + "epoch": 0.9877825378279746, + "grad_norm": 1389.3445511851914, + "learning_rate": 7.508792996313573e-09, + "loss": 222.0628, + "step": 51360 + }, + { + "epoch": 0.9879748630884552, + "grad_norm": 1359.3494700375438, + "learning_rate": 7.274186214040191e-09, + "loss": 215.3606, + "step": 51370 + }, + { + "epoch": 0.9881671883489357, + "grad_norm": 1401.8634448461696, + "learning_rate": 7.043301557096982e-09, + "loss": 216.2671, + "step": 51380 + }, + { + "epoch": 0.9883595136094162, + "grad_norm": 1405.5532361299563, + "learning_rate": 6.816139111484044e-09, + "loss": 217.8133, + "step": 51390 + }, + { + "epoch": 0.9885518388698967, + "grad_norm": 1344.8531263881591, + "learning_rate": 6.592698961818134e-09, + "loss": 221.153, + "step": 51400 + }, + { + "epoch": 0.9887441641303772, + "grad_norm": 1297.3355723262694, + "learning_rate": 6.37298119132379e-09, + "loss": 213.2182, + "step": 51410 + }, + { + "epoch": 0.9889364893908578, + "grad_norm": 1369.353306862403, + "learning_rate": 6.156985881844435e-09, + "loss": 216.4058, + "step": 51420 + }, + { + "epoch": 0.9891288146513384, + "grad_norm": 1468.7511880833326, + "learning_rate": 5.944713113833489e-09, + "loss": 218.5407, + "step": 51430 + }, + { + "epoch": 0.9893211399118189, + "grad_norm": 1278.996437151188, + "learning_rate": 5.736162966359926e-09, + "loss": 221.9043, + "step": 51440 + }, + { + "epoch": 0.9895134651722994, + "grad_norm": 1375.160320973759, + "learning_rate": 5.531335517104941e-09, + "loss": 216.5196, + "step": 51450 + }, + { + "epoch": 0.98970579043278, + "grad_norm": 1426.4116457371038, + "learning_rate": 5.3302308423641704e-09, + "loss": 217.575, + "step": 51460 + }, + { + "epoch": 0.9898981156932605, + "grad_norm": 1458.9021200607694, + "learning_rate": 5.132849017044361e-09, + "loss": 222.4083, + "step": 51470 + }, + { + "epoch": 0.990090440953741, + "grad_norm": 1694.4203660275061, + "learning_rate": 4.939190114666703e-09, + "loss": 226.9896, + "step": 51480 + }, + { + "epoch": 0.9902827662142215, + "grad_norm": 1391.9248289774064, + "learning_rate": 4.749254207367937e-09, + "loss": 215.2023, + "step": 51490 + }, + { + "epoch": 0.990475091474702, + "grad_norm": 1266.9777005646283, + "learning_rate": 4.563041365894805e-09, + "loss": 219.9194, + "step": 51500 + }, + { + "epoch": 0.9906674167351825, + "grad_norm": 1795.4415090775, + "learning_rate": 4.380551659608489e-09, + "loss": 221.6285, + "step": 51510 + }, + { + "epoch": 0.9908597419956631, + "grad_norm": 1365.928249355028, + "learning_rate": 4.2017851564835065e-09, + "loss": 218.8447, + "step": 51520 + }, + { + "epoch": 0.9910520672561436, + "grad_norm": 1462.1396743521134, + "learning_rate": 4.026741923107702e-09, + "loss": 214.7567, + "step": 51530 + }, + { + "epoch": 0.9912443925166241, + "grad_norm": 1550.7080398876637, + "learning_rate": 3.855422024681144e-09, + "loss": 224.2405, + "step": 51540 + }, + { + "epoch": 0.9914367177771046, + "grad_norm": 1191.5138169966979, + "learning_rate": 3.6878255250183406e-09, + "loss": 210.5956, + "step": 51550 + }, + { + "epoch": 0.9916290430375851, + "grad_norm": 1297.256225222978, + "learning_rate": 3.5239524865460227e-09, + "loss": 215.2229, + "step": 51560 + }, + { + "epoch": 0.9918213682980657, + "grad_norm": 1466.4899683458307, + "learning_rate": 3.363802970304253e-09, + "loss": 227.8157, + "step": 51570 + }, + { + "epoch": 0.9920136935585462, + "grad_norm": 1431.8244867733551, + "learning_rate": 3.207377035946424e-09, + "loss": 221.4954, + "step": 51580 + }, + { + "epoch": 0.9922060188190267, + "grad_norm": 1389.1053902441847, + "learning_rate": 3.05467474173704e-09, + "loss": 220.8963, + "step": 51590 + }, + { + "epoch": 0.9923983440795072, + "grad_norm": 1441.4437514690735, + "learning_rate": 2.9056961445572686e-09, + "loss": 219.463, + "step": 51600 + }, + { + "epoch": 0.9925906693399877, + "grad_norm": 1474.2013426381927, + "learning_rate": 2.7604412998982754e-09, + "loss": 220.8356, + "step": 51610 + }, + { + "epoch": 0.9927829946004683, + "grad_norm": 1449.1238090459485, + "learning_rate": 2.61891026186456e-09, + "loss": 216.6552, + "step": 51620 + }, + { + "epoch": 0.9929753198609489, + "grad_norm": 1408.3734534433443, + "learning_rate": 2.4811030831739525e-09, + "loss": 229.2871, + "step": 51630 + }, + { + "epoch": 0.9931676451214294, + "grad_norm": 1590.7466658228748, + "learning_rate": 2.347019815158724e-09, + "loss": 223.535, + "step": 51640 + }, + { + "epoch": 0.9933599703819099, + "grad_norm": 1562.5799645393317, + "learning_rate": 2.216660507762258e-09, + "loss": 220.5486, + "step": 51650 + }, + { + "epoch": 0.9935522956423904, + "grad_norm": 1454.2733748923456, + "learning_rate": 2.0900252095401583e-09, + "loss": 215.5121, + "step": 51660 + }, + { + "epoch": 0.993744620902871, + "grad_norm": 1603.3997597332047, + "learning_rate": 1.9671139676624707e-09, + "loss": 219.2345, + "step": 51670 + }, + { + "epoch": 0.9939369461633515, + "grad_norm": 1394.3368719356417, + "learning_rate": 1.8479268279125717e-09, + "loss": 220.9174, + "step": 51680 + }, + { + "epoch": 0.994129271423832, + "grad_norm": 1343.380943134161, + "learning_rate": 1.73246383468495e-09, + "loss": 213.827, + "step": 51690 + }, + { + "epoch": 0.9943215966843125, + "grad_norm": 1285.1485220669806, + "learning_rate": 1.6207250309874246e-09, + "loss": 218.4092, + "step": 51700 + }, + { + "epoch": 0.994513921944793, + "grad_norm": 1626.560462404415, + "learning_rate": 1.512710458442257e-09, + "loss": 223.7305, + "step": 51710 + }, + { + "epoch": 0.9947062472052736, + "grad_norm": 1407.782058737943, + "learning_rate": 1.408420157280599e-09, + "loss": 226.7854, + "step": 51720 + }, + { + "epoch": 0.9948985724657541, + "grad_norm": 1400.1296413481173, + "learning_rate": 1.3078541663502642e-09, + "loss": 222.8753, + "step": 51730 + }, + { + "epoch": 0.9950908977262346, + "grad_norm": 1475.3583524220107, + "learning_rate": 1.2110125231112879e-09, + "loss": 224.1618, + "step": 51740 + }, + { + "epoch": 0.9952832229867151, + "grad_norm": 1549.8737218502094, + "learning_rate": 1.117895263633706e-09, + "loss": 219.9759, + "step": 51750 + }, + { + "epoch": 0.9954755482471956, + "grad_norm": 1444.2045857010532, + "learning_rate": 1.0285024226042162e-09, + "loss": 220.9665, + "step": 51760 + }, + { + "epoch": 0.9956678735076762, + "grad_norm": 1298.734984710355, + "learning_rate": 9.428340333184072e-10, + "loss": 214.748, + "step": 51770 + }, + { + "epoch": 0.9958601987681567, + "grad_norm": 1504.4810221591138, + "learning_rate": 8.608901276874193e-10, + "loss": 229.3391, + "step": 51780 + }, + { + "epoch": 0.9960525240286372, + "grad_norm": 1548.7392350156351, + "learning_rate": 7.826707362335039e-10, + "loss": 224.7887, + "step": 51790 + }, + { + "epoch": 0.9962448492891177, + "grad_norm": 1462.720059086555, + "learning_rate": 7.081758880911338e-10, + "loss": 237.064, + "step": 51800 + }, + { + "epoch": 0.9964371745495982, + "grad_norm": 1332.2516227802994, + "learning_rate": 6.374056110103332e-10, + "loss": 214.0306, + "step": 51810 + }, + { + "epoch": 0.9966294998100788, + "grad_norm": 1514.7331971599397, + "learning_rate": 5.703599313511277e-10, + "loss": 217.9168, + "step": 51820 + }, + { + "epoch": 0.9968218250705593, + "grad_norm": 1407.0593314278713, + "learning_rate": 5.070388740868737e-10, + "loss": 221.0774, + "step": 51830 + }, + { + "epoch": 0.9970141503310399, + "grad_norm": 1473.4218750511973, + "learning_rate": 4.474424628031493e-10, + "loss": 221.3092, + "step": 51840 + }, + { + "epoch": 0.9972064755915204, + "grad_norm": 1258.9502542379898, + "learning_rate": 3.9157071969997407e-10, + "loss": 212.6899, + "step": 51850 + }, + { + "epoch": 0.9973988008520009, + "grad_norm": 1571.1567575357894, + "learning_rate": 3.394236655873684e-10, + "loss": 222.4162, + "step": 51860 + }, + { + "epoch": 0.9975911261124815, + "grad_norm": 1349.1337089835238, + "learning_rate": 2.910013198886841e-10, + "loss": 220.9971, + "step": 51870 + }, + { + "epoch": 0.997783451372962, + "grad_norm": 1501.929159723332, + "learning_rate": 2.4630370064171463e-10, + "loss": 221.988, + "step": 51880 + }, + { + "epoch": 0.9979757766334425, + "grad_norm": 1351.6320001798006, + "learning_rate": 2.0533082449647467e-10, + "loss": 217.8682, + "step": 51890 + }, + { + "epoch": 0.998168101893923, + "grad_norm": 1407.9931741017238, + "learning_rate": 1.6808270671186954e-10, + "loss": 226.0872, + "step": 51900 + }, + { + "epoch": 0.9983604271544035, + "grad_norm": 1743.3399384079214, + "learning_rate": 1.345593611645768e-10, + "loss": 215.9324, + "step": 51910 + }, + { + "epoch": 0.9985527524148841, + "grad_norm": 1551.584540557944, + "learning_rate": 1.0476080034016456e-10, + "loss": 228.4527, + "step": 51920 + }, + { + "epoch": 0.9987450776753646, + "grad_norm": 1521.5271051523334, + "learning_rate": 7.868703533864264e-11, + "loss": 220.933, + "step": 51930 + }, + { + "epoch": 0.9989374029358451, + "grad_norm": 1734.0650736528014, + "learning_rate": 5.633807587224205e-11, + "loss": 222.5149, + "step": 51940 + }, + { + "epoch": 0.9991297281963256, + "grad_norm": 1392.847156910018, + "learning_rate": 3.771393026541503e-11, + "loss": 217.0153, + "step": 51950 + }, + { + "epoch": 0.9993220534568061, + "grad_norm": 1324.0422130016884, + "learning_rate": 2.281460545594527e-11, + "loss": 215.6331, + "step": 51960 + }, + { + "epoch": 0.9995143787172867, + "grad_norm": 1394.8856969882577, + "learning_rate": 1.164010699272744e-11, + "loss": 217.6925, + "step": 51970 + }, + { + "epoch": 0.9997067039777672, + "grad_norm": 1348.2654629021827, + "learning_rate": 4.190439037987659e-12, + "loss": 213.5433, + "step": 51980 + }, + { + "epoch": 0.9998990292382477, + "grad_norm": 1423.559718683161, + "learning_rate": 4.656043661732668e-13, + "loss": 236.7, + "step": 51990 + }, + { + "epoch": 0.999995191868488, + "eval_loss": 268.4025573730469, + "eval_runtime": 161.3455, + "eval_samples_per_second": 4.314, + "eval_steps_per_second": 0.136, + "step": 51995 + }, + { + "epoch": 0.999995191868488, + "step": 51995, + "total_flos": 3.3727656990081024e+16, + "train_loss": 478.7030639354971, + "train_runtime": 1000248.8491, + "train_samples_per_second": 3.327, + "train_steps_per_second": 0.052 + } + ], + "logging_steps": 10, + "max_steps": 51995, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 5000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 3.3727656990081024e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}